diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 8fb8b9c94b9..62231a7afeb 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -17,8 +17,6 @@ jobs: go-version: "1.20" - name: Checkout repo uses: actions/checkout@v2 - - name: Build relic - run: make crypto_setup_gopath # Provide Google Service Account credentials to Github Action, allowing interaction with the Google Container Registry # Logging in as github-actions@dl-flow.iam.gserviceaccount.com - id: auth diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f865bc9f0a5..904eebaebe0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,8 +41,10 @@ jobs: with: go-version: ${{ env.GO_VERSION }} cache: true - - name: Build relic - run: make crypto_setup_gopath + - name: Install C formatter + run: sudo apt-get install -y clang-format + - name: Run C formatter and sanitizer for ./crypto + run: make -C crypto c-format && make -C crypto c-sanitize - name: Run go generate run: go generate working-directory: ${{ matrix.dir }} @@ -51,10 +53,11 @@ jobs: with: # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version. version: v1.54 - args: -v --build-tags relic + args: -v working-directory: ${{ matrix.dir }} # https://github.com/golangci/golangci-lint-action/issues/244 skip-cache: true + tidy: name: Tidy @@ -72,18 +75,6 @@ jobs: - name: code sanity check run: make code-sanity-check - shell-check: - name: ShellCheck - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Run ShellCheck - uses: ludeeus/action-shellcheck@203a3fd018dfe73f8ae7e3aa8da2c149a5f41c33 - with: - scandir: './crypto' - ignore: 'relic' - create-dynamic-test-matrix: name: Create Dynamic Test Matrix runs-on: ubuntu-latest @@ -144,20 +135,17 @@ jobs: matrix: include: - name: crypto - make1: -C crypto setup - make2: unittest + setup: noop retries: 1 race: 1 runner: ubuntu-latest - name: insecure - make1: install-tools - make2: test + setup: install-tools retries: 5 race: 0 runner: buildjet-4vcpu-ubuntu-2204 - name: integration - make1: install-tools - make2: test + setup: install-tools retries: 5 race: 0 runner: buildjet-4vcpu-ubuntu-2204 @@ -171,7 +159,7 @@ jobs: go-version: ${{ env.GO_VERSION }} cache: true - name: Setup tests (${{ matrix.name }}) - run: make ${{ matrix.make1 }} + run: make ${{ matrix.setup }} - name: Run tests (${{ matrix.name }}) env: RACE_DETECTOR: ${{ matrix.race }} @@ -179,8 +167,8 @@ jobs: with: timeout_minutes: 35 max_attempts: ${{ matrix.retries }} - # run `make2` target inside each module's root - command: VERBOSE=1 make -C ${{ matrix.name }} ${{ matrix.make2 }} + # run test target inside each module's root + command: VERBOSE=1 make -C ${{ matrix.name }} test - name: Upload coverage report uses: codecov/codecov-action@v3 with: @@ -202,8 +190,6 @@ jobs: with: go-version: ${{ env.GO_VERSION }} cache: true - - name: Build relic - run: make crypto_setup_gopath - name: Docker build run: make docker-build-flow docker-build-flow-corrupt - name: Save Docker images @@ -294,8 +280,6 @@ jobs: with: go-version: ${{ env.GO_VERSION }} cache: true - - name: Build relic - run: make crypto_setup_gopath - name: Load cached Docker images uses: actions/cache@v3 with: diff --git a/.github/workflows/flaky-test-monitor.yml b/.github/workflows/flaky-test-monitor.yml index 06731f77b9a..b3e380beaaa 100644 --- a/.github/workflows/flaky-test-monitor.yml +++ b/.github/workflows/flaky-test-monitor.yml @@ -82,18 +82,15 @@ jobs: matrix: include: - name: crypto - make1: -C crypto setup - make2: unittest + setup: noop race: 1 test_category: unit-crypto - name: insecure - make1: install-tools - make2: test + setup: install-tools race: 0 test_category: unit-insecure - name: integration - make1: install-tools - make2: test + setup: install-tools race: 0 test_category: unit-integration runs-on: ubuntu-latest @@ -106,11 +103,11 @@ jobs: go-version: ${{ env.GO_VERSION }} cache: true - name: Setup tests (${{ matrix.name }}) - run: make ${{ matrix.make1 }} + run: make ${{ matrix.setup }} - name: Run tests (${{ matrix.name }}) env: RACE_DETECTOR: ${{ matrix.race }} - run: make -es -C ${{ matrix.name }} ${{ matrix.make2 }} > test-output + run: make -es -C ${{ matrix.name }} test > test-output timeout-minutes: 100 continue-on-error: true - name: Process test results (${{ matrix.name }}) @@ -167,8 +164,6 @@ jobs: with: go-version: ${{ env.GO_VERSION }} cache: true - - name: Build relic - run: make crypto_setup_gopath - name: Docker build run: make docker-build-flow docker-build-flow-corrupt - name: Run tests diff --git a/.github/stale.yml b/.github/workflows/stale.yml similarity index 100% rename from .github/stale.yml rename to .github/workflows/stale.yml diff --git a/.github/workflows/tools.yml b/.github/workflows/tools.yml index 77d27066919..c9cfdfbfd5d 100644 --- a/.github/workflows/tools.yml +++ b/.github/workflows/tools.yml @@ -38,8 +38,6 @@ jobs: # to accurately get the version tag fetch-depth: 0 ref: ${{ inputs.tag }} - - name: Build relic - run: make crypto_setup_gopath - name: Build and upload boot-tools run: | make tool-bootstrap tool-transit diff --git a/.gitignore b/.gitignore index 1be2e18a99f..0c025be2692 100644 --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,6 @@ /cmd/util/util /cmd/bootstrap/bootstrap -# crypto relic folder -crypto/relic/ # Test binary, build with `go test -c` *.test diff --git a/Makefile b/Makefile index 04977eafda5..91b54bc0afd 100644 --- a/Makefile +++ b/Makefile @@ -39,16 +39,20 @@ K8S_YAMLS_LOCATION_STAGING=./k8s/staging export CONTAINER_REGISTRY := gcr.io/flow-container-registry export DOCKER_BUILDKIT := 1 -# setup the crypto package under the GOPATH: needed to test packages importing flow-go/crypto -.PHONY: crypto_setup_gopath -crypto_setup_gopath: - bash crypto_setup.sh +include crypto_adx_flag.mk + +CGO_FLAG := CGO_CFLAGS=$(CRYPTO_FLAG) + +# needed for CI +.PHONY: noop +noop: + @echo "This is a no-op target" cmd/collection/collection: - go build -o cmd/collection/collection cmd/collection/main.go + $(CGO_FLAG) go build -o cmd/collection/collection cmd/collection/main.go cmd/util/util: - go build -o cmd/util/util --tags relic cmd/util/main.go + $(CGO_FLAG) go build -o cmd/util/util cmd/util/main.go .PHONY: update-core-contracts-version update-core-contracts-version: @@ -64,13 +68,10 @@ update-cadence-version: ./scripts/update-cadence.sh $(CC_VERSION) make tidy -############################################################################################ -# CAUTION: DO NOT MODIFY THESE TARGETS! DOING SO WILL BREAK THE FLAKY TEST MONITOR - .PHONY: unittest-main unittest-main: - # test all packages with Relic library enabled - go test $(if $(VERBOSE),-v,) -coverprofile=$(COVER_PROFILE) -covermode=atomic $(if $(RACE_DETECTOR),-race,) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) --tags relic $(GO_TEST_PACKAGES) + # test all packages + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) -coverprofile=$(COVER_PROFILE) -covermode=atomic $(if $(RACE_DETECTOR),-race,) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) $(GO_TEST_PACKAGES) .PHONY: install-mock-generators install-mock-generators: @@ -79,7 +80,7 @@ install-mock-generators: go install github.com/golang/mock/mockgen@v1.6.0; .PHONY: install-tools -install-tools: crypto_setup_gopath check-go-version install-mock-generators +install-tools: check-go-version install-mock-generators cd ${GOPATH}; \ go install github.com/golang/protobuf/protoc-gen-go@v1.3.2; \ go install github.com/uber/prototool/cmd/prototool@v1.9.0; \ @@ -90,13 +91,6 @@ install-tools: crypto_setup_gopath check-go-version install-mock-generators verify-mocks: tidy generate-mocks git diff --exit-code -############################################################################################ - -.PHONY: emulator-norelic-check -emulator-norelic-check: - # test the fvm package compiles with Relic library disabled (required for the emulator build) - cd ./fvm && go test ./... -run=NoTestHasThisPrefix - .SILENT: go-math-rand-check go-math-rand-check: # check that the insecure math/rand Go package isn't used by production code. @@ -112,12 +106,12 @@ go-math-rand-check: fi .PHONY: code-sanity-check -code-sanity-check: go-math-rand-check emulator-norelic-check +code-sanity-check: go-math-rand-check .PHONY: fuzz-fvm fuzz-fvm: # run fuzz tests in the fvm package - cd ./fvm && go test -fuzz=Fuzz -run ^$$ --tags relic + cd ./fvm && $(CGO_FLAG) go test -fuzz=Fuzz -run ^$$ .PHONY: test test: verify-mocks unittest-main @@ -155,18 +149,18 @@ generate-proto: .PHONY: generate-fvm-env-wrappers generate-fvm-env-wrappers: - go run ./fvm/environment/generate-wrappers fvm/environment/parse_restricted_checker.go + $(CGO_FLAG) go run ./fvm/environment/generate-wrappers fvm/environment/parse_restricted_checker.go .PHONY: generate-mocks generate-mocks: install-mock-generators mockery --name '(Connector|PingInfoProvider)' --dir=network/p2p --case=underscore --output="./network/mocknetwork" --outpkg="mocknetwork" - mockgen -destination=storage/mocks/storage.go -package=mocks github.com/onflow/flow-go/storage Blocks,Headers,Payloads,Collections,Commits,Events,ServiceEvents,TransactionResults - #mockgen -destination=module/mocks/network.go -package=mocks github.com/onflow/flow-go/module Local,Requester - mockgen -destination=network/mocknetwork/mock_network.go -package=mocknetwork github.com/onflow/flow-go/network EngineRegistry + $(CGO_FLAG) mockgen -destination=storage/mocks/storage.go -package=mocks github.com/onflow/flow-go/storage Blocks,Headers,Payloads,Collections,Commits,Events,ServiceEvents,TransactionResults + #$(CGO_FLAG) mockgen -destination=module/mocks/network.go -package=mocks github.com/onflow/flow-go/module Local,Requester + $(CGO_FLAG) mockgen -destination=network/mocknetwork/mock_network.go -package=mocknetwork github.com/onflow/flow-go/network EngineRegistry mockery --name='.*' --dir=integration/benchmark/mocksiface --case=underscore --output="integration/benchmark/mock" --outpkg="mock" mockery --name=ExecutionDataStore --dir=module/executiondatasync/execution_data --case=underscore --output="./module/executiondatasync/execution_data/mock" --outpkg="mock" mockery --name=Downloader --dir=module/executiondatasync/execution_data --case=underscore --output="./module/executiondatasync/execution_data/mock" --outpkg="mock" - mockery --name 'ExecutionDataRequester' --dir=module/state_synchronization --case=underscore --output="./module/state_synchronization/mock" --outpkg="state_synchronization" + mockery --name '(ExecutionDataRequester|IndexReporter)' --dir=module/state_synchronization --case=underscore --output="./module/state_synchronization/mock" --outpkg="state_synchronization" mockery --name 'ExecutionState' --dir=engine/execution/state --case=underscore --output="engine/execution/state/mock" --outpkg="mock" mockery --name 'BlockComputer' --dir=engine/execution/computation/computer --case=underscore --output="engine/execution/computation/computer/mock" --outpkg="mock" mockery --name 'ComputationManager' --dir=engine/execution/computation --case=underscore --output="engine/execution/computation/mock" --outpkg="mock" @@ -177,7 +171,7 @@ generate-mocks: install-mock-generators mockery --name 'ProviderEngine' --dir=engine/execution/provider --case=underscore --output="engine/execution/provider/mock" --outpkg="mock" (cd ./crypto && mockery --name 'PublicKey' --case=underscore --output="../module/mock" --outpkg="mock") mockery --name '.*' --dir=state/cluster --case=underscore --output="state/cluster/mock" --outpkg="mock" - mockery --name '.*' --dir=module --case=underscore --tags="relic" --output="./module/mock" --outpkg="mock" + mockery --name '.*' --dir=module --case=underscore --output="./module/mock" --outpkg="mock" mockery --name '.*' --dir=module/mempool --case=underscore --output="./module/mempool/mock" --outpkg="mempool" mockery --name '.*' --dir=module/component --case=underscore --output="./module/component/mock" --outpkg="component" mockery --name '.*' --dir=network --case=underscore --output="./network/mocknetwork" --outpkg="mocknetwork" @@ -234,12 +228,12 @@ tidy: .PHONY: lint lint: tidy # revive -config revive.toml -exclude storage/ledger/trie ./... - golangci-lint run -v --build-tags relic ./... + golangci-lint run -v ./... .PHONY: fix-lint fix-lint: # revive -config revive.toml -exclude storage/ledger/trie ./... - golangci-lint run -v --build-tags relic --fix ./... + golangci-lint run -v --fix ./... # Runs unit tests with different list of packages as passed by CI so they run in parallel .PHONY: ci @@ -247,7 +241,7 @@ ci: install-tools test # Runs integration tests .PHONY: ci-integration -ci-integration: crypto_setup_gopath +ci-integration: $(MAKE) -C integration ci-integration-test # Runs benchmark tests @@ -269,7 +263,6 @@ docker-ci: # Runs integration tests in Docker (for mac) .PHONY: docker-ci-integration docker-ci-integration: - rm -rf crypto/relic docker run \ --env DOCKER_API_VERSION='1.39' \ --network host \ @@ -282,59 +275,59 @@ docker-ci-integration: .PHONY: docker-build-collection docker-build-collection: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/collection --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/collection --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/collection:latest" -t "$(CONTAINER_REGISTRY)/collection:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/collection:$(IMAGE_TAG)" . .PHONY: docker-build-collection-without-netgo docker-build-collection-without-netgo: - docker build -f cmd/Dockerfile --build-arg TAGS=relic --build-arg TARGET=./cmd/collection --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/collection --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=$(IMAGE_TAG_NO_NETGO)" \ -t "$(CONTAINER_REGISTRY)/collection:$(IMAGE_TAG_NO_NETGO)" . .PHONY: docker-build-collection-debug docker-build-collection-debug: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/collection --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target debug \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/collection --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target debug \ -t "$(CONTAINER_REGISTRY)/collection-debug:latest" -t "$(CONTAINER_REGISTRY)/collection-debug:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/collection-debug:$(IMAGE_TAG)" . .PHONY: docker-build-consensus docker-build-consensus: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/consensus --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/consensus --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/consensus:latest" -t "$(CONTAINER_REGISTRY)/consensus:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/consensus:$(IMAGE_TAG)" . .PHONY: docker-build-consensus-without-netgo docker-build-consensus-without-netgo: - docker build -f cmd/Dockerfile --build-arg TAGS=relic --build-arg TARGET=./cmd/consensus --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/consensus --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=$(IMAGE_TAG_NO_NETGO)" \ -t "$(CONTAINER_REGISTRY)/consensus:$(IMAGE_TAG_NO_NETGO)" . .PHONY: docker-build-consensus-debug docker-build-consensus-debug: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/consensus --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target debug \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/consensus --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target debug \ -t "$(CONTAINER_REGISTRY)/consensus-debug:latest" -t "$(CONTAINER_REGISTRY)/consensus-debug:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/consensus-debug:$(IMAGE_TAG)" . .PHONY: docker-build-execution docker-build-execution: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/execution:latest" -t "$(CONTAINER_REGISTRY)/execution:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/execution:$(IMAGE_TAG)" . .PHONY: docker-build-execution-without-netgo docker-build-execution-without-netgo: - docker build -f cmd/Dockerfile --build-arg TAGS=relic --build-arg TARGET=./cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=$(IMAGE_TAG_NO_NETGO)" \ -t "$(CONTAINER_REGISTRY)/execution:$(IMAGE_TAG_NO_NETGO)" . .PHONY: docker-build-execution-debug docker-build-execution-debug: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target debug \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target debug \ -t "$(CONTAINER_REGISTRY)/execution-debug:latest" -t "$(CONTAINER_REGISTRY)/execution-debug:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/execution-debug:$(IMAGE_TAG)" . # build corrupt execution node for BFT testing @@ -342,28 +335,28 @@ docker-build-execution-debug: docker-build-execution-corrupt: # temporarily make insecure/ a non-module to allow Docker to use corrupt builders there ./insecure/cmd/mods_override.sh - docker build -f cmd/Dockerfile --build-arg TARGET=./insecure/cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./insecure/cmd/execution --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/execution-corrupted:latest" -t "$(CONTAINER_REGISTRY)/execution-corrupted:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/execution-corrupted:$(IMAGE_TAG)" . ./insecure/cmd/mods_restore.sh .PHONY: docker-build-verification docker-build-verification: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/verification:latest" -t "$(CONTAINER_REGISTRY)/verification:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/verification:$(IMAGE_TAG)" . .PHONY: docker-build-verification-without-netgo docker-build-verification-without-netgo: - docker build -f cmd/Dockerfile --build-arg TAGS=relic --build-arg TARGET=./cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=$(IMAGE_TAG_NO_NETGO)" \ -t "$(CONTAINER_REGISTRY)/verification:$(IMAGE_TAG_NO_NETGO)" . .PHONY: docker-build-verification-debug docker-build-verification-debug: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target debug \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target debug \ -t "$(CONTAINER_REGISTRY)/verification-debug:latest" -t "$(CONTAINER_REGISTRY)/verification-debug:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/verification-debug:$(IMAGE_TAG)" . # build corrupt verification node for BFT testing @@ -371,28 +364,28 @@ docker-build-verification-debug: docker-build-verification-corrupt: # temporarily make insecure/ a non-module to allow Docker to use corrupt builders there ./insecure/cmd/mods_override.sh - docker build -f cmd/Dockerfile --build-arg TARGET=./insecure/cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./insecure/cmd/verification --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/verification-corrupted:latest" -t "$(CONTAINER_REGISTRY)/verification-corrupted:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/verification-corrupted:$(IMAGE_TAG)" . ./insecure/cmd/mods_restore.sh .PHONY: docker-build-access docker-build-access: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/access:latest" -t "$(CONTAINER_REGISTRY)/access:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/access:$(IMAGE_TAG)" . .PHONY: docker-build-access-without-netgo docker-build-access-without-netgo: - docker build -f cmd/Dockerfile --build-arg TAGS=relic --build-arg TARGET=./cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=$(IMAGE_TAG_NO_NETGO)" \ -t "$(CONTAINER_REGISTRY)/access:$(IMAGE_TAG_NO_NETGO)" . .PHONY: docker-build-access-debug docker-build-access-debug: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target debug \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target debug \ -t "$(CONTAINER_REGISTRY)/access-debug:latest" -t "$(CONTAINER_REGISTRY)/access-debug:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/access-debug:$(IMAGE_TAG)" . # build corrupt access node for BFT testing @@ -400,21 +393,21 @@ docker-build-access-debug: docker-build-access-corrupt: #temporarily make insecure/ a non-module to allow Docker to use corrupt builders there ./insecure/cmd/mods_override.sh - docker build -f cmd/Dockerfile --build-arg TARGET=./insecure/cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./insecure/cmd/access --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/access-corrupted:latest" -t "$(CONTAINER_REGISTRY)/access-corrupted:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/access-corrupted:$(IMAGE_TAG)" . ./insecure/cmd/mods_restore.sh .PHONY: docker-build-observer docker-build-observer: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/observer --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/observer --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/observer:latest" -t "$(CONTAINER_REGISTRY)/observer:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/observer:$(IMAGE_TAG)" . .PHONY: docker-build-observer-without-netgo docker-build-observer-without-netgo: - docker build -f cmd/Dockerfile --build-arg TAGS=relic --build-arg TARGET=./cmd/observer --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/observer --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG_NO_NETGO) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --secret id=git_creds,env=GITHUB_CREDS --build-arg GOPRIVATE=$(GOPRIVATE) \ --label "git_commit=${COMMIT}" --label "git_tag=$(IMAGE_TAG_NO_NETGO)" \ -t "$(CONTAINER_REGISTRY)/observer:$(IMAGE_TAG_NO_NETGO)" . @@ -422,18 +415,18 @@ docker-build-observer-without-netgo: .PHONY: docker-build-ghost docker-build-ghost: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/ghost --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/ghost --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/ghost:latest" -t "$(CONTAINER_REGISTRY)/ghost:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/ghost:$(IMAGE_TAG)" . .PHONY: docker-build-ghost-debug docker-build-ghost-debug: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/ghost --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --target debug \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/ghost --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(IMAGE_TAG) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target debug \ -t "$(CONTAINER_REGISTRY)/ghost-debug:latest" -t "$(CONTAINER_REGISTRY)/ghost-debug:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/ghost-debug:$(IMAGE_TAG)" . PHONY: docker-build-bootstrap docker-build-bootstrap: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/bootstrap --build-arg GOARCH=$(GOARCH) --build-arg VERSION=$(IMAGE_TAG) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/bootstrap --build-arg GOARCH=$(GOARCH) --build-arg VERSION=$(IMAGE_TAG) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/bootstrap:latest" -t "$(CONTAINER_REGISTRY)/bootstrap:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/bootstrap:$(IMAGE_TAG)" . @@ -443,7 +436,7 @@ tool-bootstrap: docker-build-bootstrap .PHONY: docker-build-bootstrap-transit docker-build-bootstrap-transit: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/bootstrap/transit --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(VERSION) --build-arg GOARCH=$(GOARCH) --no-cache \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/bootstrap/transit --build-arg COMMIT=$(COMMIT) --build-arg VERSION=$(VERSION) --build-arg GOARCH=$(GOARCH) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --no-cache \ --target production \ -t "$(CONTAINER_REGISTRY)/bootstrap-transit:latest" -t "$(CONTAINER_REGISTRY)/bootstrap-transit:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/bootstrap-transit:$(IMAGE_TAG)" . @@ -453,7 +446,7 @@ tool-transit: docker-build-bootstrap-transit .PHONY: docker-build-loader docker-build-loader: - docker build -f ./integration/benchmark/cmd/manual/Dockerfile --build-arg TARGET=./benchmark/cmd/manual --target production \ + docker build -f ./integration/benchmark/cmd/manual/Dockerfile --build-arg TARGET=./benchmark/cmd/manual --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ --label "git_commit=${COMMIT}" --label "git_tag=${IMAGE_TAG}" \ -t "$(CONTAINER_REGISTRY)/loader:latest" -t "$(CONTAINER_REGISTRY)/loader:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/loader:$(IMAGE_TAG)" . @@ -633,7 +626,7 @@ docker-all-tools: tool-util tool-remove-execution-fork PHONY: docker-build-util docker-build-util: - docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/util --build-arg GOARCH=$(GOARCH) --build-arg VERSION=$(IMAGE_TAG) --target production \ + docker build -f cmd/Dockerfile --build-arg TARGET=./cmd/util --build-arg GOARCH=$(GOARCH) --build-arg VERSION=$(IMAGE_TAG) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ -t "$(CONTAINER_REGISTRY)/util:latest" -t "$(CONTAINER_REGISTRY)/util:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/util:$(IMAGE_TAG)" . PHONY: tool-util @@ -642,7 +635,7 @@ tool-util: docker-build-util PHONY: docker-build-remove-execution-fork docker-build-remove-execution-fork: - docker build -f cmd/Dockerfile --ssh default --build-arg TARGET=./cmd/util/cmd/remove-execution-fork --build-arg GOARCH=$(GOARCH) --build-arg VERSION=$(IMAGE_TAG) --target production \ + docker build -f cmd/Dockerfile --ssh default --build-arg TARGET=./cmd/util/cmd/remove-execution-fork --build-arg GOARCH=$(GOARCH) --build-arg VERSION=$(IMAGE_TAG) --build-arg CGO_FLAG=$(CRYPTO_FLAG) --target production \ -t "$(CONTAINER_REGISTRY)/remove-execution-fork:latest" -t "$(CONTAINER_REGISTRY)/remove-execution-fork:$(SHORT_COMMIT)" -t "$(CONTAINER_REGISTRY)/remove-execution-fork:$(IMAGE_TAG)" . PHONY: tool-remove-execution-fork diff --git a/README.md b/README.md index 39bd7a13e3e..291e45de347 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,6 @@ The following table lists all work streams and links to their home directory and - Clone this repository - Install [Go](https://golang.org/doc/install) (Flow supports Go 1.18 and later) -- Install [CMake](https://cmake.org/install/), which is used for building the crypto library - Install [Docker](https://docs.docker.com/get-docker/), which is used for running a local network and integration tests - Make sure the [`GOPATH`](https://golang.org/cmd/go/#hdr-GOPATH_environment_variable) and `GOBIN` environment variables are set, and `GOBIN` is added to your path: @@ -75,12 +74,6 @@ The following table lists all work streams and links to their home directory and At this point, you should be ready to build, test, and run Flow! 🎉 -Note: Whenever the crypto module version imported by "go.mod" is updated to a version that was never locally imported before, the crypto dependency needs to be set-up. If not, you should notice errors about "relic" or "crypto". Run the following command to set-up the new module version: - -```bash -make crypto_setup_gopath -``` - ## Development Workflow ### Testing diff --git a/SECURITY.md b/SECURITY.md index 6b370e9060b..2a38679616c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -9,4 +9,4 @@ If you care about making a difference, please follow the guidelines below. # **Guidelines For Responsible Disclosure** -We ask that all researchers adhere to these guidelines [here](https://docs.onflow.org/bounties/responsible-disclosure/) +We ask that all researchers adhere to these guidelines [here](https://flow.com/flow-responsible-disclosure) diff --git a/access/api.go b/access/api.go index 2086224e6c4..9f878a752ee 100644 --- a/access/api.go +++ b/access/api.go @@ -32,6 +32,8 @@ type API interface { GetTransactionResult(ctx context.Context, id flow.Identifier, blockID flow.Identifier, collectionID flow.Identifier, requiredEventEncodingVersion entities.EventEncodingVersion) (*TransactionResult, error) GetTransactionResultByIndex(ctx context.Context, blockID flow.Identifier, index uint32, requiredEventEncodingVersion entities.EventEncodingVersion) (*TransactionResult, error) GetTransactionResultsByBlockID(ctx context.Context, blockID flow.Identifier, requiredEventEncodingVersion entities.EventEncodingVersion) ([]*TransactionResult, error) + GetSystemTransaction(ctx context.Context, blockID flow.Identifier) (*flow.TransactionBody, error) + GetSystemTransactionResult(ctx context.Context, blockID flow.Identifier, requiredEventEncodingVersion entities.EventEncodingVersion) (*TransactionResult, error) GetAccount(ctx context.Context, address flow.Address) (*flow.Account, error) GetAccountAtLatestBlock(ctx context.Context, address flow.Address) (*flow.Account, error) diff --git a/access/handler.go b/access/handler.go index d9af3ea4720..4d12a4e5e8e 100644 --- a/access/handler.go +++ b/access/handler.go @@ -24,6 +24,16 @@ type Handler struct { me module.Local } +// TODO: this is implemented in https://github.com/onflow/flow-go/pull/4957, remove when merged +func (h *Handler) GetProtocolStateSnapshotByBlockID(ctx context.Context, request *access.GetProtocolStateSnapshotByBlockIDRequest) (*access.ProtocolStateSnapshotResponse, error) { + panic("implement me") +} + +// TODO: this is implemented in https://github.com/onflow/flow-go/pull/4957, remove when merged +func (h *Handler) GetProtocolStateSnapshotByHeight(ctx context.Context, request *access.GetProtocolStateSnapshotByHeightRequest) (*access.ProtocolStateSnapshotResponse, error) { + panic("implement me") +} + // HandlerOption is used to hand over optional constructor parameters type HandlerOption func(*Handler) @@ -310,6 +320,50 @@ func (h *Handler) GetTransactionResultsByBlockID( return message, nil } +func (h *Handler) GetSystemTransaction( + ctx context.Context, + req *access.GetSystemTransactionRequest, +) (*access.TransactionResponse, error) { + metadata := h.buildMetadataResponse() + + id, err := convert.BlockID(req.GetBlockId()) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "invalid block id: %v", err) + } + + tx, err := h.api.GetSystemTransaction(ctx, id) + if err != nil { + return nil, err + } + + return &access.TransactionResponse{ + Transaction: convert.TransactionToMessage(*tx), + Metadata: metadata, + }, nil +} + +func (h *Handler) GetSystemTransactionResult( + ctx context.Context, + req *access.GetSystemTransactionResultRequest, +) (*access.TransactionResultResponse, error) { + metadata := h.buildMetadataResponse() + + id, err := convert.BlockID(req.GetBlockId()) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "invalid block id: %v", err) + } + + result, err := h.api.GetSystemTransactionResult(ctx, id, req.GetEventEncodingVersion()) + if err != nil { + return nil, err + } + + message := TransactionResultToMessage(result) + message.Metadata = metadata + + return message, nil +} + func (h *Handler) GetTransactionsByBlockID( ctx context.Context, req *access.GetTransactionsByBlockIDRequest, diff --git a/access/mock/api.go b/access/mock/api.go index ca8439b299b..44b526f3d08 100644 --- a/access/mock/api.go +++ b/access/mock/api.go @@ -569,6 +569,58 @@ func (_m *API) GetNodeVersionInfo(ctx context.Context) (*access.NodeVersionInfo, return r0, r1 } +// GetSystemTransaction provides a mock function with given fields: ctx, blockID +func (_m *API) GetSystemTransaction(ctx context.Context, blockID flow.Identifier) (*flow.TransactionBody, error) { + ret := _m.Called(ctx, blockID) + + var r0 *flow.TransactionBody + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) (*flow.TransactionBody, error)); ok { + return rf(ctx, blockID) + } + if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) *flow.TransactionBody); ok { + r0 = rf(ctx, blockID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*flow.TransactionBody) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, flow.Identifier) error); ok { + r1 = rf(ctx, blockID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetSystemTransactionResult provides a mock function with given fields: ctx, blockID, requiredEventEncodingVersion +func (_m *API) GetSystemTransactionResult(ctx context.Context, blockID flow.Identifier, requiredEventEncodingVersion entities.EventEncodingVersion) (*access.TransactionResult, error) { + ret := _m.Called(ctx, blockID, requiredEventEncodingVersion) + + var r0 *access.TransactionResult + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier, entities.EventEncodingVersion) (*access.TransactionResult, error)); ok { + return rf(ctx, blockID, requiredEventEncodingVersion) + } + if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier, entities.EventEncodingVersion) *access.TransactionResult); ok { + r0 = rf(ctx, blockID, requiredEventEncodingVersion) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.TransactionResult) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, flow.Identifier, entities.EventEncodingVersion) error); ok { + r1 = rf(ctx, blockID, requiredEventEncodingVersion) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // GetTransaction provides a mock function with given fields: ctx, id func (_m *API) GetTransaction(ctx context.Context, id flow.Identifier) (*flow.TransactionBody, error) { ret := _m.Called(ctx, id) diff --git a/cmd/Dockerfile b/cmd/Dockerfile index d9d7800546c..5f72b5c1c48 100644 --- a/cmd/Dockerfile +++ b/cmd/Dockerfile @@ -6,7 +6,7 @@ FROM golang:1.20-bullseye AS build-setup RUN apt-get update -RUN apt-get -y install cmake zip +RUN apt-get -y install zip ## (2) Setup crypto dependencies FROM build-setup AS build-env @@ -25,8 +25,7 @@ COPY . . RUN --mount=type=cache,sharing=locked,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ - --mount=type=secret,id=git_creds,dst=/root/.netrc \ - make crypto_setup_gopath + --mount=type=secret,id=git_creds,dst=/root/.netrc #################################### ## (3) Build the production app binary @@ -36,14 +35,16 @@ WORKDIR /app ARG GOARCH=amd64 # TAGS can be overriden to modify the go build tags (e.g. build without netgo) -ARG TAGS="relic,netgo" +ARG TAGS="netgo" +# CGO_FLAG can be overwritten +ARG CGO_FLAG # Keep Go's build cache between builds. # https://github.com/golang/go/issues/27719#issuecomment-514747274 RUN --mount=type=cache,sharing=locked,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ --mount=type=secret,id=git_creds,dst=/root/.netrc \ - CGO_ENABLED=1 GOOS=linux go build --tags "${TAGS}" -ldflags "-extldflags -static \ + CGO_ENABLED=1 GOOS=linux CGO_FLAGS="${CGO_FLAG}" go build --tags "${TAGS}" -ldflags "-extldflags -static \ -X 'github.com/onflow/flow-go/cmd/build.commit=${COMMIT}' -X 'github.com/onflow/flow-go/cmd/build.semver=${VERSION}'" \ -o ./app ${TARGET} @@ -64,7 +65,7 @@ ARG GOARCH=amd64 RUN --mount=type=ssh \ --mount=type=cache,sharing=locked,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ - CGO_ENABLED=1 GOOS=linux go build --tags "relic,netgo" -ldflags "-extldflags -static \ + CGO_ENABLED=1 GOOS=linux CGO_FLAGS="${CGO_FLAG}" go build --tags "netgo" -ldflags "-extldflags -static \ -X 'github.com/onflow/flow-go/cmd/build.commit=${COMMIT}' -X 'github.com/onflow/flow-go/cmd/build.semver=${VERSION}'" \ -gcflags="all=-N -l" -o ./app ${TARGET} diff --git a/cmd/access/node_builder/access_node_builder.go b/cmd/access/node_builder/access_node_builder.go index 3b5006fc142..d9df1140d09 100644 --- a/cmd/access/node_builder/access_node_builder.go +++ b/cmd/access/node_builder/access_node_builder.go @@ -143,9 +143,11 @@ type AccessNodeConfig struct { executionDataConfig edrequester.ExecutionDataConfig PublicNetworkConfig PublicNetworkConfig TxResultCacheSize uint + TxErrorMessagesCacheSize uint executionDataIndexingEnabled bool registersDBPath string checkpointFile string + scriptExecutorConfig query.QueryConfig } type PublicNetworkConfig struct { @@ -201,6 +203,7 @@ func DefaultAccessNodeConfig() *AccessNodeConfig { EventFilterConfig: state_stream.DefaultEventFilterConfig, ResponseLimit: state_stream.DefaultResponseLimit, HeartbeatInterval: state_stream.DefaultHeartbeatInterval, + RegisterIDsRequestLimit: state_stream.DefaultRegisterIDsRequestLimit, }, stateStreamFilterConf: nil, ExecutionNodeAddress: "localhost:9000", @@ -214,6 +217,7 @@ func DefaultAccessNodeConfig() *AccessNodeConfig { apiRatelimits: nil, apiBurstlimits: nil, TxResultCacheSize: 0, + TxErrorMessagesCacheSize: 1000, PublicNetworkConfig: PublicNetworkConfig{ BindAddress: cmd.NotSet, Metrics: metrics.NewNoopCollector(), @@ -232,6 +236,7 @@ func DefaultAccessNodeConfig() *AccessNodeConfig { executionDataIndexingEnabled: false, registersDBPath: filepath.Join(homedir, ".flow", "execution_state"), checkpointFile: cmd.NotSet, + scriptExecutorConfig: query.NewDefaultConfig(), } } @@ -268,6 +273,7 @@ type FlowAccessNodeBuilder struct { ExecutionIndexer *indexer.Indexer ExecutionIndexerCore *indexer.IndexerCore ScriptExecutor *backend.ScriptExecutor + RegistersAsyncStore *execution.RegistersAsyncStore // The sync engine participants provider is the libp2p peer store for the access node // which is not available until after the network has started. @@ -711,14 +717,14 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess checkpointHeight := builder.SealedRootBlock.Header.Height - bootstrap, err := pStorage.NewRegisterBootstrap(pdb, checkpointFile, checkpointHeight, builder.Logger) + buutstrap, err := pStorage.NewRegisterBootstrap(pdb, checkpointFile, checkpointHeight, builder.Logger) if err != nil { return nil, fmt.Errorf("could not create registers bootstrapper: %w", err) } // TODO: find a way to hook a context up to this to allow a graceful shutdown workerCount := 10 - err = bootstrap.IndexCheckpointFile(context.Background(), workerCount) + err = buutstrap.IndexCheckpointFile(context.Background(), workerCount) if err != nil { return nil, fmt.Errorf("could not load checkpoint file: %w", err) } @@ -759,6 +765,11 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess return nil, err } + err = builder.RegistersAsyncStore.InitDataAvailable(registers) + if err != nil { + return nil, err + } + // setup requester to notify indexer when new execution data is received execDataDistributor.AddOnExecutionDataReceivedConsumer(builder.ExecutionIndexer.OnExecutionData) @@ -771,6 +782,7 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess query.NewProtocolStateWrapper(builder.State), builder.Storage.Headers, builder.ExecutionIndexerCore.RegisterValue, + builder.scriptExecutorConfig, ) if err != nil { return nil, err @@ -813,7 +825,8 @@ func (builder *FlowAccessNodeBuilder) BuildExecutionSyncComponents() *FlowAccess executionDataStoreCache, broadcaster, builder.executionDataConfig.InitialBlockHeight, - highestAvailableHeight) + highestAvailableHeight, + builder.RegistersAsyncStore) if err != nil { return nil, fmt.Errorf("could not create state stream backend: %w", err) } @@ -893,6 +906,7 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { flags.BoolVar(&builder.retryEnabled, "retry-enabled", defaultConfig.retryEnabled, "whether to enable the retry mechanism at the access node level") flags.BoolVar(&builder.rpcMetricsEnabled, "rpc-metrics-enabled", defaultConfig.rpcMetricsEnabled, "whether to enable the rpc metrics") flags.UintVar(&builder.TxResultCacheSize, "transaction-result-cache-size", defaultConfig.TxResultCacheSize, "transaction result cache size.(Disabled by default i.e 0)") + flags.UintVar(&builder.TxErrorMessagesCacheSize, "transaction-error-messages-cache-size", defaultConfig.TxErrorMessagesCacheSize, "transaction error messages cache size.(By default 1000)") flags.StringVarP(&builder.nodeInfoFile, "node-info-file", "", defaultConfig.nodeInfoFile, "full path to a json file which provides more details about nodes when reporting its reachability metrics") flags.StringToIntVar(&builder.apiRatelimits, "api-rate-limits", defaultConfig.apiRatelimits, "per second rate limits for Access API methods e.g. Ping=300,GetTransaction=500 etc.") flags.StringToIntVar(&builder.apiBurstlimits, "api-burst-limits", defaultConfig.apiBurstlimits, "burst limits for Access API methods e.g. Ping=100,GetTransaction=100 etc.") @@ -921,6 +935,7 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { flags.UintVar(&builder.stateStreamConf.ClientSendBufferSize, "state-stream-send-buffer-size", defaultConfig.stateStreamConf.ClientSendBufferSize, "maximum number of responses to buffer within a stream") flags.Float64Var(&builder.stateStreamConf.ResponseLimit, "state-stream-response-limit", defaultConfig.stateStreamConf.ResponseLimit, "max number of responses per second to send over streaming endpoints. this helps manage resources consumed by each client querying data not in the cache e.g. 3 or 0.5. 0 means no limit") flags.Uint64Var(&builder.stateStreamConf.HeartbeatInterval, "state-stream-heartbeat-interval", defaultConfig.stateStreamConf.HeartbeatInterval, "default interval in blocks at which heartbeat messages should be sent. applied when client did not specify a value.") + flags.Uint32Var(&builder.stateStreamConf.RegisterIDsRequestLimit, "state-stream-max-register-values", defaultConfig.stateStreamConf.RegisterIDsRequestLimit, "maximum number of register ids to include in a single request to the GetRegisters endpoint") // Execution Data Indexer flags.BoolVar(&builder.executionDataIndexingEnabled, "execution-data-indexing-enabled", defaultConfig.executionDataIndexingEnabled, "whether to enable the execution data indexing") @@ -929,6 +944,11 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { // Script Execution flags.StringVar(&builder.rpcConf.BackendConfig.ScriptExecutionMode, "script-execution-mode", defaultConfig.rpcConf.BackendConfig.ScriptExecutionMode, "mode to use when executing scripts. one of (local-only, execution-nodes-only, failover, compare)") + flags.Uint64Var(&builder.scriptExecutorConfig.ComputationLimit, "script-execution-computation-limit", defaultConfig.scriptExecutorConfig.ComputationLimit, "maximum number of computation units a locally executed script can use. default: 100000") + flags.IntVar(&builder.scriptExecutorConfig.MaxErrorMessageSize, "script-execution-max-error-length", defaultConfig.scriptExecutorConfig.MaxErrorMessageSize, "maximum number characters to include in error message strings. additional characters are truncated. default: 1000") + flags.DurationVar(&builder.scriptExecutorConfig.LogTimeThreshold, "script-execution-log-time-threshold", defaultConfig.scriptExecutorConfig.LogTimeThreshold, "emit a log for any scripts that take over this threshold. default: 1s") + flags.DurationVar(&builder.scriptExecutorConfig.ExecutionTimeLimit, "script-execution-timeout", defaultConfig.scriptExecutorConfig.ExecutionTimeLimit, "timeout value for locally executed scripts. default: 10s") + }).ValidateFlags(func() error { if builder.supportsObserver && (builder.PublicNetworkConfig.BindAddress == cmd.NotSet || builder.PublicNetworkConfig.BindAddress == "") { return errors.New("public-network-address must be set if supports-observer is true") @@ -973,6 +993,9 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { if builder.stateStreamConf.ResponseLimit < 0 { return errors.New("state-stream-response-limit must be greater than or equal to 0") } + if builder.stateStreamConf.RegisterIDsRequestLimit <= 0 { + return errors.New("state-stream-max-register-values must be greater than 0") + } } if builder.rpcConf.BackendConfig.CircuitBreakerConfig.Enabled { if builder.rpcConf.BackendConfig.CircuitBreakerConfig.MaxFailures == 0 { @@ -985,6 +1008,9 @@ func (builder *FlowAccessNodeBuilder) extraFlags() { return errors.New("circuit-breaker-restore-timeout must be greater than 0") } } + if builder.TxErrorMessagesCacheSize == 0 { + return errors.New("transaction-error-messages-cache-size must be greater than 0") + } return nil }) @@ -1236,6 +1262,10 @@ func (builder *FlowAccessNodeBuilder) Build() (cmd.Node, error) { builder.ScriptExecutor = backend.NewScriptExecutor() return nil }). + Module("async register store", func(node *cmd.NodeConfig) error { + builder.RegistersAsyncStore = execution.NewRegistersAsyncStore() + return nil + }). Component("RPC engine", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { config := builder.rpcConf backendConfig := config.BackendConfig @@ -1294,6 +1324,7 @@ func (builder *FlowAccessNodeBuilder) Build() (cmd.Node, error) { SnapshotHistoryLimit: backend.DefaultSnapshotHistoryLimit, Communicator: backend.NewNodeCommunicator(backendConfig.CircuitBreakerConfig.Enabled), TxResultCacheSize: builder.TxResultCacheSize, + TxErrorMessagesCacheSize: builder.TxErrorMessagesCacheSize, ScriptExecutor: builder.ScriptExecutor, ScriptExecutionMode: scriptExecMode, }) @@ -1545,17 +1576,19 @@ func (builder *FlowAccessNodeBuilder) initPublicLibp2pNode(networkKey crypto.Pri } meshTracer := tracer.NewGossipSubMeshTracer(meshTracerCfg) - libp2pNode, err := p2pbuilder.NewNodeBuilder(builder.Logger, &p2pconfig.MetricsConfig{ - HeroCacheFactory: builder.HeroCacheMetricsFactory(), - Metrics: networkMetrics, - }, + libp2pNode, err := p2pbuilder.NewNodeBuilder(builder.Logger, + &p2pconfig.MetricsConfig{ + HeroCacheFactory: builder.HeroCacheMetricsFactory(), + Metrics: networkMetrics, + }, network.PublicNetwork, bindAddress, networkKey, builder.SporkID, builder.IdentityProvider, + builder.FlowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig, &builder.FlowConfig.NetworkConfig.ResourceManager, - &builder.FlowConfig.NetworkConfig.GossipSubConfig.GossipSubRPCInspectorsConfig, + &builder.FlowConfig.NetworkConfig.GossipSubConfig, &p2pconfig.PeerManagerConfig{ // TODO: eventually, we need pruning enabled even on public network. However, it needs a modified version of // the peer manager that also operate on the public identities. diff --git a/cmd/bootstrap/README.md b/cmd/bootstrap/README.md index 14339cc91ac..6b138946ca1 100644 --- a/cmd/bootstrap/README.md +++ b/cmd/bootstrap/README.md @@ -46,7 +46,7 @@ _Each cluster_ of collector nodes needs to have its own root Block and root QC # Usage -`go run -tags relic ./cmd/bootstrap` prints usage information +`go run ./cmd/bootstrap` prints usage information ## Phase 1: Generate networking and staking keys for partner nodes: @@ -65,7 +65,7 @@ If seeds are not provided, the CLI will try to use the system's pseudo-random nu #### Example ```bash -go run -tags relic ./cmd/bootstrap key --address "example.com:1234" --role "consensus" -o ./bootstrap/partner-node-infos +go run ./cmd/bootstrap key --address "example.com:1234" --role "consensus" -o ./bootstrap/partner-node-infos ``` #### Generated output files @@ -97,7 +97,7 @@ Each input is a config file specified as a command line parameter: #### Example ```bash -go run -tags relic ./cmd/bootstrap finalize \ +go run ./cmd/bootstrap finalize \ --root-chain main \ --root-height 0 \ --root-parent 0000000000000000000000000000000000000000000000000000000000000000 \ @@ -152,7 +152,7 @@ go run -tags relic ./cmd/bootstrap finalize \ This generates the networking key used by observers to connect to the public libp2p network. It is a different key format than staked nodes and should only be used for Observers. ```bash -go run -tags relic ./cmd/bootstrap observer-network-key -f ./path/network-key +go run ./cmd/bootstrap observer-network-key -f ./path/network-key ``` This key must be kept secret as it's used to encrypt and sign network requests sent by the observers. diff --git a/cmd/bootstrap/cmd/dkg.go b/cmd/bootstrap/cmd/dkg.go index d7069534e64..f87cbde2492 100644 --- a/cmd/bootstrap/cmd/dkg.go +++ b/cmd/bootstrap/cmd/dkg.go @@ -19,7 +19,7 @@ func runBeaconKG(nodes []model.NodeInfo) dkg.DKGData { log.Debug().Msgf("will run DKG") var dkgData dkg.DKGData var err error - dkgData, err = bootstrapDKG.RandomBeaconKG(n, GenerateRandomSeed(crypto.SeedMinLenDKG)) + dkgData, err = bootstrapDKG.RandomBeaconKG(n, GenerateRandomSeed(crypto.KeyGenSeedMinLen)) if err != nil { log.Fatal().Err(err).Msg("error running DKG") } diff --git a/cmd/bootstrap/cmd/genconfig.go b/cmd/bootstrap/cmd/genconfig.go index 404bd5e873e..ccf66104ecc 100644 --- a/cmd/bootstrap/cmd/genconfig.go +++ b/cmd/bootstrap/cmd/genconfig.go @@ -63,7 +63,7 @@ func genconfigCmdRun(_ *cobra.Command, _ []string) { var genconfigCmd = &cobra.Command{ Use: "genconfig", Short: "Generate node-config.json", - Long: "example: go run -tags relic ./cmd/bootstrap genconfig --address-format \"%s-%03d.devnet19.nodes.onflow.org:3569\" --access 2 --collection 3 --consensus 3 --execution 2 --verification 1 --weight 100", + Long: "example: go run ./cmd/bootstrap genconfig --address-format \"%s-%03d.devnet19.nodes.onflow.org:3569\" --access 2 --collection 3 --consensus 3 --execution 2 --verification 1 --weight 100", Run: genconfigCmdRun, } diff --git a/cmd/bootstrap/dkg/dkg_test.go b/cmd/bootstrap/dkg/dkg_test.go index a5d5a56de18..fb92aad0ee0 100644 --- a/cmd/bootstrap/dkg/dkg_test.go +++ b/cmd/bootstrap/dkg/dkg_test.go @@ -10,7 +10,7 @@ import ( ) func TestBeaconKG(t *testing.T) { - seed := unittest.SeedFixture(2 * crypto.SeedMinLenDKG) + seed := unittest.SeedFixture(2 * crypto.KeyGenSeedMinLen) // n = 0 _, err := RandomBeaconKG(0, seed) diff --git a/cmd/bootstrap/run/qc_test.go b/cmd/bootstrap/run/qc_test.go index 329ca9da171..58dd04a6ac4 100644 --- a/cmd/bootstrap/run/qc_test.go +++ b/cmd/bootstrap/run/qc_test.go @@ -50,7 +50,7 @@ func createSignerData(t *testing.T, n int) *ParticipantData { networkingKeys := unittest.NetworkingKeys(n) stakingKeys := unittest.StakingKeys(n) - seed := make([]byte, crypto.SeedMinLenDKG) + seed := make([]byte, crypto.KeyGenSeedMinLen) _, err := rand.Read(seed) require.NoError(t, err) randomBSKs, randomBPKs, groupKey, err := crypto.BLSThresholdKeyGen(n, diff --git a/cmd/collection/main.go b/cmd/collection/main.go index 5d5ebaf964f..197a26f2d3d 100644 --- a/cmd/collection/main.go +++ b/cmd/collection/main.go @@ -613,10 +613,7 @@ func createQCContractClient(node *cmd.NodeConfig, machineAccountInfo *bootstrap. var qcContractClient module.QCContractClient - contracts, err := systemcontracts.SystemContractsForChain(node.RootChainID) - if err != nil { - return nil, err - } + contracts := systemcontracts.SystemContractsForChain(node.RootChainID) qcContractAddress := contracts.ClusterQC.Address.Hex() // construct signer from private key @@ -631,7 +628,16 @@ func createQCContractClient(node *cmd.NodeConfig, machineAccountInfo *bootstrap. } // create actual qc contract client, all flags and machine account info file found - qcContractClient = epochs.NewQCContractClient(node.Logger, flowClient, anID, node.Me.NodeID(), machineAccountInfo.Address, machineAccountInfo.KeyIndex, qcContractAddress, txSigner) + qcContractClient = epochs.NewQCContractClient( + node.Logger, + flowClient, + anID, + node.Me.NodeID(), + machineAccountInfo.Address, + machineAccountInfo.KeyIndex, + qcContractAddress, + txSigner, + ) return qcContractClient, nil } diff --git a/cmd/consensus/main.go b/cmd/consensus/main.go index 4a47fac2810..90bb0f66a44 100644 --- a/cmd/consensus/main.go +++ b/cmd/consensus/main.go @@ -603,6 +603,8 @@ func main() { ) notifier.AddParticipantConsumer(telemetryConsumer) + notifier.AddCommunicatorConsumer(telemetryConsumer) + notifier.AddFinalizationConsumer(telemetryConsumer) notifier.AddFollowerConsumer(followerDistributor) // initialize the persister @@ -958,10 +960,7 @@ func loadBeaconPrivateKey(dir string, myID flow.Identifier) (*encodable.RandomBe func createDKGContractClient(node *cmd.NodeConfig, machineAccountInfo *bootstrap.NodeMachineAccountInfo, flowClient *client.Client, anID flow.Identifier) (module.DKGContractClient, error) { var dkgClient module.DKGContractClient - contracts, err := systemcontracts.SystemContractsForChain(node.RootChainID) - if err != nil { - return nil, err - } + contracts := systemcontracts.SystemContractsForChain(node.RootChainID) dkgContractAddress := contracts.DKG.Address.Hex() // construct signer from private key diff --git a/cmd/execution_builder.go b/cmd/execution_builder.go index b690950048f..d4d4d0ea5bf 100644 --- a/cmd/execution_builder.go +++ b/cmd/execution_builder.go @@ -57,6 +57,7 @@ import ( "github.com/onflow/flow-go/engine/execution/scripts" "github.com/onflow/flow-go/engine/execution/state" "github.com/onflow/flow-go/engine/execution/state/bootstrap" + "github.com/onflow/flow-go/engine/execution/storehouse" "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/fvm/systemcontracts" @@ -65,6 +66,7 @@ import ( ledger "github.com/onflow/flow-go/ledger/complete" "github.com/onflow/flow-go/ledger/complete/wal" bootstrapFilenames "github.com/onflow/flow-go/model/bootstrap" + modelbootstrap "github.com/onflow/flow-go/model/bootstrap" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/model/flow/filter" "github.com/onflow/flow-go/module" @@ -74,6 +76,7 @@ import ( exedataprovider "github.com/onflow/flow-go/module/executiondatasync/provider" "github.com/onflow/flow-go/module/executiondatasync/pruner" "github.com/onflow/flow-go/module/executiondatasync/tracker" + "github.com/onflow/flow-go/module/finalizedreader" finalizer "github.com/onflow/flow-go/module/finalizer/consensus" "github.com/onflow/flow-go/module/mempool/queue" "github.com/onflow/flow-go/module/metrics" @@ -86,6 +89,7 @@ import ( storageerr "github.com/onflow/flow-go/storage" storage "github.com/onflow/flow-go/storage/badger" "github.com/onflow/flow-go/storage/badger/procedure" + storagepebble "github.com/onflow/flow-go/storage/pebble" sutil "github.com/onflow/flow-go/storage/util" ) @@ -124,6 +128,7 @@ type ExecutionNode struct { followerState protocol.FollowerState committee hotstuff.DynamicCommittee ledgerStorage *ledger.Ledger + registerStore *storehouse.RegisterStore events *storage.Events serviceEvents *storage.ServiceEvents txResults *storage.TransactionResults @@ -190,6 +195,7 @@ func (builder *ExecutionNodeBuilder) LoadComponentsAndModules() { Module("execution data getter", exeNode.LoadExecutionDataGetter). Module("blobservice peer manager dependencies", exeNode.LoadBlobservicePeerManagerDependencies). Module("bootstrap", exeNode.LoadBootstrapper). + Module("register store", exeNode.LoadRegisterStore). Component("execution state ledger", exeNode.LoadExecutionStateLedger). // TODO: Modules should be able to depends on components @@ -537,22 +543,17 @@ func (exeNode *ExecutionNode) LoadProviderEngine( // Get latest executed block and a view at that block ctx := context.Background() - _, blockID, err := exeNode.executionState.GetHighestExecutedBlockID(ctx) + height, blockID, err := exeNode.executionState.GetHighestExecutedBlockID(ctx) if err != nil { return nil, fmt.Errorf( - "cannot get the latest executed block id: %w", - err) + "cannot get the latest executed block id at height %v: %w", + height, err) } - stateCommit, err := exeNode.executionState.StateCommitmentByBlockID( - ctx, - blockID) + blockSnapshot, _, err := exeNode.executionState.CreateStorageSnapshot(blockID) if err != nil { - return nil, fmt.Errorf( - "cannot get the state commitment at latest executed block id %s: %w", - blockID.String(), - err) + return nil, fmt.Errorf("cannot create a storage snapshot at block %v at height %v: %w", blockID, + height, err) } - blockSnapshot := exeNode.executionState.NewStorageSnapshot(stateCommit) // Get the epoch counter from the smart contract at the last executed block. contractEpochCounter, err := getContractEpochCounter( @@ -561,7 +562,8 @@ func (exeNode *ExecutionNode) LoadProviderEngine( blockSnapshot) // Failing to fetch the epoch counter from the smart contract is a fatal error. if err != nil { - return nil, fmt.Errorf("cannot get epoch counter from the smart contract at block %s: %w", blockID.String(), err) + return nil, fmt.Errorf("cannot get epoch counter from the smart contract at block %s at height %v: %w", + blockID.String(), height, err) } // Get the epoch counter form the protocol state, at the same block. @@ -580,6 +582,7 @@ func (exeNode *ExecutionNode) LoadProviderEngine( Uint64("contractEpochCounter", contractEpochCounter). Uint64("protocolStateEpochCounter", protocolStateEpochCounter). Str("blockID", blockID.String()). + Uint64("height", height). Logger() if contractEpochCounter != protocolStateEpochCounter { @@ -697,6 +700,8 @@ func (exeNode *ExecutionNode) LoadExecutionState( exeNode.txResults, node.DB, node.Tracer, + exeNode.registerStore, + exeNode.exeConf.enableStorehouse, ) return &module.NoopReadyDoneAware{}, nil @@ -748,6 +753,75 @@ func (exeNode *ExecutionNode) LoadStopControl( return stopControl, nil } +func (exeNode *ExecutionNode) LoadRegisterStore( + node *NodeConfig, +) error { + if !exeNode.exeConf.enableStorehouse { + node.Logger.Info().Msg("register store disabled") + return nil + } + + node.Logger.Info(). + Str("pebble_db_path", exeNode.exeConf.registerDir). + Msg("register store enabled") + pebbledb, err := storagepebble.OpenRegisterPebbleDB(exeNode.exeConf.registerDir) + + if err != nil { + return fmt.Errorf("could not create disk register store: %w", err) + } + + // close pebble db on shut down + exeNode.builder.ShutdownFunc(func() error { + err := pebbledb.Close() + if err != nil { + return fmt.Errorf("could not close register store: %w", err) + } + return nil + }) + + bootstrapped, err := storagepebble.IsBootstrapped(pebbledb) + if err != nil { + return fmt.Errorf("could not check if registers db is bootstrapped: %w", err) + } + + node.Logger.Info().Msgf("register store bootstrapped: %v", bootstrapped) + + if !bootstrapped { + checkpointFile := path.Join(exeNode.exeConf.triedir, modelbootstrap.FilenameWALRootCheckpoint) + root, err := exeNode.builder.RootSnapshot.Head() + if err != nil { + return fmt.Errorf("could not get root snapshot head: %w", err) + } + + checkpointHeight := root.Height + + err = bootstrap.ImportRegistersFromCheckpoint(node.Logger, checkpointFile, checkpointHeight, pebbledb, exeNode.exeConf.importCheckpointWorkerCount) + if err != nil { + return fmt.Errorf("could not import registers from checkpoint: %w", err) + } + } + diskStore, err := storagepebble.NewRegisters(pebbledb) + if err != nil { + return fmt.Errorf("could not create registers storage: %w", err) + } + + reader := finalizedreader.NewFinalizedReader(node.Storage.Headers, node.LastFinalizedHeader.Height) + node.ProtocolEvents.AddConsumer(reader) + + registerStore, err := storehouse.NewRegisterStore( + diskStore, + nil, // TODO: replace with real WAL + reader, + node.Logger, + ) + if err != nil { + return err + } + + exeNode.registerStore = registerStore + return nil +} + func (exeNode *ExecutionNode) LoadExecutionStateLedger( node *NodeConfig, ) ( @@ -871,7 +945,7 @@ func (exeNode *ExecutionNode) LoadIngestionEngine( } fetcher := fetcher.NewCollectionFetcher(node.Logger, exeNode.collectionRequester, node.State, exeNode.exeConf.onflowOnlyLNs) - loader := loader.NewLoader(node.Logger, node.State, node.Storage.Headers, exeNode.executionState) + loader := loader.NewUnexecutedLoader(node.Logger, node.State, node.Storage.Headers, exeNode.executionState) exeNode.ingestionEng, err = ingestion.New( exeNode.ingestionUnit, @@ -908,7 +982,6 @@ func (exeNode *ExecutionNode) LoadScriptsEngine(node *NodeConfig) (module.ReadyD exeNode.scriptsEng = scripts.New( node.Logger, - node.State, exeNode.computationManager.QueryExecutor(), exeNode.executionState, ) @@ -1178,17 +1251,10 @@ func getContractEpochCounter( uint64, error, ) { - // Get the address of the FlowEpoch smart contract - sc, err := systemcontracts.SystemContractsForChain(vmCtx.Chain.ChainID()) - if err != nil { - return 0, fmt.Errorf("could not get system contracts: %w", err) - } - address := sc.Epoch.Address + sc := systemcontracts.SystemContractsForChain(vmCtx.Chain.ChainID()) // Generate the script to get the epoch counter from the FlowEpoch smart contract - scriptCode := templates.GenerateGetCurrentEpochCounterScript(templates.Environment{ - EpochAddress: address.Hex(), - }) + scriptCode := templates.GenerateGetCurrentEpochCounterScript(sc.AsTemplateEnv()) script := fvm.Script(scriptCode) // execute the script diff --git a/cmd/execution_config.go b/cmd/execution_config.go index 6c6e0033ad0..5a6df5aef47 100644 --- a/cmd/execution_config.go +++ b/cmd/execution_config.go @@ -11,6 +11,7 @@ import ( "github.com/onflow/flow-go/engine/common/provider" "github.com/onflow/flow-go/engine/execution/computation/query" exeprovider "github.com/onflow/flow-go/engine/execution/provider" + "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/mempool" "github.com/onflow/flow-go/utils/grpcutils" @@ -27,6 +28,7 @@ type ExecutionConfig struct { rpcConf rpc.Config triedir string executionDataDir string + registerDir string mTrieCacheSize uint32 transactionResultsCacheSize uint checkpointDistance uint @@ -51,6 +53,7 @@ type ExecutionConfig struct { blobstoreBurstLimit int chunkDataPackRequestWorkers uint maxGracefulStopDuration time.Duration + importCheckpointWorkerCount int computationConfig computation.ComputationConfig receiptRequestWorkers uint // common provider engine workers @@ -60,7 +63,8 @@ type ExecutionConfig struct { // It works around an issue where some collection nodes are not configured with enough // this works around an issue where some collection nodes are not configured with enough // file descriptors causing connection failures. - onflowOnlyLNs bool + onflowOnlyLNs bool + enableStorehouse bool } func (exeConf *ExecutionConfig) SetupFlags(flags *pflag.FlagSet) { @@ -71,6 +75,7 @@ func (exeConf *ExecutionConfig) SetupFlags(flags *pflag.FlagSet) { flags.BoolVar(&exeConf.rpcConf.RpcMetricsEnabled, "rpc-metrics-enabled", false, "whether to enable the rpc metrics") flags.StringVar(&exeConf.triedir, "triedir", datadir, "directory to store the execution State") flags.StringVar(&exeConf.executionDataDir, "execution-data-dir", filepath.Join(datadir, "execution_data"), "directory to use for storing Execution Data") + flags.StringVar(&exeConf.registerDir, "register-dir", filepath.Join(datadir, "register"), "directory to use for storing registers Data") flags.Uint32Var(&exeConf.mTrieCacheSize, "mtrie-cache-size", 500, "cache size for MTrie") flags.UintVar(&exeConf.checkpointDistance, "checkpoint-distance", 20, "number of WAL segments between checkpoints") flags.UintVar(&exeConf.checkpointsToKeep, "checkpoints-to-keep", 5, "number of recent checkpoints to keep (0 to keep all)") @@ -89,6 +94,8 @@ func (exeConf *ExecutionConfig) SetupFlags(flags *pflag.FlagSet) { "threshold for logging script execution") flags.DurationVar(&exeConf.computationConfig.QueryConfig.ExecutionTimeLimit, "script-execution-time-limit", query.DefaultExecutionTimeLimit, "script execution time limit") + flags.Uint64Var(&exeConf.computationConfig.QueryConfig.ComputationLimit, "script-execution-computation-limit", fvm.DefaultComputationLimit, + "script execution computation limit") flags.UintVar(&exeConf.transactionResultsCacheSize, "transaction-results-cache-size", 10000, "number of transaction results to be cached") flags.BoolVar(&exeConf.extensiveLog, "extensive-logging", false, "extensive logging logs tx contents and block headers") flags.DurationVar(&exeConf.chunkDataPackQueryTimeout, "chunk-data-pack-query-timeout", exeprovider.DefaultChunkDataPackQueryTimeout, "timeout duration to determine a chunk data pack query being slow") @@ -107,8 +114,11 @@ func (exeConf *ExecutionConfig) SetupFlags(flags *pflag.FlagSet) { flags.IntVar(&exeConf.blobstoreRateLimit, "blobstore-rate-limit", 0, "per second outgoing rate limit for Execution Data blobstore") flags.IntVar(&exeConf.blobstoreBurstLimit, "blobstore-burst-limit", 0, "outgoing burst limit for Execution Data blobstore") flags.DurationVar(&exeConf.maxGracefulStopDuration, "max-graceful-stop-duration", stop.DefaultMaxGracefulStopDuration, "the maximum amount of time stop control will wait for ingestion engine to gracefully shutdown before crashing") + flags.IntVar(&exeConf.importCheckpointWorkerCount, "import-checkpoint-worker-count", 10, "number of workers to import checkpoint file during bootstrap") flags.BoolVar(&exeConf.onflowOnlyLNs, "temp-onflow-only-lns", false, "do not use unless required. forces node to only request collections from onflow collection nodes") + flags.BoolVar(&exeConf.enableStorehouse, "enable-storehouse", false, "enable storehouse to store registers on disk, default is false") + } func (exeConf *ExecutionConfig) ValidateFlags() error { diff --git a/cmd/observer/node_builder/observer_builder.go b/cmd/observer/node_builder/observer_builder.go index a1212713b35..378b85e81ff 100644 --- a/cmd/observer/node_builder/observer_builder.go +++ b/cmd/observer/node_builder/observer_builder.go @@ -16,6 +16,8 @@ import ( "github.com/rs/zerolog" "github.com/spf13/pflag" + "google.golang.org/grpc/credentials" + "github.com/onflow/flow-go/cmd" "github.com/onflow/flow-go/consensus" "github.com/onflow/flow-go/consensus/hotstuff" @@ -708,8 +710,7 @@ func (builder *ObserverServiceBuilder) initPublicLibp2pNode(networkKey crypto.Pr } meshTracer := tracer.NewGossipSubMeshTracer(meshTracerCfg) - node, err := p2pbuilder.NewNodeBuilder( - builder.Logger, + node, err := p2pbuilder.NewNodeBuilder(builder.Logger, &p2pconfig.MetricsConfig{ HeroCacheFactory: builder.HeroCacheMetricsFactory(), Metrics: builder.Metrics.Network, @@ -719,8 +720,9 @@ func (builder *ObserverServiceBuilder) initPublicLibp2pNode(networkKey crypto.Pr networkKey, builder.SporkID, builder.IdentityProvider, + builder.FlowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig, &builder.FlowConfig.NetworkConfig.ResourceManager, - &builder.FlowConfig.NetworkConfig.GossipSubConfig.GossipSubRPCInspectorsConfig, + &builder.FlowConfig.NetworkConfig.GossipSubConfig, p2pconfig.PeerManagerDisableConfig(), // disable peer manager for observer node. &p2p.DisallowListCacheConfig{ MaxSize: builder.FlowConfig.NetworkConfig.DisallowListNotificationCacheSize, @@ -898,6 +900,16 @@ func (builder *ObserverServiceBuilder) enqueueRPCServer() { ) return nil }) + builder.Module("server certificate", func(node *cmd.NodeConfig) error { + // generate the server certificate that will be served by the GRPC server + x509Certificate, err := grpcutils.X509Certificate(node.NetworkKey) + if err != nil { + return err + } + tlsConfig := grpcutils.DefaultServerTLSConfig(x509Certificate) + builder.rpcConf.TransportCredentials = credentials.NewTLS(tlsConfig) + return nil + }) builder.Component("RPC engine", func(node *cmd.NodeConfig) (module.ReadyDoneAware, error) { accessMetrics := builder.AccessMetrics config := builder.rpcConf @@ -916,8 +928,8 @@ func (builder *ObserverServiceBuilder) enqueueRPCServer() { connFactory := &rpcConnection.ConnectionFactoryImpl{ CollectionGRPCPort: 0, ExecutionGRPCPort: 0, - CollectionNodeGRPCTimeout: backendConfig.CollectionClientTimeout, - ExecutionNodeGRPCTimeout: backendConfig.ExecutionClientTimeout, + CollectionNodeGRPCTimeout: builder.apiTimeout, + ExecutionNodeGRPCTimeout: builder.apiTimeout, AccessMetrics: accessMetrics, Log: node.Logger, Manager: rpcConnection.NewManager( @@ -957,8 +969,7 @@ func (builder *ObserverServiceBuilder) enqueueRPCServer() { restHandler, err := restapiproxy.NewRestProxyHandler( accessBackend, builder.upstreamIdentities, - builder.apiTimeout, - config.MaxMsgSize, + connFactory, builder.Logger, observerCollector, node.RootChainID.Chain()) @@ -987,7 +998,7 @@ func (builder *ObserverServiceBuilder) enqueueRPCServer() { } // upstream access node forwarder - forwarder, err := apiproxy.NewFlowAccessAPIForwarder(builder.upstreamIdentities, builder.apiTimeout, config.MaxMsgSize) + forwarder, err := apiproxy.NewFlowAccessAPIForwarder(builder.upstreamIdentities, connFactory) if err != nil { return nil, err } diff --git a/cmd/scaffold.go b/cmd/scaffold.go index f8c4bcc95f7..86b8cc17dae 100644 --- a/cmd/scaffold.go +++ b/cmd/scaffold.go @@ -395,7 +395,6 @@ func (fnb *FlowNodeBuilder) EnqueueNetworkInit() { connGaterCfg, peerManagerCfg, &fnb.FlowConfig.NetworkConfig.GossipSubConfig, - &fnb.FlowConfig.NetworkConfig.GossipSubRPCInspectorsConfig, &fnb.FlowConfig.NetworkConfig.ResourceManager, uniCfg, &fnb.FlowConfig.NetworkConfig.ConnectionManagerConfig, @@ -1005,7 +1004,8 @@ func (fnb *FlowNodeBuilder) initStorage() error { setups := bstorage.NewEpochSetups(fnb.Metrics.Cache, fnb.DB) epochCommits := bstorage.NewEpochCommits(fnb.Metrics.Cache, fnb.DB) commits := bstorage.NewCommits(fnb.Metrics.Cache, fnb.DB) - protocolState := bstorage.NewProtocolState(fnb.Metrics.Cache, setups, epochCommits, fnb.DB, bstorage.DefaultCacheSize) + protocolState := bstorage.NewProtocolState(fnb.Metrics.Cache, setups, epochCommits, fnb.DB, + bstorage.DefaultProtocolStateCacheSize, bstorage.DefaultProtocolStateByBlockIDCacheSize) versionBeacons := bstorage.NewVersionBeacons(fnb.DB) fnb.Storage = Storage{ diff --git a/cmd/util/cmd/common/transactions.go b/cmd/util/cmd/common/transactions.go index 15f6c3746fb..bd9061ea6e0 100644 --- a/cmd/util/cmd/common/transactions.go +++ b/cmd/util/cmd/common/transactions.go @@ -3,13 +3,15 @@ package common import ( "fmt" + "github.com/onflow/flow-core-contracts/lib/go/templates" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/model/flow" ) const ( getInfoForProposedNodesScript = ` - import FlowIDTableStaking from 0x%s + import FlowIDTableStaking from 0xIDENTITYTABLEADDRESS pub fun main(): [FlowIDTableStaking.NodeInfo] { let nodeIDs = FlowIDTableStaking.getProposedNodeIDs() @@ -26,11 +28,12 @@ const ( // GetNodeInfoForProposedNodesScript returns a script that will return an array of FlowIDTableStaking.NodeInfo for each // node in the proposed table. func GetNodeInfoForProposedNodesScript(network string) ([]byte, error) { - contracts, err := systemcontracts.SystemContractsForChain(flow.ChainID(fmt.Sprintf("flow-%s", network))) - if err != nil { - return nil, fmt.Errorf("failed to get system contracts for network (%s): %w", network, err) - } + contracts := systemcontracts.SystemContractsForChain(flow.ChainID(fmt.Sprintf("flow-%s", network))) - //NOTE: The FlowIDTableStaking contract is deployed to the same account as the Epoch contract - return []byte(fmt.Sprintf(getInfoForProposedNodesScript, contracts.Epoch.Address)), nil + return []byte( + templates.ReplaceAddresses( + getInfoForProposedNodesScript, + contracts.AsTemplateEnv(), + ), + ), nil } diff --git a/cmd/util/cmd/epochs/cmd/deploy.go b/cmd/util/cmd/epochs/cmd/deploy.go index 32e3da4acf3..d8340b29e84 100644 --- a/cmd/util/cmd/epochs/cmd/deploy.go +++ b/cmd/util/cmd/epochs/cmd/deploy.go @@ -254,10 +254,7 @@ func getDeployEpochTransactionText(snapshot *inmem.Snapshot) []byte { // root chain id and system contractsRegister chainID := head.ChainID - systemContracts, err := systemcontracts.SystemContractsForChain(chainID) - if err != nil { - log.Fatal().Err(err).Str("chain_id", chainID.String()).Msgf("could not get system contracts for chainID") - } + systemContracts := systemcontracts.SystemContractsForChain(chainID) // epoch contract name and get code for contract epochContractCode := contracts.FlowEpoch( diff --git a/cmd/util/cmd/rollback-executed-height/cmd/rollback_executed_height_test.go b/cmd/util/cmd/rollback-executed-height/cmd/rollback_executed_height_test.go index ef2f9ae6284..6126cd1b059 100644 --- a/cmd/util/cmd/rollback-executed-height/cmd/rollback_executed_height_test.go +++ b/cmd/util/cmd/rollback-executed-height/cmd/rollback_executed_height_test.go @@ -61,6 +61,8 @@ func TestReExecuteBlock(t *testing.T) { txResults, db, trace.NewNoopTracer(), + nil, + false, ) require.NotNil(t, es) @@ -183,6 +185,8 @@ func TestReExecuteBlockWithDifferentResult(t *testing.T) { txResults, db, trace.NewNoopTracer(), + nil, + false, ) require.NotNil(t, es) diff --git a/cmd/util/ledger/reporters/account_reporter.go b/cmd/util/ledger/reporters/account_reporter.go index aed287c0298..7a0dcd4ce56 100644 --- a/cmd/util/ledger/reporters/account_reporter.go +++ b/cmd/util/ledger/reporters/account_reporter.go @@ -16,6 +16,7 @@ import ( "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/fvm/storage/state" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/model/flow" ) @@ -166,6 +167,7 @@ func newAccountDataProcessor( snapshot snapshot.StorageSnapshot, ) *balanceProcessor { bp := NewBalanceReporter(chain, snapshot) + sc := systemcontracts.SystemContractsForChain(bp.ctx.Chain.ChainID()) bp.logger = logger bp.rwa = rwa @@ -181,7 +183,7 @@ func newAccountDataProcessor( ?? panic("Could not borrow Balance reference to the Vault") return vaultRef.balance } - `, fvm.FungibleTokenAddress(bp.ctx.Chain), fvm.FlowTokenAddress(bp.ctx.Chain))) + `, sc.FungibleToken.Address.Hex(), sc.FlowToken.Address.Hex())) bp.fusdScript = []byte(fmt.Sprintf(` import FungibleToken from 0x%s @@ -193,7 +195,7 @@ func newAccountDataProcessor( ?? panic("Could not borrow Balance reference to the Vault") return vaultRef.balance } - `, fvm.FungibleTokenAddress(bp.ctx.Chain), "3c5959b568896393")) + `, sc.FungibleToken.Address.Hex(), "3c5959b568896393")) bp.momentsScript = []byte(` import TopShot from 0x0b2a3299cc857e29 diff --git a/cmd/util/ledger/reporters/fungible_token_tracker.go b/cmd/util/ledger/reporters/fungible_token_tracker.go index 30d9a140c67..24a2c09ac56 100644 --- a/cmd/util/ledger/reporters/fungible_token_tracker.go +++ b/cmd/util/ledger/reporters/fungible_token_tracker.go @@ -14,9 +14,9 @@ import ( "github.com/onflow/cadence/runtime/interpreter" "github.com/onflow/flow-go/cmd/util/ledger/migrations" - "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/storage/state" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/model/flow" ) @@ -41,7 +41,8 @@ type FungibleTokenTracker struct { } func FlowTokenTypeID(chain flow.Chain) string { - return fmt.Sprintf("A.%s.FlowToken.Vault", fvm.FlowTokenAddress(chain).Hex()) + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + return fmt.Sprintf("A.%s.FlowToken.Vault", sc.FlowToken.Address.Hex()) } func NewFungibleTokenTracker(logger zerolog.Logger, rwf ReportWriterFactory, chain flow.Chain, vaultTypeIDs []string) *FungibleTokenTracker { diff --git a/cmd/util/ledger/reporters/fungible_token_tracker_test.go b/cmd/util/ledger/reporters/fungible_token_tracker_test.go index 60a3988299c..528911ef7dc 100644 --- a/cmd/util/ledger/reporters/fungible_token_tracker_test.go +++ b/cmd/util/ledger/reporters/fungible_token_tracker_test.go @@ -15,6 +15,7 @@ import ( "github.com/onflow/flow-go/cmd/util/ledger/reporters" "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/fvm/storage/state" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/utils/unittest" @@ -69,6 +70,8 @@ func TestFungibleTokenTracker(t *testing.T) { err = view.Merge(snapshot) require.NoError(t, err) + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + // deploy wrapper resource testContract := fmt.Sprintf(` import FungibleToken from 0x%s @@ -87,7 +90,7 @@ func TestFungibleTokenTracker(t *testing.T) { pub fun CreateWrappedVault(inp: @FungibleToken.Vault): @WrappedToken.WrappedVault { return <-create WrappedVault(v :<- inp) } - }`, fvm.FungibleTokenAddress(chain)) + }`, sc.FungibleToken.Address.Hex()) deployingTestContractScript := []byte(fmt.Sprintf(` transaction { @@ -109,7 +112,8 @@ func TestFungibleTokenTracker(t *testing.T) { err = view.Merge(snapshot) require.NoError(t, err) - wrapTokenScript := []byte(fmt.Sprintf(` + wrapTokenScript := []byte(fmt.Sprintf( + ` import FungibleToken from 0x%s import FlowToken from 0x%s import WrappedToken from 0x%s @@ -123,7 +127,11 @@ func TestFungibleTokenTracker(t *testing.T) { let wrappedFlow <- WrappedToken.CreateWrappedVault(inp :<- sentVault) signer.save(<-wrappedFlow, to: /storage/wrappedToken) } - }`, fvm.FungibleTokenAddress(chain), fvm.FlowTokenAddress(chain), chain.ServiceAddress())) + }`, + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), + sc.FlowServiceAccount.Address.Hex(), + )) txBody = flow.NewTransactionBody(). SetScript(wrapTokenScript). diff --git a/config/README.md b/config/README.md index 3a4fe42c918..8308efcde8a 100644 --- a/config/README.md +++ b/config/README.md @@ -15,12 +15,12 @@ defined. A single default value can be overridden by setting the CLI flag for th config to false. Override entire config file. ```shell -go build -tags relic -o flow-access-node ./cmd/access +go build -o flow-access-node ./cmd/access ./flow-access-node --config-file=config/config.yml ``` Override a single configuration value. ```shell -go build -tags relic -o flow-access-node ./cmd/access +go build -o flow-access-node ./cmd/access ./flow-access-node --networking-connection-pruning=false ``` ### Adding a new config value diff --git a/config/default-config.yml b/config/default-config.yml index 5c75ed4b338..07b0d6b408b 100644 --- a/config/default-config.yml +++ b/config/default-config.yml @@ -33,18 +33,6 @@ network-config: # retry a unicast stream to a remote peer 3 times, the peer will give up and will not retry creating a unicast stream to that remote peer. # When it is set to zero it means that the peer will not retry creating a unicast stream to a remote peer if it fails. unicast-max-stream-creation-retry-attempt-times: 3 - # The number of seconds that the local peer waits since the last successful dial to a remote peer before resetting the unicast dial retry budget from zero to the maximum default. - # If it is set to 3600s (1h) for example, it means that if it has passed at least one hour since the last successful dial, and the remote peer has a zero dial retry budget, - # the unicast dial retry budget for that remote peer will be reset to the maximum default. - unicast-dial-zero-retry-reset-threshold: 3600s - # The maximum number of retry attempts for dialing a remote peer before giving up. If it is set to 3 for example, it means that if a peer fails to dial a remote peer 3 times, - # the peer will give up and will not retry dialing that remote peer. - unicast-max-dial-retry-attempt-times: 3 - # The backoff delay used in the exponential backoff for consecutive failed unicast dial attempts to a remote peer. - unicast-dial-backoff-delay: 1s - # The backoff delay used in the exponential backoff for backing off concurrent create stream attempts to the same remote peer - # when there is no available connections to that remote peer and a dial is in progress. - unicast-dial-in-progress-backoff-delay: 1s # The size of the dial config cache used to keep track of the dial config for each remote peer. The dial config is used to keep track of the dial retry budget for each remote peer. # Recommended to set it to the maximum number of remote peers in the network. unicast-dial-config-cache-size: 10_000 @@ -146,6 +134,15 @@ network-config: gossipsub-rpc-sent-tracker-workers: 5 # Peer scoring is the default value for enabling peer scoring gossipsub-peer-scoring-enabled: true + # The interval for updating the list of subscribed peers to all topics in gossipsub. This is used to keep track of subscriptions + # violations and penalize peers accordingly. Recommended value is in the order of a few minutes to avoid contentions; as the operation + # reads all topics and all peers subscribed to each topic. + gossipsub-subscription-provider-update-interval: 10m + # The size of cache for keeping the list of all peers subscribed to each topic (same as the local node). This cache is the local node's + # view of the network and is used to detect subscription violations and penalize peers accordingly. Recommended to be big enough to + # keep the entire network's size. Otherwise, the local node's view of the network will be incomplete due to cache eviction. + # Recommended size is 10x the number of peers in the network. + gossipsub-subscription-provider-cache-size: 10000 # Gossipsub rpc inspectors configs # The size of the queue for notifications about invalid RPC messages @@ -194,6 +191,12 @@ network-config: gossipsub-rpc-metrics-inspector-workers: 1 # The size of the queue used by worker pool for the control message metrics inspector gossipsub-rpc-metrics-inspector-cache-size: 100 + # Threshold level for penalty. At each evaluation period, when a node's penalty is below this value, the decay rate slows down, ensuring longer decay periods for malicious nodes and quicker decay for honest ones. + gossipsub-app-specific-penalty-decay-slowdown-threshold: -99 + # This setting adjusts the decay rate when a node's penalty falls below the threshold. The decay rate, ranging between 0 and 1, dictates how quickly penalties decrease: a higher rate results in slower decay. The decay calculation is multiplicative (newPenalty = decayRate * oldPenalty). The reduction factor increases the decay rate, thus decelerating the penalty reduction. For instance, with a 0.01 reduction factor, the decay rate increases by 0.01 at each evaluation interval when the penalty is below the threshold. Consequently, a decay rate of `x` diminishes the penalty to zero more rapidly than a rate of `x+0.01`. + gossipsub-app-specific-penalty-decay-rate-reduction-factor: .01 + # Defines the frequency for evaluating and potentially adjusting the decay process of a spam record. At each interval, the system assesses the current penalty of a node. If this penalty is below the defined threshold, the decay rate is modified according to the reduction factor, slowing down the penalty reduction process. This reassessment at regular intervals ensures that the decay rate is dynamically adjusted to reflect the node's ongoing behavior, maintaining a balance between penalizing malicious activity and allowing recovery for honest nodes. + gossipsub-app-specific-penalty-decay-evaluation-period: 10m # Application layer spam prevention alsp-spam-record-cache-size: 1000 alsp-spam-report-queue-size: 10_000 diff --git a/consensus/hotstuff/notifications/telemetry.go b/consensus/hotstuff/notifications/telemetry.go index 7bbf57f79de..d6cc3852179 100644 --- a/consensus/hotstuff/notifications/telemetry.go +++ b/consensus/hotstuff/notifications/telemetry.go @@ -38,7 +38,10 @@ type TelemetryConsumer struct { noPathLogger zerolog.Logger } +// Telemetry implements consumers for _all happy-path_ interfaces in consensus/hotstuff/notifications/telemetry.go: var _ hotstuff.ParticipantConsumer = (*TelemetryConsumer)(nil) +var _ hotstuff.CommunicatorConsumer = (*TelemetryConsumer)(nil) +var _ hotstuff.FinalizationConsumer = (*TelemetryConsumer)(nil) var _ hotstuff.VoteCollectorConsumer = (*TelemetryConsumer)(nil) var _ hotstuff.TimeoutCollectorConsumer = (*TelemetryConsumer)(nil) diff --git a/consensus/hotstuff/pacemaker/pacemaker.go b/consensus/hotstuff/pacemaker/pacemaker.go index fc3ba87dbe3..ae62aee0ea2 100644 --- a/consensus/hotstuff/pacemaker/pacemaker.go +++ b/consensus/hotstuff/pacemaker/pacemaker.go @@ -31,7 +31,7 @@ type ActivePaceMaker struct { ctx context.Context timeoutControl *timeout.Controller - notifier hotstuff.Consumer + notifier hotstuff.ParticipantConsumer viewTracker viewTracker started bool } diff --git a/consensus/hotstuff/signature/randombeacon_inspector_test.go b/consensus/hotstuff/signature/randombeacon_inspector_test.go index 5df5b897289..3aead48f822 100644 --- a/consensus/hotstuff/signature/randombeacon_inspector_test.go +++ b/consensus/hotstuff/signature/randombeacon_inspector_test.go @@ -40,7 +40,7 @@ func (rs *randomBeaconSuite) SetupTest() { // generate threshold keys rs.rng = unittest.GetPRG(rs.T()) - seed := make([]byte, crypto.SeedMinLenDKG) + seed := make([]byte, crypto.KeyGenSeedMinLen) _, err := rs.rng.Read(seed) require.NoError(rs.T(), err) rs.skShares, rs.pkShares, rs.pkGroup, err = crypto.BLSThresholdKeyGen(rs.n, rs.threshold, seed) diff --git a/consensus/hotstuff/verification/combined_verifier_v2.go b/consensus/hotstuff/verification/combined_verifier_v2.go index e0894771d00..cafc16b15ca 100644 --- a/consensus/hotstuff/verification/combined_verifier_v2.go +++ b/consensus/hotstuff/verification/combined_verifier_v2.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package verification import ( diff --git a/consensus/hotstuff/verification/combined_verifier_v3.go b/consensus/hotstuff/verification/combined_verifier_v3.go index 4863690994b..4c91aeac1c1 100644 --- a/consensus/hotstuff/verification/combined_verifier_v3.go +++ b/consensus/hotstuff/verification/combined_verifier_v3.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package verification import ( diff --git a/consensus/hotstuff/verification/staking_verifier.go b/consensus/hotstuff/verification/staking_verifier.go index 207e11c8ad9..1b15e6e213c 100644 --- a/consensus/hotstuff/verification/staking_verifier.go +++ b/consensus/hotstuff/verification/staking_verifier.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package verification import ( diff --git a/consensus/integration/nodes_test.go b/consensus/integration/nodes_test.go index c44775cd14b..ef5501d189d 100644 --- a/consensus/integration/nodes_test.go +++ b/consensus/integration/nodes_test.go @@ -377,7 +377,8 @@ func createNode( qcsDB := storage.NewQuorumCertificates(metricsCollector, db, storage.DefaultCacheSize) setupsDB := storage.NewEpochSetups(metricsCollector, db) commitsDB := storage.NewEpochCommits(metricsCollector, db) - protocolStateDB := storage.NewProtocolState(metricsCollector, setupsDB, commitsDB, db, storage.DefaultCacheSize) + protocolStateDB := storage.NewProtocolState(metricsCollector, setupsDB, commitsDB, db, + storage.DefaultProtocolStateCacheSize, storage.DefaultProtocolStateByBlockIDCacheSize) versionBeaconDB := storage.NewVersionBeacons(db) protocolStateEvents := events.NewDistributor() diff --git a/crypto/.dockerignore b/crypto/.dockerignore deleted file mode 100644 index 5c75f82093a..00000000000 --- a/crypto/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -relic/build diff --git a/crypto/Dockerfile b/crypto/Dockerfile index d75e9543de4..9c3fbff6363 100644 --- a/crypto/Dockerfile +++ b/crypto/Dockerfile @@ -2,7 +2,7 @@ FROM golang:1.20-buster RUN apt-get update -RUN apt-get -y install cmake zip +RUN apt-get -y install zip RUN go install github.com/axw/gocov/gocov@latest RUN go install github.com/matm/gocov-html@latest WORKDIR /go/src/flow diff --git a/crypto/Makefile b/crypto/Makefile index c66774e1033..14016e40619 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -3,6 +3,9 @@ COVER_PROFILE := cover.out IMAGE_TAG := v0.0.7 +# OS +UNAME := $(shell uname -s) + # allows CI to specify whether to have race detection on / off ifeq ($(RACE_DETECTOR),1) RACE_FLAG := -race @@ -10,42 +13,89 @@ else RACE_FLAG := endif -ADX_SUPPORT := $(shell if ([ -f "/proc/cpuinfo" ] && grep -q -e '^flags.*\badx\b' /proc/cpuinfo); then echo 1; else echo 0; fi) - -.PHONY: setup -setup: - go generate +# `ADX_SUPPORT` is 1 if ADX instructions are supported and 0 otherwise. +ifeq ($(UNAME),Linux) +# detect ADX support on the CURRENT linux machine. + ADX_SUPPORT := $(shell if ([ -f "/proc/cpuinfo" ] && grep -q -e '^flags.*\badx\b' /proc/cpuinfo); then echo 1; else echo 0; fi) +else +# on non-linux machines, set the flag to 1 by default + ADX_SUPPORT := 1 +endif -# test BLS-related functionalities requiring the Relic library (and hence relic Go build flag) -.PHONY: relic_tests -relic_tests: +# the crypto package uses BLST source files underneath which may use ADX instructions. ifeq ($(ADX_SUPPORT), 1) - go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) --tags relic $(if $(VERBOSE),-v,) +# if ADX instructions are supported, default is to use a fast ADX BLST implementation + CRYPTO_FLAG := "" else - CGO_CFLAGS="-D__BLST_PORTABLE__" go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) --tags relic $(if $(VERBOSE),-v,) +# if ADX instructions aren't supported, this CGO flags uses a slower non-ADX BLST implementation + CRYPTO_FLAG := "-O -D__BLST_PORTABLE__" endif +CGO_FLAG := CGO_CFLAGS=$(CRYPTO_FLAG) -# test all packages that do not require Relic library (all functionalities except the BLS-related ones) -.PHONY: non_relic_tests -non_relic_tests: -# root package without relic - go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) $(if $(VERBOSE),-v,) -# sub packages - go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) $(if $(VERBOSE),-v,) ./hash - go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) $(if $(VERBOSE),-v,) ./random +# format C code +.PHONY: c-format +c-format: + clang-format -style=llvm -dump-config > .clang-format + clang-format -i *.c + clang-format -i *.h + rm -f .clang-format + git diff --exit-code -############################################################################################ -# CAUTION: DO NOT MODIFY THIS TARGET! DOING SO WILL BREAK THE FLAKY TEST MONITOR +# address sanitization and other checks +.SILENT: c-asan +c-asan: +# - address sanitization and other checks (only on linux) + if [ $(UNAME) = "Linux" ]; then \ + $(CGO_FLAG) CC="clang -O0 -g -fsanitize=address -fno-omit-frame-pointer -fsanitize=leak -fsanitize=undefined -fno-sanitize-recover=all -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-sanitize=null -fno-sanitize=alignment" \ + LD="-fsanitize=address -fsanitize=leak" go test; \ + if [ $$? -ne 0 ]; then exit 1; fi; \ + else \ + echo "sanitization is only supported on Linux"; \ + fi; \ -# sets up the crypto module and runs all tests -.PHONY: test -test: setup unittest +# memory sanitization +.SILENT: c-msan +c-msan: +# - memory sanitization (only on linux and using clang) - (could use go test -msan) +# currently, this leads to many false positives, most likely because of assembly code not handled properly +# by asan. If you would like to run this command, you can use `NO_MSAN` to diable msan in some C functions. +# For instance "void NO_MSAN f() {...}" disables msan in function f. `NO_MSAN` is already defined in +# bls12381_utils.h + if [ $(UNAME) = "Linux" ]; then \ + $(CGO_FLAG) CC="clang -DMSAN -O0 -g -fsanitize=memory -fno-omit-frame-pointer -fsanitize-memory-track-origins" \ + LD="-fsanitize=memory" go test; \ + if [ $$? -ne 0 ]; then exit 1; fi; \ + else \ + echo "sanitization is only supported on Linux"; \ + fi; \ + +# sanitize C code +.SILENT: c-sanitize +c-sanitize: c-asan +# - address sanitization and other checks (only on linux) +# - memory sanitization (target m-san) is disabled because of multiple false positives -# runs the unit tests of the module (assumes the module was set up) -.PHONY: unittest -unittest: relic_tests non_relic_tests +# Go tidy +.PHONY: go-tidy +go-tidy: + go mod tidy -v + git diff --exit-code -############################################################################################ +# Go lint +.PHONY: go-lint +go-lint: +lint: go-tidy + # revive -config revive.toml + golangci-lint run -v ./... + +# test all packages +.PHONY: test +test: +# root package + $(CGO_FLAG) go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) $(if $(VERBOSE),-v,) +# sub packages + $(CGO_FLAG) go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) $(if $(VERBOSE),-v,) ./hash + $(CGO_FLAG) go test -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) $(if $(VERBOSE),-v,) ./random .PHONY: docker-build docker-build: diff --git a/crypto/README.md b/crypto/README.md index 9f29ad03e16..c15d0a36462 100644 --- a/crypto/README.md +++ b/crypto/README.md @@ -6,86 +6,22 @@ Most of the primitives and protocols can be used in other projects and are not s Flow is an ongoing project, which means that new features will still be added and modifications will still be made to improve security and performance of the cryptography package. Notes: - - The package has been audited for security in January 2021 on [this version](https://github.com/onflow/flow-go/tree/2707acdabb851138e298b2d186e73f47df8a14dd). The package had a few improvements since. + - The package has been audited for security in January 2021 on [this version](https://github.com/onflow/flow-go/tree/2707acdabb851138e298b2d186e73f47df8a14dd). The package had a major refactor to switch all the BLS12-381 curve implementation to use [BLST](https://github.com/supranational/blst/tree/master/src) starting from [this version](TODO: link the commit/tag). - The package does not provide security against side channel or fault attacks. ## Package import -Cloning Flow repository and following the [installation steps](https://github.com/onflow/flow-go) builds the necessary tools to use Flow cryptography. +To use the Flow cryptography package, you can: -If you wish to only import the Flow cryptography package into your Go project, please follow the following steps: - -- Get Flow cryptography package +- get the package ``` go get github.com/onflow/flow-go/crypto ``` -or simply import the package to your Go project +- or simply import the package to your Go project ``` import "github.com/onflow/flow-go/crypto" ``` -This is enough to run the package code for many functionalities. However, this isn't enough if BLS signature related functionalities are used. The BLS features rely on an extrnal C library ([Relic](https://github.com/relic-toolkit/relic)) for lower level mathematical operations. Building your project at this stage including BLS functionalities would result in build errors related to missing "relic" files. For instance: -``` -fatal error: 'relic.h' file not found -#include "relic.h" - ^~~~~~~~~ -``` - - An extra step is required to compile the external dependency (Relic) locally. - -- Install [CMake](https://cmake.org/install/), which is used for building the package. The build also requires [Git](http://git-scm.com/) and bash scripting. -- From the Go package directory in `$GOPATH/pkg/mod/github.com/onflow/flow-go/crypto@/`, build the package dependencies. `version-tag` is the imported package version. -For instance: -``` -cd $GOPATH/pkg/mod/github.com/onflow/flow-go/crypto@v0.25.0/ -go generate -``` - -Below is a bash script example to automate the above steps. The script can be copied into your Go project root directory. -It extracts the imported pacakage version from your project's go.mod file and performs the remaining steps. -```bash -#!/bin/bash - -# crypto package -PKG_NAME="github.com/onflow/flow-go/crypto" - -# go get the package -go get ${PKG_NAME} - -# go.mod -MOD_FILE="./go.mod" - -# the version of onflow/flow-go/crypto used in the project is read from the go.mod file -if [ -f "${MOD_FILE}" ] -then - # extract the version from the go.mod file - VERSION="$(grep ${PKG_NAME} < ${MOD_FILE} | cut -d' ' -f 2)" - # using the right version, get the package directory path - PKG_DIR="$(go env GOPATH)/pkg/mod/${PKG_NAME}@${VERSION}" -else - { echo "couldn't find go.mod file - make sure the script is in the project root directory"; exit 1; } -fi - -# grant permissions if not existant -if [[ ! -r ${PKG_DIR} || ! -w ${PKG_DIR} || ! -x ${PKG_DIR} ]]; then - sudo chmod -R 755 "${PKG_DIR}" -fi - -# get into the package directory and set up the external dependencies -( - cd "${PKG_DIR}" || { echo "cd into the GOPATH package folder failed"; exit 1; } - go generate -) -``` - - -Finally, when building your project and including any BLS functionality, adding a Go build tag to include the BLS files in the build is required. -The tag is not required when the package is used without BLS functions. It was introduced to avoid build errors when BLS (and therefore Relic) is not needed. - -``` -go build -tags=relic -``` - ## Algorithms ### Hashing and Message Authentication Code: @@ -103,11 +39,11 @@ All signature schemes use the generic interfaces of `PrivateKey` and `PublicKey` * ECDSA * public keys are compressed or uncompressed. - * ephemeral key is derived from the private key, hash and an external entropy using a CSPRNG (based on https://golang.org/pkg/crypto/ecdsa/). + * ephemeral key is derived from the private key, hash and the system entropy (based on https://golang.org/pkg/crypto/ecdsa/). * supports NIST P-256 (secp256r1) and secp256k1 curves. * BLS - * supports [BLS 12-381](https://electriccoin.co/blog/new-snark-curve/) curve. + * supports [BLS12-381](https://electriccoin.co/blog/new-snark-curve/) curve. * is implementing the minimal-signature-size variant: signatures in G1 and public keys in G2. * default set-up uses [compressed](https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) G1/G2 points, @@ -119,18 +55,13 @@ All signature schemes use the generic interfaces of `PrivateKey` and `PublicKey` and BLS_POP_BLS12381G1_XOF:KMAC128_SSWU_RO_POP_ for proofs of possession. * signature verification includes the signature membership check in G1. * public key membership check in G2 is provided outside of the signature verification. - * membership check in G1 is using [Bowe's fast check](https://eprint.iacr.org/2019/814.pdf), while membership check in G2 is using a simple scalar multiplication by the group order (both will be updated to use Scott's method) - * non-interactive aggregation of signatures, public keys and private keys. - * multi-signature verification of an aggregated signature of a single message under multiple public keys. - * multi-signature verification of an aggregated signature of multiple messages under multiple public keys. + * aggregation of signatures, public keys and private keys. + * verification of an aggregated signature of a single message under multiple public keys. + * verification of an aggregated signature of multiple messages under multiple public keys. * batch verification of multiple signatures of a single message under multiple - public keys: use a binary tree of aggregations to find the invalid signatures. + public keys, using a binary tree of aggregations. * SPoCK scheme based on BLS: verifies two signatures have been generated from the same message that is unknown to the verifier. - * Future features: - * membership checks in G1/G2 using [Scotts's method](https://eprint.iacr.org/2021/1130.pdf). - * support minimal-pubkey-size variant - ### PRNG * ChaCha20-based CSPRNG @@ -146,9 +77,6 @@ All signature schemes use the generic interfaces of `PrivateKey` and `PublicKey` * key generation (single dealer) to provide the set of keys. * provides a stateless api and a stateful api. - * Future features: - * support a partial signature reconstruction in the stateful api to avoid a long final reconstruction. - ### Discrete-Log based distributed key generation @@ -158,7 +86,7 @@ All supported Distributed Key Generation protocols are [discrete log based](http * simple verifiable secret sharing with a single dealer. * the library does not implement the communication channels between participants. The caller should implement the methods `PrivateSend` (1-to-1 messaging) and `Broadcast` (1-to-n messaging) * 1-to-1 messaging must be a private channel, the caller must make sure the channel preserves confidentialiy and authenticates the sender. - * 1-to-n broadcasting assume all destination participants receive the same copy of the message. The channel should also authenticate the broadcaster. + * 1-to-n broadcasting is a reliable broadcast, where honest senders are able to reach all honest receivers, and where all honest receivers end up with the same received messages. The channel should also authenticate the broadcaster. * It is recommended that both communication channels are unique per protocol instance. This could be achieved by prepending the messages to send/broadcast by a unique protocol instance ID. * Feldman VSS Qual. * an extension of the simple Feldman VSS. diff --git a/crypto/bls.go b/crypto/bls.go index 1e009304fe2..27ddd881bfd 100644 --- a/crypto/bls.go +++ b/crypto/bls.go @@ -1,15 +1,13 @@ -//go:build relic -// +build relic - package crypto -// BLS signature scheme implementation using BLS12-381 curve -// ([zcash]https://electriccoin.co/blog/new-snark-curve/) -// Pairing, ellipic curve and modular arithmetic is using Relic library. -// This implementation does not include any security against side-channel attacks. +// BLS signature scheme implementation using the BLS12-381 curve +// ([zcash]https://electriccoin.co/blog/new-snark-curve/). +// Pairing, ellipic curve and modular arithmetic are using [BLST](https://github.com/supranational/blst/tree/master/src) +// tools underneath. +// This implementation does not include security against side-channel or fault attacks. -// existing features: -// - the implementation variant is minimal-signature-size signatures: +// Existing features: +// - the implementation variant is minimal-signature-size: // shorter signatures in G1, longer public keys in G2 // - serialization of points on G1 and G2 is compressed ([zcash] // https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) @@ -21,25 +19,16 @@ package crypto // and BLS_POP_BLS12381G1_XOF:KMAC128_SSWU_RO_POP_ for proofs of possession. // - signature verification checks the membership of signature in G1. // - the public key membership check in G2 is implemented separately from the signature verification. -// - membership check in G1 is implemented using fast Bowe's check (to be updated to Scott's check). -// - membership check in G2 is using a simple scalar multiplication with the group order (to be updated to Scott's check). // - multi-signature tools are defined in bls_multisg.go -// - SPoCK scheme based on BLS: verifies two signatures have been generated from the same message, -// that is unknown to the verifier. - -// future features: -// - membership checks G2 using Bowe's method (https://eprint.iacr.org/2019/814.pdf) -// - implement a G1/G2 swap (signatures on G2 and public keys on G1) +// - SPoCK scheme based on BLS: verifies two signatures are generated from the same message, +// even though the message is unknown to the verifier. -// #cgo CFLAGS: -g -Wall -std=c99 -// #cgo LDFLAGS: -L${SRCDIR}/relic/build/lib -l relic_s // #include "bls_include.h" import "C" import ( "bytes" "crypto/sha256" - "errors" "fmt" "golang.org/x/crypto/hkdf" @@ -48,26 +37,15 @@ import ( ) const ( - // BLS12-381 - // p size in bytes, where G1 is defined over the field Zp - fieldSize = 48 - // - // 1 for compressed, 0 for uncompressed - values should not be changed - uncompressed = 0 //nolint - compressed = 1 - // Points compression when serialized - serializationG1 = compressed - serializationG2 = compressed - // - // SignatureLenBLSBLS12381 is the size of G1 elements - SignatureLenBLSBLS12381 = fieldSize * (2 - serializationG1) // the length is divided by 2 if compression is on - PrKeyLenBLSBLS12381 = 32 - // PubKeyLenBLSBLS12381 is the size of G2 elements - PubKeyLenBLSBLS12381 = 2 * fieldSize * (2 - serializationG2) // the length is divided by 2 if compression is on + // SignatureLenBLSBLS12381 is the serialization size of a `G_1` element. + SignatureLenBLSBLS12381 = g1BytesLen + // PubKeyLenBLSBLS12381 is the serialization size of a `G_2` element. + PubKeyLenBLSBLS12381 = g2BytesLen + // PrKeyLenBLSBLS12381 is the serialization size of a `F_r` element, + // where `r` is the order of `G_1` and `G_2`. + PrKeyLenBLSBLS12381 = frBytesLen // Hash to curve params - // expandMsgOutput is the output length of the expand_message step as required by the hash_to_curve algorithm - expandMsgOutput = 2 * (fieldSize + (securityBits / 8)) // hash to curve suite ID of the form : CurveID_ || HashID_ || MapID_ || encodingVariant_ h2cSuiteID = "BLS12381G1_XOF:KMAC128_SSWU_RO_" // scheme implemented as a countermasure for rogue attacks of the form : SchemeTag_ @@ -77,12 +55,13 @@ const ( // Cipher suite used for BLS PoP of the form : BLS_POP_ || h2cSuiteID || SchemeTag_ // The PoP cipher suite is guaranteed to be different than all signature ciphersuites blsPOPCipherSuite = "BLS_POP_" + h2cSuiteID + schemeTag + // expandMsgOutput is the output length of the expand_message step as required by the + // hash_to_curve algorithm (and the map to G1 step). + expandMsgOutput = int(C.MAP_TO_G1_INPUT_LEN) ) // blsBLS12381Algo, embeds SignAlgo type blsBLS12381Algo struct { - // points to Relic context of BLS12-381 with all the parameters - context ctx // the signing algo and parameters algo SigningAlgorithm } @@ -165,12 +144,9 @@ func (sk *prKeyBLSBLS12381) Sign(data []byte, kmac hash.Hasher) (Signature, erro // hash the input to 128 bytes h := kmac.ComputeHash(data) - // set BLS context - blsInstance.reInit() - s := make([]byte, SignatureLenBLSBLS12381) C.bls_sign((*C.uchar)(&s[0]), - (*C.bn_st)(&sk.scalar), + (*C.Fr)(&sk.scalar), (*C.uchar)(&h[0]), (C.int)(len(h))) return s, nil @@ -202,10 +178,7 @@ func (pk *pubKeyBLSBLS12381) Verify(s Signature, data []byte, kmac hash.Hasher) return false, err } - // intialize BLS context - blsInstance.reInit() - - if len(s) != signatureLengthBLSBLS12381 { + if len(s) != SignatureLenBLSBLS12381 { return false, nil } @@ -217,7 +190,7 @@ func (pk *pubKeyBLSBLS12381) Verify(s Signature, data []byte, kmac hash.Hasher) return false, nil } - verif := C.bls_verify((*C.ep2_st)(&pk.point), + verif := C.bls_verify((*C.E2)(&pk.point), (*C.uchar)(&s[0]), (*C.uchar)(&h[0]), (C.int)(len(h))) @@ -228,15 +201,10 @@ func (pk *pubKeyBLSBLS12381) Verify(s Signature, data []byte, kmac hash.Hasher) case valid: return true, nil default: - return false, fmt.Errorf("signature verification failed") + return false, fmt.Errorf("signature verification failed: code %d", verif) } } -// 0xC0 is the header of the point at infinity serialization (either in G1 or G2) -const infinityPointHeader = 0xC0 - -var identityBLSSignature = append([]byte{infinityPointHeader}, make([]byte, signatureLengthBLSBLS12381-1)...) - // IsBLSSignatureIdentity checks whether the input signature is // the identity signature (point at infinity in G1). // @@ -246,7 +214,7 @@ var identityBLSSignature = append([]byte{infinityPointHeader}, make([]byte, sign // suspected to be equal to identity, which avoids failing the aggregated // signature verification. func IsBLSSignatureIdentity(s Signature) bool { - return bytes.Equal(s, identityBLSSignature) + return bytes.Equal(s, g1Serialization) } // generatePrivateKey deterministically generates a private key for BLS on BLS12-381 curve. @@ -277,7 +245,7 @@ func (a *blsBLS12381Algo) generatePrivateKey(ikm []byte) (PrivateKey, error) { // L is the OKM length // L = ceil((3 * ceil(log2(r))) / 16) which makes L (security_bits/8)-larger than r size - okmLength := (3 * PrKeyLenBLSBLS12381) / 2 + okmLength := (3 * frBytesLen) / 2 // HKDF secret = IKM || I2OSP(0, 1) secret := make([]byte, len(ikm)+1) @@ -299,8 +267,9 @@ func (a *blsBLS12381Algo) generatePrivateKey(ikm []byte) (PrivateKey, error) { } defer overwrite(okm) // overwrite okm - // map the bytes to a private key : SK = OS2IP(OKM) mod r - isZero := mapToZr(&sk.scalar, okm) + // map the bytes to a private key using modular reduction + // SK = OS2IP(OKM) mod r + isZero := mapToFr(&sk.scalar, okm) if !isZero { return sk, nil } @@ -315,31 +284,27 @@ func (a *blsBLS12381Algo) generatePrivateKey(ikm []byte) (PrivateKey, error) { const invalidBLSSignatureHeader = byte(0xE0) // BLSInvalidSignature returns an invalid signature that fails when verified -// with any message and public key. +// with any message and public key, which can be used for testing. // // The signature bytes represent an invalid serialization of a point which // makes the verification fail early. The verification would return (false, nil). func BLSInvalidSignature() Signature { signature := make([]byte, SignatureLenBLSBLS12381) - signature[0] = invalidBLSSignatureHeader // invalid header as per C.ep_read_bin_compact + signature[0] = invalidBLSSignatureHeader // invalid header as per the Zcash serialization return signature } // decodePrivateKey decodes a slice of bytes into a private key. +// Decoding assumes a bytes big endian format. // It checks the scalar is non-zero and is less than the group order. func (a *blsBLS12381Algo) decodePrivateKey(privateKeyBytes []byte) (PrivateKey, error) { - if len(privateKeyBytes) != prKeyLengthBLSBLS12381 { - return nil, invalidInputsErrorf("input length must be %d, got %d", - prKeyLengthBLSBLS12381, len(privateKeyBytes)) - } sk := newPrKeyBLSBLS12381(nil) - readScalar(&sk.scalar, privateKeyBytes) - if C.check_membership_Zr_star((*C.bn_st)(&sk.scalar)) == valid { - return sk, nil + err := readScalarFrStar(&sk.scalar, privateKeyBytes) + if err != nil { + return nil, fmt.Errorf("failed to read the private key: %w", err) } - - return nil, invalidInputsErrorf("the private key is not a valid BLS12-381 curve key") + return sk, nil } // decodePublicKey decodes a slice of bytes into a public key. @@ -350,18 +315,18 @@ func (a *blsBLS12381Algo) decodePrivateKey(privateKeyBytes []byte) (PrivateKey, // a faster check during signature verifications. Any verification against an identity // public key outputs `false`. func (a *blsBLS12381Algo) decodePublicKey(publicKeyBytes []byte) (PublicKey, error) { - if len(publicKeyBytes) != pubKeyLengthBLSBLS12381 { + if len(publicKeyBytes) != PubKeyLenBLSBLS12381 { return nil, invalidInputsErrorf("input length must be %d, got %d", - pubKeyLengthBLSBLS12381, len(publicKeyBytes)) + PubKeyLenBLSBLS12381, len(publicKeyBytes)) } var pk pubKeyBLSBLS12381 - err := readPointG2(&pk.point, publicKeyBytes) + err := readPointE2(&pk.point, publicKeyBytes) if err != nil { - return nil, fmt.Errorf("decode public key failed %w", err) + return nil, fmt.Errorf("decode public key failed: %w", err) } // membership check in G2 - if C.check_membership_G2((*C.ep2_st)(&pk.point)) != valid { + if !bool(C.E2_in_G2((*C.E2)(&pk.point))) { return nil, invalidInputsErrorf("input key is infinity or does not encode a BLS12-381 point in the valid group") } @@ -374,7 +339,7 @@ func (a *blsBLS12381Algo) decodePublicKey(publicKeyBytes []byte) (PublicKey, err // decodePublicKeyCompressed decodes a slice of bytes into a public key. // since we use the compressed representation by default, this checks the default and delegates to decodePublicKeyCompressed func (a *blsBLS12381Algo) decodePublicKeyCompressed(publicKeyBytes []byte) (PublicKey, error) { - if serializationG2 != compressed { + if !isG2Compressed() { panic("library is not configured to use compressed public key serialization") } return a.decodePublicKey(publicKeyBytes) @@ -388,20 +353,19 @@ type prKeyBLSBLS12381 struct { scalar scalar } +var _ PrivateKey = (*prKeyBLSBLS12381)(nil) + // newPrKeyBLSBLS12381 creates a new BLS private key with the given scalar. // If no scalar is provided, the function allocates an // empty scalar. func newPrKeyBLSBLS12381(x *scalar) *prKeyBLSBLS12381 { - var sk prKeyBLSBLS12381 - if x == nil { - // initialize the scalar - C.bn_new_wrapper((*C.bn_st)(&sk.scalar)) - } else { - // set the scalar - sk.scalar = *x + if x != nil { + return &prKeyBLSBLS12381{ + // the embedded public key is only computed when needed + scalar: *x, + } } - // the embedded public key is only computed when needed - return &sk + return &prKeyBLSBLS12381{} } // Algorithm returns the Signing Algorithm @@ -440,7 +404,7 @@ func (sk *prKeyBLSBLS12381) PublicKey() PublicKey { // Encode returns a byte encoding of the private key. // The encoding is a raw encoding in big endian padded to the group order func (a *prKeyBLSBLS12381) Encode() []byte { - dest := make([]byte, prKeyLengthBLSBLS12381) + dest := make([]byte, frBytesLen) writeScalar(dest, &a.scalar) return dest } @@ -451,12 +415,12 @@ func (sk *prKeyBLSBLS12381) Equals(other PrivateKey) bool { if !ok { return false } - return sk.scalar.equals(&otherBLS.scalar) + return (&sk.scalar).equals(&otherBLS.scalar) } // String returns the hex string representation of the key. func (sk *prKeyBLSBLS12381) String() string { - return fmt.Sprintf("%#x", sk.Encode()) + return sk.scalar.String() } // pubKeyBLSBLS12381 is the public key of BLS using BLS12_381, @@ -472,15 +436,17 @@ type pubKeyBLSBLS12381 struct { // sure the comparison is performed after an instance is created. // // public key G2 point - point pointG2 + point pointE2 // G2 identity check cache isIdentity bool } +var _ PublicKey = (*pubKeyBLSBLS12381)(nil) + // newPubKeyBLSBLS12381 creates a new BLS public key with the given point. // If no scalar is provided, the function allocates an // empty scalar. -func newPubKeyBLSBLS12381(p *pointG2) *pubKeyBLSBLS12381 { +func newPubKeyBLSBLS12381(p *pointE2) *pubKeyBLSBLS12381 { if p != nil { key := &pubKeyBLSBLS12381{ point: *p, @@ -507,17 +473,19 @@ func (pk *pubKeyBLSBLS12381) Size() int { // The encoding is a compressed encoding of the point // [zcash] https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format- func (a *pubKeyBLSBLS12381) EncodeCompressed() []byte { - if serializationG2 != compressed { + if !isG2Compressed() { panic("library is not configured to use compressed public key serialization") } return a.Encode() } -// Encode returns a byte encoding of the public key. -// Since we use a compressed encoding by default, this delegates to EncodeCompressed +// Encode returns a byte encoding of the public key (a G2 point). +// The current encoding is a compressed serialization of G2 following [zcash] https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format- +// +// The function should evolve in the future to support uncompressed compresion too. func (a *pubKeyBLSBLS12381) Encode() []byte { - dest := make([]byte, pubKeyLengthBLSBLS12381) - writePointG2(dest, &a.point) + dest := make([]byte, g2BytesLen) + writePointE2(dest, &a.point) return dest } @@ -532,46 +500,7 @@ func (pk *pubKeyBLSBLS12381) Equals(other PublicKey) bool { // String returns the hex string representation of the key. func (pk *pubKeyBLSBLS12381) String() string { - return fmt.Sprintf("%#x", pk.Encode()) -} - -// Get Macro definitions from the C layer as Cgo does not export macros -var signatureLengthBLSBLS12381 = int(C.get_signature_len()) -var pubKeyLengthBLSBLS12381 = int(C.get_pk_len()) -var prKeyLengthBLSBLS12381 = int(C.get_sk_len()) - -// init sets the context of BLS12-381 curve -func (a *blsBLS12381Algo) init() error { - // initializes relic context and sets the B12_381 parameters - if err := a.context.initContext(); err != nil { - return err - } - - // compare the Go and C layer constants as a sanity check - if signatureLengthBLSBLS12381 != SignatureLenBLSBLS12381 || - pubKeyLengthBLSBLS12381 != PubKeyLenBLSBLS12381 || - prKeyLengthBLSBLS12381 != PrKeyLenBLSBLS12381 { - return errors.New("BLS-12381 length settings in Go and C are not consistent, check hardcoded lengths and compressions") - } - return nil -} - -// set the context of BLS 12-381 curve in the lower C and Relic layers assuming the context -// was previously initialized with a call to init(). -// -// If the implementation evolves to support multiple contexts, -// reinit should be called at every blsBLS12381Algo operation. -func (a *blsBLS12381Algo) reInit() { - a.context.setContext() -} - -// This is only a TEST/DEBUG/BENCH function. -// It returns the hash to G1 point from a slice of 128 bytes -func mapToG1(data []byte) *pointG1 { - l := len(data) - var h pointG1 - C.map_to_G1((*C.ep_st)(&h), (*C.uchar)(&data[0]), (C.int)(l)) - return &h + return pk.point.String() } // This is only a TEST function. @@ -592,7 +521,7 @@ func (sk *prKeyBLSBLS12381) signWithXMDSHA256(data []byte) Signature { // sign the hash s := make([]byte, SignatureLenBLSBLS12381) C.bls_sign((*C.uchar)(&s[0]), - (*C.bn_st)(&sk.scalar), + (*C.Fr)(&sk.scalar), (*C.uchar)(&hash[0]), (C.int)(len(hash))) return s diff --git a/crypto/bls12381_hashtocurve.c b/crypto/bls12381_hashtocurve.c deleted file mode 100644 index 229f9c009de..00000000000 --- a/crypto/bls12381_hashtocurve.c +++ /dev/null @@ -1,338 +0,0 @@ -// +build relic - -#include "bls12381_utils.h" -#include "bls_include.h" - -extern prec_st* bls_prec; - -#if (hashToPoint== LOCAL_SSWU) - -// These constants are taken from https://github.com/kwantam/bls12-381_hash -// and converted to the Mongtomery domain. -// Copyright 2019 Riad S. Wahby -const uint64_t iso_Nx_data[ELLP_Nx_LEN][Fp_DIGITS] = { - {0x4d18b6f3af00131c, 0x19fa219793fee28c, 0x3f2885f1467f19ae, - 0x23dcea34f2ffb304, 0xd15b58d2ffc00054, 0x0913be200a20bef4,}, - {0x898985385cdbbd8b, 0x3c79e43cc7d966aa, 0x1597e193f4cd233a, - 0x8637ef1e4d6623ad, 0x11b22deed20d827b, 0x07097bc5998784ad,}, - {0xa542583a480b664b, 0xfc7169c026e568c6, 0x5ba2ef314ed8b5a6, - 0x5b5491c05102f0e7, 0xdf6e99707d2a0079, 0x0784151ed7605524,}, - {0x494e212870f72741, 0xab9be52fbda43021, 0x26f5577994e34c3d, - 0x049dfee82aefbd60, 0x65dadd7828505289, 0x0e93d431ea011aeb,}, - {0x90ee774bd6a74d45, 0x7ada1c8a41bfb185, 0x0f1a8953b325f464, - 0x104c24211be4805c, 0x169139d319ea7a8f, 0x09f20ead8e532bf6,}, - {0x6ddd93e2f43626b7, 0xa5482c9aa1ccd7bd, 0x143245631883f4bd, - 0x2e0a94ccf77ec0db, 0xb0282d480e56489f, 0x18f4bfcbb4368929,}, - {0x23c5f0c953402dfd, 0x7a43ff6958ce4fe9, 0x2c390d3d2da5df63, - 0xd0df5c98e1f9d70f, 0xffd89869a572b297, 0x1277ffc72f25e8fe,}, - {0x79f4f0490f06a8a6, 0x85f894a88030fd81, 0x12da3054b18b6410, - 0xe2a57f6505880d65, 0xbba074f260e400f1, 0x08b76279f621d028,}, - {0xe67245ba78d5b00b, 0x8456ba9a1f186475, 0x7888bff6e6b33bb4, - 0xe21585b9a30f86cb, 0x05a69cdcef55feee, 0x09e699dd9adfa5ac,}, - {0x0de5c357bff57107, 0x0a0db4ae6b1a10b2, 0xe256bb67b3b3cd8d, - 0x8ad456574e9db24f, 0x0443915f50fd4179, 0x098c4bf7de8b6375,}, - {0xe6b0617e7dd929c7, 0xfe6e37d442537375, 0x1dafdeda137a489e, - 0xe4efd1ad3f767ceb, 0x4a51d8667f0fe1cf, 0x054fdf4bbf1d821c,}, - {0x72db2a50658d767b, 0x8abf91faa257b3d5, 0xe969d6833764ab47, - 0x464170142a1009eb, 0xb14f01aadb30be2f, 0x18ae6a856f40715d,}, -}; - -const uint64_t iso_Ny_data[ELLP_Ny_LEN][Fp_DIGITS] = { - {0x2b567ff3e2837267, 0x1d4d9e57b958a767, 0xce028fea04bd7373, - 0xcc31a30a0b6cd3df, 0x7d7b18a682692693, 0x0d300744d42a0310,}, - {0x99c2555fa542493f, 0xfe7f53cc4874f878, 0x5df0608b8f97608a, - 0x14e03832052b49c8, 0x706326a6957dd5a4, 0x0a8dadd9c2414555,}, - {0x13d942922a5cf63a, 0x357e33e36e261e7d, 0xcf05a27c8456088d, - 0x0000bd1de7ba50f0, 0x83d0c7532f8c1fde, 0x13f70bf38bbf2905,}, - {0x5c57fd95bfafbdbb, 0x28a359a65e541707, 0x3983ceb4f6360b6d, - 0xafe19ff6f97e6d53, 0xb3468f4550192bf7, 0x0bb6cde49d8ba257,}, - {0x590b62c7ff8a513f, 0x314b4ce372cacefd, 0x6bef32ce94b8a800, - 0x6ddf84a095713d5f, 0x64eace4cb0982191, 0x0386213c651b888d,}, - {0xa5310a31111bbcdd, 0xa14ac0f5da148982, 0xf9ad9cc95423d2e9, - 0xaa6ec095283ee4a7, 0xcf5b1f022e1c9107, 0x01fddf5aed881793,}, - {0x65a572b0d7a7d950, 0xe25c2d8183473a19, 0xc2fcebe7cb877dbd, - 0x05b2d36c769a89b0, 0xba12961be86e9efb, 0x07eb1b29c1dfde1f,}, - {0x93e09572f7c4cd24, 0x364e929076795091, 0x8569467e68af51b5, - 0xa47da89439f5340f, 0xf4fa918082e44d64, 0x0ad52ba3e6695a79,}, - {0x911429844e0d5f54, 0xd03f51a3516bb233, 0x3d587e5640536e66, - 0xfa86d2a3a9a73482, 0xa90ed5adf1ed5537, 0x149c9c326a5e7393,}, - {0x462bbeb03c12921a, 0xdc9af5fa0a274a17, 0x9a558ebde836ebed, - 0x649ef8f11a4fae46, 0x8100e1652b3cdc62, 0x1862bd62c291dacb,}, - {0x05c9b8ca89f12c26, 0x0194160fa9b9ac4f, 0x6a643d5a6879fa2c, - 0x14665bdd8846e19d, 0xbb1d0d53af3ff6bf, 0x12c7e1c3b28962e5,}, - {0xb55ebf900b8a3e17, 0xfedc77ec1a9201c4, 0x1f07db10ea1a4df4, - 0x0dfbd15dc41a594d, 0x389547f2334a5391, 0x02419f98165871a4,}, - {0xb416af000745fc20, 0x8e563e9d1ea6d0f5, 0x7c763e17763a0652, - 0x01458ef0159ebbef, 0x8346fe421f96bb13, 0x0d2d7b829ce324d2,}, - {0x93096bb538d64615, 0x6f2a2619951d823a, 0x8f66b3ea59514fa4, - 0xf563e63704f7092f, 0x724b136c4cf2d9fa, 0x046959cfcfd0bf49,}, - {0xea748d4b6e405346, 0x91e9079c2c02d58f, 0x41064965946d9b59, - 0xa06731f1d2bbe1ee, 0x07f897e267a33f1b, 0x1017290919210e5f,}, - {0x872aa6c17d985097, 0xeecc53161264562a, 0x07afe37afff55002, - 0x54759078e5be6838, 0xc4b92d15db8acca8, 0x106d87d1b51d13b9,}, -}; - -// sqrt_ration optimized for p mod 4 = 3. -// Check if (U/V) is a square, return 1 if yes, 0 otherwise -// If 1 is returned, out contains sqrt(U/V), -// otherwise out is sqrt(z*U/V) -// out should not be the same as U, or V -static int sqrt_ratio_3mod4(fp_t out, const fp_t u, const fp_t v) { - fp_t t0, t1, t2; - - fp_sqr(t1, v); // V^2 - fp_mul(t2, u, v); // U*V - fp_mul(t1, t1, t2); // U*V^3 - fp_exp(out, t1, &bls_prec->p_3div4); // (U*V^3)^((p-3)/4) - fp_mul(out, out, t2); // (U*V)*(U*V^3)^((p-3)/4) = U^((p+1)/4) * V^(3p-5)/4 - - fp_sqr(t0, out); // out^2 - fp_mul(t0, t0, v); // out^2 * V - - int res = 1; - if (fp_cmp(t0, u) != RLC_EQ) { // check whether U/V is a quadratic residue - fp_mul(out, out, bls_prec->sqrt_z); // sqrt(-z)*U*V(UV^3)^((p-3)/4) - res = 0; - } - - return res; -} - -// returns 1 if input is odd and 0 if input is even -static int sign_0(const fp_t in) { -#if FP_RDC == MONTY - bn_t tmp; - fp_prime_back(tmp, in); // TODO: entire reduction may not be needed to get the parity - return bn_is_even(tmp); -#endif - return in[0]&1; -} - -// Maps the field element t to a point p in E1(Fp) where E1: y^2 = g(x) = x^3 + a1*x + b1 -// using optimized non-constant-time Simplified SWU implementation (A.B = 0) -// Outout point p is in Jacobian coordinates to avoid extra inversions. -static inline void map_to_E1_osswu(ep_t p, const fp_t t) { - fp_t t0, t1, t2, t3, t4; - - // get the isogeny map coefficients - ctx_t* ctx = core_get(); - fp_t *a1 = &ctx->ep_iso.a; - fp_t *b1 = &ctx->ep_iso.b; - fp_t *z = &ctx->ep_map_u; - - // compute numerator and denominator of X0(t) = N / D - fp_sqr(t1, t); // t^2 - fp_mul(t1, t1, *z); // z * t^2 - fp_sqr(t2, t1); // z^2 * t^4 - fp_add(t2, t2, t1); // z * t^2 + z^2 * t^4 - fp_add(t3, t2, bls_prec->r); // z * t^2 + z^2 * t^4 + 1 - fp_mul(t3, t3, *b1); // N = b * (z * t^2 + z^2 * t^4 + 1) - - if (fp_is_zero(t2)) { - fp_copy(p->z, bls_prec->a1z); // D = a * z - } else { - fp_mul(p->z, t2, bls_prec->minus_a1); // D = - a * (z * t^2 + z^2 * t^4) - } - - // compute numerator and denominator of g(X0(t)) = U / V - // U = N^3 + a1 * N * D^2 + b1 * D^3 - // V = D^3 - fp_sqr(t2, t3); // N^2 - fp_sqr(t0, p->z); // D^2 - fp_mul(t4, *a1, t0); // a * D^2 - fp_add(t2, t4, t2); // N^2 + a * D^2 - fp_mul(t2, t3, t2); // N^3 + a * N * D^2 - fp_mul(t0, t0, p->z); // V = D^3 - fp_mul(t4, *b1, t0); // b * V = b * D^3 - fp_add(t2, t4, t2); // U = N^3 + a1 * N * D^2 + b1 * D^3 - - // compute sqrt(U/V) - int is_sqr = sqrt_ratio_3mod4(p->y, t2, t0); - if (is_sqr) { - fp_copy(p->x, t3); // x = N - } else { - fp_mul(p->x, t1, t3); // x = N * z * t^2 - fp_mul(t1, t1, t); // z * t^3 - fp_mul(p->y, p->y, t1); // y = z * t^3 * sqrt(r * U/V) where r is 1 or map coefficient z - } - - // negate y to be the same sign of t - if (sign_0(t) != sign_0(p->y)) { - fp_neg(p->y, p->y); // -y - } - - // convert (x/D, y) into Jacobian (X,Y,Z) where Z=D to avoid inversion. - // Z = D, X = x/D * D^2 = x*D , Y = y*D^3 - fp_mul(p->x, p->x, p->z); // X = N*D - fp_mul(p->y, p->y, t0); // Y = y*D^3 - // p->z is already equal to D - p->coord = JACOB; -} - -// This code is taken from https://github.com/kwantam/bls12-381_hash -// and adapted to use Relic modular arithemtic. -// Copyright 2019 Riad S. Wahby -static inline void hornerPolynomial(fp_t accumulator, const fp_t x, const int start_val, const fp_t fp_tmp[]) { - for (int i = start_val; i >= 0; --i) { - fp_mul(accumulator, accumulator, x); // acc *= x - fp_add(accumulator, accumulator, fp_tmp[i]); // acc += next_val - } -} - -// This code is taken from https://github.com/kwantam/bls12-381_hash -// and adapted to use Relic modular arithemtic. -// Copyright 2019 Riad S. Wahby -static inline void compute_map_zvals(fp_t out[], const fp_t inv[], const fp_t zv[], const unsigned len) { - for (unsigned i = 0; i < len; ++i) { - fp_mul(out[i], inv[i], zv[i]); - } -} - -// 11-isogeny map -// computes the mapping of p and stores the result in r -// -// This code is taken from https://github.com/kwantam/bls12-381_hash -// and adapted to use Relic modular arithemtic. The constant tables -// iso_D and iso_N were converted to the Montgomery domain. -// -// Copyright 2019 Riad S. Wahby -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -static inline void eval_iso11(ep_t r, const ep_t p) { - fp_t fp_tmp[32]; - - // precompute even powers of Z up to Z^30 in fp_tmp[31]..fp_tmp[17] - fp_sqr(fp_tmp[31], p->z); // Z^2 - fp_sqr(fp_tmp[30], fp_tmp[31]); // Z^4 - fp_mul(fp_tmp[29], fp_tmp[30], fp_tmp[31]); // Z^6 - fp_sqr(fp_tmp[28], fp_tmp[30]); // Z^8 - fp_mul(fp_tmp[27], fp_tmp[28], fp_tmp[31]); // Z^10 - fp_sqr(fp_tmp[26], fp_tmp[29]); // Z^12 - fp_mul(fp_tmp[25], fp_tmp[26], fp_tmp[31]); // Z^14 - fp_sqr(fp_tmp[24], fp_tmp[28]); // Z^16 - fp_mul(fp_tmp[23], fp_tmp[24], fp_tmp[31]); // Z^18 - fp_sqr(fp_tmp[22], fp_tmp[27]); // Z^20 - fp_mul(fp_tmp[21], fp_tmp[22], fp_tmp[31]); // Z^22 - fp_sqr(fp_tmp[20], fp_tmp[26]); // Z^24 - fp_mul(fp_tmp[19], fp_tmp[20], fp_tmp[31]); // Z^26 - fp_sqr(fp_tmp[18], fp_tmp[25]); // Z^28 - fp_mul(fp_tmp[17], fp_tmp[18], fp_tmp[31]); // Z^30 - - // get isogeny map coefficients - iso_t iso = ep_curve_get_iso(); - // hardcode the constant to avoid warnings of gcc -Wstringop-overread - const int deg_dy = 15; // also equal to iso->deg_yd; - const int deg_dx = 10; // also equal to iso->deg_xd; - // TODO: get N coefficient from Relic and update N computations - - // y = Ny/Dy - // compute Dy - compute_map_zvals(fp_tmp, iso->yd, fp_tmp + 17, deg_dy); // k_(15-i) Z^(2i) - fp_add(fp_tmp[16], p->x, fp_tmp[deg_dy - 1]); // X + k_14 Z^2 - hornerPolynomial(fp_tmp[16], p->x, deg_dy - 2, fp_tmp); // Horner for the rest - fp_mul(fp_tmp[15], fp_tmp[16], fp_tmp[31]); // Dy * Z^2 - fp_mul(fp_tmp[15], fp_tmp[15], p->z); // Dy * Z^3 - - // compute Ny - compute_map_zvals(fp_tmp, bls_prec->iso_Ny, fp_tmp + 17, ELLP_Ny_LEN - 1); // k_(15-i) Z^(2i) - fp_mul(fp_tmp[16], p->x, bls_prec->iso_Ny[ELLP_Ny_LEN - 1]); // k_15 * X - fp_add(fp_tmp[16], fp_tmp[16], fp_tmp[ELLP_Ny_LEN - 2]); // k_15 * X + k_14 Z^2 - hornerPolynomial(fp_tmp[16], p->x, ELLP_Ny_LEN - 3, fp_tmp); // Horner for the rest - fp_mul(fp_tmp[16], fp_tmp[16], p->y); // Ny * Y - - // x = Nx/Dx - // compute Dx - compute_map_zvals(fp_tmp, iso->xd, fp_tmp + 22, deg_dx); // k_(10-i) Z^(2i) - fp_add(fp_tmp[14], p->x, fp_tmp[deg_dx - 1]); // X + k_9 Z^2 - hornerPolynomial(fp_tmp[14], p->x, deg_dx - 2, fp_tmp); // Horner for the rest - fp_mul(fp_tmp[14], fp_tmp[14], fp_tmp[31]); // Dx * Z^2 - - // compute Nx - compute_map_zvals(fp_tmp, bls_prec->iso_Nx, fp_tmp + 21, ELLP_Nx_LEN - 1); // k_(11-i) Z^(2i) - fp_mul(fp_tmp[13], p->x, bls_prec->iso_Nx[ELLP_Nx_LEN - 1]); // k_11 * X - fp_add(fp_tmp[13], fp_tmp[13], fp_tmp[ELLP_Nx_LEN - 2]); // k_11 * X + k_10 * Z^2 - hornerPolynomial(fp_tmp[13], p->x, ELLP_Nx_LEN - 3, fp_tmp); // Dy: Horner for the rest - - // compute the resulting point (Xo,Yo,Zo) - fp_mul(r->z, fp_tmp[14], fp_tmp[15]); // Zo = Dx Dy - fp_mul(r->x, fp_tmp[13], fp_tmp[15]); // Nx Dy - fp_mul(r->x, r->x, r->z); // Xo = Nx Dy Z - fp_sqr(fp_tmp[12], r->z); // Zo^2 - fp_mul(r->y, fp_tmp[16], fp_tmp[14]); // Ny Dx - fp_mul(r->y, r->y, fp_tmp[12]); // Yo = Ny Dx Zo^2 - r->coord = JACOB; -} - -// map an input point in E to a point in G1 by clearing the cofactor of G1 -static void clear_cofactor(ep_t out, const ep_t in) { - bn_t z; - bn_new(z); - fp_prime_get_par(z); - // compute 1-z - bn_neg(z, z); - bn_add_dig(z, z, 1); - ep_mul_dig(out, in, z->dp[0]); // z fits in 64 bits - bn_free(z); -} - -// construction 2 section 5 in in https://eprint.iacr.org/2019/403.pdf -// evaluate the optimized SSWU map twice, add resulting points, apply isogeny map, clear cofactor -// the result is stored in p -// msg is the input message to hash, must be at least 2*(FP_BYTES+16) = 128 bytes -static void map_to_G1_local(ep_t p, const uint8_t *msg, int len) { - RLC_TRY { - if (len < 2*(Fp_BYTES+16)) { - RLC_THROW(ERR_NO_BUFFER); - } - - fp_t t1, t2; - bn_t tmp; - bn_new(tmp); - bn_read_bin(tmp, msg, len/2); - fp_prime_conv(t1, tmp); - bn_read_bin(tmp, msg + len/2, len - len/2); - fp_prime_conv(t2, tmp); - bn_free(tmp); - - ep_t p_temp; - ep_new(p_temp); - // first mapping - map_to_E1_osswu(p_temp, t1); // map to E1 - eval_iso11(p_temp, p_temp); // map to E - - // second mapping - map_to_E1_osswu(p, t2); // map to E1 - eval_iso11(p, p); // map to E - // sum - // TODO: implement point addition in E1 and apply the isogeny map only once. - // Gives 4% improvement for map-to-curve overall - ep_add_jacob(p, p, p_temp); - - // clear the cofactor - clear_cofactor(p, p); // map to G1 - ep_free(p_temp); - } - RLC_CATCH_ANY { - RLC_THROW(ERR_CAUGHT); - } -} -#endif - -// computes a hash of input data to G1 -// construction 2 from section 5 in https://eprint.iacr.org/2019/403.pdf -void map_to_G1(ep_t h, const byte* data, const int len) { - #if hashToPoint==LOCAL_SSWU - map_to_G1_local(h, data, len); - #elif hashToPoint==RELIC_SSWU - ep_map_from_field(h, data, len); - #endif -} diff --git a/crypto/bls12381_utils.c b/crypto/bls12381_utils.c index 19a1b730b5e..fc29046e47f 100644 --- a/crypto/bls12381_utils.c +++ b/crypto/bls12381_utils.c @@ -1,852 +1,1174 @@ -// +build relic - // this file contains utility functions for the curve BLS 12-381 -// these tools are shared by the BLS signature scheme, the BLS based threshold signature -// and the BLS distributed key generation protocols +// these tools are shared by the BLS signature scheme, the BLS based threshold +// signature, BLS-SPoCK and the BLS distributed key generation protocols #include "bls12381_utils.h" -#include "bls_include.h" #include "assert.h" +#include "bls_include.h" -// The functions are tested for ALLOC=AUTO (not for ALLOC=DYNAMIC) +// compile all blst C src along with this file +#include "blst_src.c" + +// make sure flow crypto types are consistent with BLST types +void types_sanity(void) { + assert(sizeof(Fr) == sizeof(vec256)); + assert(sizeof(Fp) == sizeof(vec384)); + assert(sizeof(Fp2) == sizeof(vec384x)); + assert(sizeof(E1) == sizeof(POINTonE1)); + assert(sizeof(E2) == sizeof(POINTonE2)); + assert(sizeof(Fp12) == sizeof(vec384fp12)); +} -// return macro values to the upper Go Layer -int get_valid() { - return VALID; +// ------------------- Fr utilities + +// Montgomery constant R related to the curve order r +// R = (1<<256) mod r +const Fr BLS12_381_rR = {{ + TO_LIMB_T(0x1824b159acc5056f), + TO_LIMB_T(0x998c4fefecbc4ff5), + TO_LIMB_T(0x5884b7fa00034802), + TO_LIMB_T(0x00000001fffffffe), +}}; + +// returns true if a is zero and false otherwise +bool Fr_is_zero(const Fr *a) { return vec_is_zero(a, sizeof(Fr)); } + +// returns true if a == b and false otherwise +bool Fr_is_equal(const Fr *a, const Fr *b) { + return vec_is_equal(a, b, sizeof(Fr)); } -int get_invalid() { - return INVALID; +// sets `a` to limb `l` +void Fr_set_limb(Fr *a, const limb_t l) { + vec_zero((byte *)a + sizeof(limb_t), sizeof(Fr) - sizeof(limb_t)); + *((limb_t *)a) = l; } -void bn_new_wrapper(bn_t a) { - bn_new(a); +void Fr_copy(Fr *res, const Fr *a) { + if ((uptr_t)a == (uptr_t)res) { + return; + } + vec_copy((byte *)res, (byte *)a, sizeof(Fr)); } -// global variable of the pre-computed data -prec_st bls_prec_st; -prec_st* bls_prec = NULL; +// sets `a` to 0 +void Fr_set_zero(Fr *a) { vec_zero((byte *)a, sizeof(Fr)); } -// required constants for the optimized SWU hash to curve -#if (hashToPoint == LOCAL_SSWU) -extern const uint64_t iso_Nx_data[ELLP_Nx_LEN][Fp_DIGITS]; -extern const uint64_t iso_Ny_data[ELLP_Ny_LEN][Fp_DIGITS]; -#endif +void Fr_add(Fr *res, const Fr *a, const Fr *b) { + add_mod_256((limb_t *)res, (limb_t *)a, (limb_t *)b, BLS12_381_r); +} -#if (MEMBERSHIP_CHECK_G1 == BOWE) -extern const uint64_t beta_data[Fp_DIGITS]; -extern const uint64_t z2_1_by3_data[2]; -#endif +void Fr_sub(Fr *res, const Fr *a, const Fr *b) { + sub_mod_256((limb_t *)res, (limb_t *)a, (limb_t *)b, BLS12_381_r); +} -// sets the global variable to input -void precomputed_data_set(const prec_st* p) { - bls_prec = (prec_st*)p; -} - -// Reads a prime field element from a digit vector in big endian format. -// There is no conversion to Montgomery domain in this function. - #define fp_read_raw(a, data_pointer) dv_copy((a), (data_pointer), Fp_DIGITS) - -// pre-compute some data required for curve BLS12-381 -prec_st* init_precomputed_data_BLS12_381() { - bls_prec = &bls_prec_st; - ctx_t* ctx = core_get(); - - // (p-1)/2 - bn_div_dig(&bls_prec->p_1div2, &ctx->prime, 2); - #if (hashToPoint == LOCAL_SSWU) - // (p-3)/4 - bn_div_dig(&bls_prec->p_3div4, &bls_prec->p_1div2, 2); - // sqrt(-z) - fp_neg(bls_prec->sqrt_z, ctx->ep_map_u); - fp_srt(bls_prec->sqrt_z, bls_prec->sqrt_z); - // -a1 and a1*z - fp_neg(bls_prec->minus_a1, ctx->ep_iso.a); - fp_mul(bls_prec->a1z, ctx->ep_iso.a, ctx->ep_map_u); - - for (int i=0; iiso_Nx[i], iso_Nx_data[i]); - for (int i=0; iiso_Ny[i], iso_Ny_data[i]); - #endif - - #if (MEMBERSHIP_CHECK_G1 == BOWE) - bn_new(&bls_prec->beta); - bn_read_raw(&bls_prec->beta, beta_data, Fp_DIGITS); - bn_new(&bls_prec->z2_1_by3); - bn_read_raw(&bls_prec->z2_1_by3, z2_1_by3_data, 2); - #endif - - // Montgomery constant R - fp_set_dig(bls_prec->r, 1); - return bls_prec; -} - -// Initializes Relic context with BLS12-381 parameters -ctx_t* relic_init_BLS12_381() { - // check Relic was compiled with the right conf - assert(ALLOC == AUTO); - - // sanity check of Relic constants the package is relying on - assert(RLC_OK == RLC_EQ); - - // initialize relic core with a new context - ctx_t* bls_ctx = (ctx_t*) calloc(1, sizeof(ctx_t)); - if (!bls_ctx) return NULL; - core_set(bls_ctx); - if (core_init() != RLC_OK) return NULL; - - // init BLS curve - int ret = RLC_OK; - #if (FP_PRIME == 381) - ret = ep_param_set_any_pairf(); // sets B12_P381 if FP_PRIME = 381 in relic config - #else - ep_param_set(B12_P381); - ep2_curve_set_twist(EP_MTYPE); // Multiplicative twist - #endif - - if (ret != RLC_OK) return NULL; - return core_get(); -} - -// seeds relic PRG -void seed_relic(byte* seed, int len) { - #if RAND == HASHD - // instantiate a new DRBG - ctx_t *ctx = core_get(); - ctx->seeded = 0; - #endif - rand_seed(seed, len); -} - -// Exponentiation of a generic point p in G1 -void ep_mult(ep_t res, const ep_t p, const bn_t expo) { - // Using window NAF of size 2 - ep_mul_lwnaf(res, p, expo); -} - -// Exponentiation of generator g1 in G1 -// These two function are here for bench purposes only -void ep_mult_gen_bench(ep_t res, const bn_t expo) { - // Using precomputed table of size 4 - ep_mul_gen(res, (bn_st *)expo); -} - -void ep_mult_generic_bench(ep_t res, const bn_t expo) { - // generic point multiplication - ep_mult(res, &core_get()->ep_g, expo); -} - -// Exponentiation of a generic point p in G2 -void ep2_mult(ep2_t res, ep2_t p, bn_t expo) { - // Using window NAF of size 2 - ep2_mul_lwnaf(res, p, expo); -} - -// Exponentiation of fixed g2 in G2 -void ep2_mult_gen(ep2_t res, const bn_t expo) { - // Using precomputed table of size 4 - g2_mul_gen(res, (bn_st*)expo); -} - -// DEBUG printing functions -void bytes_print_(char* s, byte* data, int len) { - printf("[%s]:\n", s); - for (int i=0; iep_r); -} - -// Reads a scalar from an array and maps it to Zr. -// The resulting scalar `a` satisfies 0 <= a < r. -// `len` must be less than BITS_TO_BYTES(RLC_BN_BITS). -// It returns VALID if scalar is zero and INVALID otherwise -int bn_map_to_Zr(bn_t a, const uint8_t* bin, int len) { - bn_t tmp; - bn_new(tmp); - bn_new_size(tmp, BYTES_TO_DIGITS(len)); - bn_read_bin(tmp, bin, len); - bn_mod(a, tmp, &core_get()->ep_r); - bn_rand(tmp, RLC_POS, len << 3); // overwrite tmp - bn_free(tmp); - if (bn_cmp_dig(a, 0) == RLC_EQ) { - return VALID; - } - return INVALID; +// res = a*R^(-1) +void Fr_from_montg(Fr *res, const Fr *a) { + from_mont_256((limb_t *)res, (limb_t *)a, BLS12_381_r, r0); } -// Reads a scalar from an array and maps it to Zr*. -// The resulting scalar `a` satisfies 0 < a < r. -// `len` must be less than BITS_TO_BYTES(RLC_BN_BITS) -void bn_map_to_Zr_star(bn_t a, const uint8_t* bin, int len) { - bn_t tmp; - bn_new(tmp); - bn_new_size(tmp, BYTES_TO_DIGITS(len)); - bn_read_bin(tmp, bin, len); - bn_t r_1; - bn_new(r_1); - bn_sub_dig(r_1, &core_get()->ep_r, 1); - bn_mod_basic(a,tmp,r_1); - bn_add_dig(a,a,1); - bn_rand(tmp, RLC_POS, len << 3); // overwrite tmp - bn_free(tmp); - bn_free(r_1); -} - -// returns the sign of y. -// 1 if y > (p - 1)/2 and 0 otherwise. -static int fp_get_sign(const fp_t y) { - bn_t bn_y; - bn_new(bn_y); - fp_prime_back(bn_y, y); - return bn_cmp(bn_y, &bls_prec->p_1div2) == RLC_GT; -} - -// ep_write_bin_compact exports a point a in E(Fp) to a buffer bin in a compressed or uncompressed form. -// len is the allocated size of the buffer bin. -// The serialization is following: -// https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) -// The code is a modified version of Relic ep_write_bin -void ep_write_bin_compact(byte *bin, const ep_t a, const int len) { - const int G1_size = (G1_BYTES/(G1_SERIALIZATION+1)); - - if (len!=G1_size) { - RLC_THROW(ERR_NO_BUFFER); - return; - } - - if (ep_is_infty(a)) { - // set the infinity bit - bin[0] = (G1_SERIALIZATION << 7) | 0x40; - memset(bin+1, 0, G1_size-1); - return; - } +// res = a^(-1)*R +void Fr_inv_montg_eucl(Fr *res, const Fr *a) { + // copied and modified from BLST code + // Copyright Supranational LLC + static const vec256 rx2 = { + /* left-aligned value of the modulus */ + TO_LIMB_T(0xfffffffe00000002), + TO_LIMB_T(0xa77b4805fffcb7fd), + TO_LIMB_T(0x6673b0101343b00a), + TO_LIMB_T(0xe7db4ea6533afa90), + }; + vec512 temp; + ct_inverse_mod_256(temp, (limb_t *)a, BLS12_381_r, rx2); + redc_mont_256((limb_t *)res, temp, BLS12_381_r, r0); +} - RLC_TRY { - ep_t t; - ep_null(t); - ep_new(t); - ep_norm(t, a); - fp_write_bin(bin, Fp_BYTES, t->x); - - if (G1_SERIALIZATION == COMPRESSED) { - bin[0] |= (fp_get_sign(t->y) << 5); - } else { - fp_write_bin(bin + Fp_BYTES, Fp_BYTES, t->y); - } - ep_free(t); - } RLC_CATCH_ANY { - RLC_THROW(ERR_CAUGHT); - } +// computes the sum of the array elements and writes the sum in jointx +void Fr_sum_vector(Fr *jointx, const Fr x[], const int x_len) { + Fr_set_zero(jointx); + for (int i = 0; i < x_len; i++) { + Fr_add(jointx, jointx, &x[i]); + } +} - bin[0] |= (G1_SERIALIZATION << 7); - } - -// fp_read_bin_safe is a modified version of Relic's (void fp_read_bin). -// It reads a field element from a buffer and makes sure the big number read can be -// written as a field element (is reduced modulo p). -// Unlike Relic's versions, the function does not reduce the read integer modulo p and does -// not throw an exception for an integer larger than p. The function returns RLC_OK if the input -// corresponds to a field element, and returns RLC_ERR otherwise. -static int fp_read_bin_safe(fp_t a, const uint8_t *bin, int len) { - if (len != Fp_BYTES) { - return RLC_ERR; +// internal type of BLST `pow256` uses bytes little endian. +// input is bytes big endian as used by Flow crypto lib external scalars. +static void pow256_from_be_bytes(pow256 ret, const byte a[Fr_BYTES]) { + byte *b = (byte *)a + Fr_BYTES - 1; + if ((uptr_t)ret == (uptr_t)a) { // swap in place + for (int i = 0; i < Fr_BYTES / 2; i++) { + byte tmp = *ret; + *(ret++) = *b; + *(b--) = tmp; } + } else { + for (int i = 0; i < Fr_BYTES; i++) { + *(ret++) = *(b--); + } + } +} - int ret = RLC_ERR; - bn_t t; - bn_new(t); - bn_read_bin(t, bin, Fp_BYTES); +// internal type of BLST `pow256` uses bytes little endian. +static void pow256_from_Fr(pow256 ret, const Fr *in) { + le_bytes_from_limbs(ret, (limb_t *)in, Fr_BYTES); +} - // make sure read bn is reduced modulo p - // first check is sanity check, since current implementation of `bn_read_bin` insures - // output bn is positive - if (bn_sign(t) == RLC_NEG || bn_cmp(t, &core_get()->prime) != RLC_LT) { - goto out; - } +// reads a scalar in `a` and checks it is a valid Fr element (a < r). +// input is bytes-big-endian. +// returns: +// - BAD_ENCODING if the length is invalid +// - BAD_VALUE if the scalar isn't in Fr +// - VALID if the scalar is valid +ERROR Fr_read_bytes(Fr *a, const byte *in, int in_len) { + if (in_len != Fr_BYTES) { + return BAD_ENCODING; + } + // compare to r using BLST internal function + pow256 tmp; + pow256_from_be_bytes(tmp, in); + // (check_mod_256 compares pow256 against a vec256!) + if (!check_mod_256(tmp, BLS12_381_r)) { + return BAD_VALUE; + } + vec_zero(tmp, sizeof(tmp)); + limbs_from_be_bytes((limb_t *)a, in, Fr_BYTES); + return VALID; +} - if (bn_is_zero(t)) { - fp_zero(a); - } else { - if (t->used == 1) { - fp_prime_conv_dig(a, t->dp[0]); - } else { - fp_prime_conv(a, t); - } - } - ret = RLC_OK; -out: - bn_free(t); +// reads a scalar in `a` and checks it is a valid Fr_star element (0 < a < r). +// input bytes are big endian. +// returns: +// - BAD_ENCODING if the length is invalid +// - BAD_VALUE if the scalar isn't in Fr_star +// - VALID if the scalar is valid +ERROR Fr_star_read_bytes(Fr *a, const byte *in, int in_len) { + int ret = Fr_read_bytes(a, in, in_len); + if (ret != VALID) { return ret; + } + // check if a=0 + if (Fr_is_zero(a)) { + return BAD_VALUE; + } + return VALID; } -// ep_read_bin_compact imports a point from a buffer in a compressed or uncompressed form. -// len is the size of the input buffer. +// write Fr element `a` in big endian bytes. +void Fr_write_bytes(byte *out, const Fr *a) { + // be_bytes_from_limbs works for both limb endianness types + be_bytes_from_limbs(out, (limb_t *)a, Fr_BYTES); +} + +// maps big-endian bytes of any size into an Fr element using modular reduction. +// Input is byte-big-endian, output is Fr (internally vec256). // -// The resulting point is guaranteed to be on the curve E1. -// The serialization follows: -// https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) -// The code is a modified version of Relic ep_read_bin +// Note: could use redc_mont_256(vec256 ret, const vec512 a, const vec256 p, +// limb_t n0) to reduce 512 bits at a time. +static void Fr_from_be_bytes(Fr *out, const byte *in, const int in_len) { + // input can be written in base 2^|R|, with R the Montgomery constant + // N = l_1 + L_2*2^|R| .. + L_n*2^(|R|*(n-1)) + // Therefore N mod p can be expressed using R as: + // N mod p = l_1 + L_2*R .. + L_n*R^(n-1) + Fr digit, radix; + Fr_set_zero(out); + Fr_copy(&radix, (Fr *)BLS12_381_rRR); // R^2 + + int n = in_len; + byte *p = (byte *)in + in_len; + while (n > Fr_BYTES) { + // limbs_from_be_bytes works for both limb endiannesses + limbs_from_be_bytes((limb_t *)&digit, p -= Fr_BYTES, Fr_BYTES); // l_i + Fr_mul_montg(&digit, &digit, + &radix); // l_i * R^i (i is the loop number starting at 1) + Fr_add(out, out, &digit); + Fr_mul_montg(&radix, &radix, (Fr *)BLS12_381_rRR); // R^(i+1) + n -= Fr_BYTES; + } + Fr_set_zero(&digit); + limbs_from_be_bytes((limb_t *)&digit, p - n, n); + Fr_mul_montg(&digit, &digit, &radix); + Fr_add(out, out, &digit); + // at this point : out = l_1*R + L_2*R^2 .. + L_n*R^n, + // reduce the extra R + Fr_from_montg(out, out); + // clean up possible sensitive data + Fr_set_zero(&digit); +} + +// Reads a scalar from an array and maps it to Fr using modular reduction. +// Input is byte-big-endian as used by the external APIs. +// It returns true if scalar is zero and false otherwise. +bool map_bytes_to_Fr(Fr *a, const byte *in, int in_len) { + Fr_from_be_bytes(a, in, in_len); + return Fr_is_zero(a); +} + +// ------------------- Fp utilities + +// Montgomery constants related to the prime p +const Fp BLS12_381_pR = {ONE_MONT_P}; /* R mod p = (1<<384)%p */ + +// sets `a` to 0 +static void Fp_set_zero(Fp *a) { vec_zero((byte *)a, sizeof(Fp)); } + +// sets `a` to limb `l` +static void Fp_set_limb(Fp *a, const limb_t l) { + vec_zero((byte *)a + sizeof(limb_t), sizeof(Fp) - sizeof(limb_t)); + *((limb_t *)a) = l; +} + +void Fp_copy(Fp *res, const Fp *a) { + if ((uptr_t)a == (uptr_t)res) { + return; + } + vec_copy((byte *)res, (byte *)a, sizeof(Fp)); +} + +static void Fp_add(Fp *res, const Fp *a, const Fp *b) { + add_mod_384((limb_t *)res, (limb_t *)a, (limb_t *)b, BLS12_381_P); +} + +static void Fp_sub(Fp *res, const Fp *a, const Fp *b) { + sub_mod_384((limb_t *)res, (limb_t *)a, (limb_t *)b, BLS12_381_P); +} + +static void Fp_neg(Fp *res, const Fp *a) { + cneg_mod_384((limb_t *)res, (limb_t *)a, 1, BLS12_381_P); +} + +// checks if `a` is a quadratic residue in Fp. If yes, it computes +// the square root in `res`. // -// It returns RLC_OK if the inputs are valid (input buffer lengths are valid and coordinates correspond -// to a point on curve) and the execution completes, and RLC_ERR otherwise. -int ep_read_bin_compact(ep_t a, const byte *bin, const int len) { - // check the length - const int G1_size = (G1_BYTES/(G1_SERIALIZATION+1)); - if (len!=G1_size) { - return RLC_ERR; - } +// The boolean output is valid whether `a` is in Montgomery form or not, +// since montgomery constant `R` is a quadratic residue. +// However, the square root is valid only if `a` is in montgomery form. +static bool Fp_sqrt_montg(Fp *res, const Fp *a) { + return sqrt_fp((limb_t *)res, (limb_t *)a); +} - // check the compression bit - int compressed = bin[0] >> 7; - if ((compressed == 1) != (G1_SERIALIZATION == COMPRESSED)) { - return RLC_ERR; - } - - // check if the point is infinity - int is_infinity = bin[0] & 0x40; - if (is_infinity) { - // check if the remaining bits are cleared - if (bin[0] & 0x3F) { - return RLC_ERR; - } - for (int i=1; i> 5) & 1; - if (y_sign && (!compressed)) { - return RLC_ERR; - } - - a->coord = BASIC; - fp_set_dig(a->z, 1); - // use a temporary buffer to mask the header bits and read a.x - byte temp[Fp_BYTES]; - memcpy(temp, bin, Fp_BYTES); - temp[0] &= 0x1F; - if (fp_read_bin_safe(a->x, temp, sizeof(temp)) != RLC_OK) { - return RLC_ERR; - } +static bool Fp_check(const Fp *a) { + // use same method as in BLST internal function + // which seems the most efficient. The method uses the assembly-based + // modular addition instead of limbs comparison + Fp temp; + Fp_add(&temp, a, &ZERO_384); + return vec_is_equal(&temp, a, Fp_BYTES); + // no need to clear `tmp` as no current use-case involves sensitive data being + // passed as `a` +} - if (G1_SERIALIZATION == UNCOMPRESSED) { - if (fp_read_bin_safe(a->y, bin + Fp_BYTES, Fp_BYTES) != RLC_OK) { - return RLC_ERR; - } - // check read point is on curve - if (!ep_on_curve(a)) { - return RLC_ERR; - } - return RLC_OK; - } - fp_zero(a->y); - fp_set_bit(a->y, 0, y_sign); - if (ep_upk(a, a) == 1) { - // resulting point is guaranteed to be on curve - return RLC_OK; - } - return RLC_ERR; +// res = a*b*R^(-1) +void Fp_mul_montg(Fp *res, const Fp *a, const Fp *b) { + mul_mont_384((limb_t *)res, (limb_t *)a, (limb_t *)b, BLS12_381_P, p0); } +// res = a^2 * R^(-1) +void Fp_squ_montg(Fp *res, const Fp *a) { + sqr_mont_384((limb_t *)res, (limb_t *)a, BLS12_381_P, p0); +} -// returns the sign of y. -// sign(y_0) if y_1 = 0, else sign(y_1) -static int fp2_get_sign(fp2_t y) { - if (fp_is_zero(y[1])) { // no need to convert back as the montgomery form of 0 is 0 - return fp_get_sign(y[0]); - } - return fp_get_sign(y[1]); +// res = a*R +void Fp_to_montg(Fp *res, const Fp *a) { + mul_mont_384((limb_t *)res, (limb_t *)a, BLS12_381_RR, BLS12_381_P, p0); } -// ep2_write_bin_compact exports a point in E(Fp^2) to a buffer in a compressed or uncompressed form. -// len is the allocated size of the buffer bin. -// The serialization is following: -// https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) -// The code is a modified version of Relic ep2_write_bin -void ep2_write_bin_compact(byte *bin, const ep2_t a, const int len) { - ep2_t t; - ep2_null(t); - const int G2_size = (G2_BYTES/(G2_SERIALIZATION+1)); - - if (len!=G2_size) { - RLC_THROW(ERR_NO_BUFFER); - return; - } - - if (ep2_is_infty((ep2_st *)a)) { - // set the infinity bit - bin[0] = (G2_SERIALIZATION << 7) | 0x40; - memset(bin+1, 0, G2_size-1); - return; - } +// res = a*R^(-1) +void Fp_from_montg(Fp *res, const Fp *a) { + from_mont_384((limb_t *)res, (limb_t *)a, BLS12_381_P, p0); +} - RLC_TRY { - ep2_new(t); - ep2_norm(t, (ep2_st *)a); - fp2_write_bin(bin, Fp2_BYTES, t->x, 0); - - if (G2_SERIALIZATION == COMPRESSED) { - bin[0] |= (fp2_get_sign(t->y) << 5); - } else { - fp2_write_bin(bin + Fp2_BYTES, Fp2_BYTES, t->y, 0); - } - } RLC_CATCH_ANY { - RLC_THROW(ERR_CAUGHT); - } +// reads a scalar in `out` and checks it is a valid Fp element (out < p). +// input is bytes-big-endian. +// returns: +// - BAD_ENCODING if the length is invalid +// - BAD_VALUE if the scalar isn't in Fp +// - VALID if the scalar is valid +ERROR Fp_read_bytes(Fp *out, const byte *in, int in_len) { + if (in_len != Fp_BYTES) { + return BAD_ENCODING; + } + limbs_from_be_bytes((limb_t *)out, in, Fp_BYTES); + // compare read scalar to p + if (!Fp_check(out)) { + return BAD_VALUE; + } + return VALID; +} - bin[0] |= (G2_SERIALIZATION << 7); - ep_free(t); +// write Fp element to `out`, +// assuming `out` has `Fp_BYTES` allocated bytes. +void Fp_write_bytes(byte *out, const Fp *a) { + be_bytes_from_limbs(out, (limb_t *)a, Fp_BYTES); } -// fp2_read_bin_safe is a modified version of Relic's (void fp2_read_bin). -// It reads an Fp^2 element from a buffer and makes sure the big numbers read can be -// written as field elements (are reduced modulo p). -// Unlike Relic's versions, the function does not reduce the read integers modulo p and does -// not throw an exception for integers larger than p. The function returns RLC_OK if the input -// corresponds to a field element in Fp^2, and returns RLC_ERR otherwise. -static int fp2_read_bin_safe(fp2_t a, const uint8_t *bin, int len) { - if (len != Fp2_BYTES) { - return RLC_ERR; - } - if (fp_read_bin_safe(a[0], bin, Fp_BYTES) != RLC_OK) { - return RLC_ERR; - } - if (fp_read_bin_safe(a[1], bin + Fp_BYTES, Fp_BYTES) != RLC_OK) { - return RLC_ERR; - } - return RLC_OK; +// returns the sign of y: +// 1 if y > (p - 1)/2 and 0 otherwise. +// y is in montgomery form! +static byte Fp_get_sign(const Fp *y) { + // - BLST's sgn0_pty_mont_384 requires input to be in Montg form. + // - The needed sign bit is on position 1 + return (sgn0_pty_mont_384((const limb_t *)y, BLS12_381_P, p0) >> 1) & 1; } -// ep2_read_bin_compact imports a point from a buffer in a compressed or uncompressed form. -// The resulting point is guaranteed to be on curve E2. +// ------------------- Fp^2 utilities + +// sets `a` to limb `l` +static void Fp2_set_limb(Fp2 *a, const limb_t l) { + Fp_set_limb(&real(a), l); + Fp_set_zero(&imag(a)); +} + +static void Fp2_add(Fp2 *res, const Fp2 *a, const Fp2 *b) { + add_mod_384x((vec384 *)res, (vec384 *)a, (vec384 *)b, BLS12_381_P); +} + +static void Fp2_sub(Fp2 *res, const Fp2 *a, const Fp2 *b) { + sub_mod_384x((vec384 *)res, (vec384 *)a, (vec384 *)b, BLS12_381_P); +} + +static void Fp2_neg(Fp2 *res, const Fp2 *a) { + cneg_mod_384(real(res), real(a), 1, BLS12_381_P); + cneg_mod_384(imag(res), imag(a), 1, BLS12_381_P); +} + +// res = a*b in montgomery form +static void Fp2_mul_montg(Fp2 *res, const Fp2 *a, const Fp2 *b) { + mul_mont_384x((vec384 *)res, (vec384 *)a, (vec384 *)b, BLS12_381_P, p0); +} + +// res = a^2 in montgomery form +static void Fp2_squ_montg(Fp2 *res, const Fp2 *a) { + sqr_mont_384x((vec384 *)res, (vec384 *)a, BLS12_381_P, p0); +} + +// checks if `a` is a quadratic residue in Fp^2. If yes, it computes +// the square root in `res`. // -// It returns RLC_OK if the inputs are valid (input buffer lengths are valid and read coordinates -// correspond to a point on curve) and the execution completes and RLC_ERR otherwise. -// The code is a modified version of Relic ep2_read_bin -int ep2_read_bin_compact(ep2_t a, const byte *bin, const int len) { - // check the length - const int G2size = (G2_BYTES/(G2_SERIALIZATION+1)); - if (len!=G2size) { - return RLC_ERR; - } +// The boolean output is valid whether `a` is in Montgomery form or not, +// since montgomery constant `R` is itself a quadratic residue. +// However, the square root is correct only if `a` is in montgomery form +// (the square root would be in montgomery form too). +static bool Fp2_sqrt_montg(Fp2 *res, const Fp2 *a) { + return sqrt_fp2((vec384 *)res, (vec384 *)a); +} - // check the compression bit - int compressed = bin[0] >> 7; - if ((compressed == 1) != (G2_SERIALIZATION == COMPRESSED)) { - return RLC_ERR; - } - - // check if the point in infinity - int is_infinity = bin[0] & 0x40; - if (is_infinity) { - // the remaining bits need to be cleared - if (bin[0] & 0x3F) { - return RLC_ERR; - } - for (int i=1; i> 5) & 1; - if (y_sign && (!compressed)) { - return RLC_ERR; - } - - a->coord = BASIC; - fp2_set_dig(a->z, 1); // a.z - // use a temporary buffer to mask the header bits and read a.x - byte temp[Fp2_BYTES]; - memcpy(temp, bin, Fp2_BYTES); - temp[0] &= 0x1F; // clear the header bits - if (fp2_read_bin_safe(a->x, temp, sizeof(temp)) != RLC_OK) { - return RLC_ERR; - } +// returns the sign of y: +// sign(y_0) if y_1 = 0, else sign(y_1). +// y coordinates must be in montgomery form! +static byte Fp2_get_sign(Fp2 *y) { + // - BLST's sgn0_pty_mont_384x requires input to be in montgomery form. + // - the sign bit is on position 1 + return (sgn0_pty_mont_384x((vec384 *)y, BLS12_381_P, p0) >> 1) & 1; +} - if (G2_SERIALIZATION == UNCOMPRESSED) { - if (fp2_read_bin_safe(a->y, bin + Fp2_BYTES, Fp2_BYTES) != RLC_OK){ - return RLC_ERR; - } - // check read point is on curve - if (!ep2_on_curve(a)) { - return RLC_ERR; - } - return RLC_OK; - } - - fp2_zero(a->y); - fp_set_bit(a->y[0], 0, y_sign); - fp_zero(a->y[1]); - if (ep2_upk(a, a) == 1) { - // resulting point is guaranteed to be on curve - return RLC_OK; - } - return RLC_ERR; +// reads an Fp^2 element in `a`. +// input is a serialization of real(a) concatenated to serializetion of imag(a). +// a[i] are both Fp elements. +// returns: +// - BAD_ENCODING if the length is invalid +// - BAD_VALUE if the scalar isn't in Fp +// - VALID if the scalar is valid +static ERROR Fp2_read_bytes(Fp2 *a, const byte *in, int in_len) { + if (in_len != Fp2_BYTES) { + return BAD_ENCODING; + } + ERROR ret = Fp_read_bytes(&real(a), in, Fp_BYTES); + if (ret != VALID) { + return ret; + } + ret = Fp_read_bytes(&imag(a), in + Fp_BYTES, Fp_BYTES); + if (ret != VALID) { + return ret; + } + return VALID; } -// reads a scalar in a and checks it is a valid Zr element (a < r) -// returns RLC_OK if the scalar is valid and RLC_ERR otherwise. -int bn_read_Zr_bin(bn_t a, const uint8_t *bin, int len) { - if (len!=Fr_BYTES) { - return RLC_ERR; - } - bn_read_bin(a, bin, Fr_BYTES); - bn_t r; - bn_new(r); - g2_get_ord(r); - if (bn_cmp(a, r) == RLC_LT) { - return RLC_OK; +// write Fp2 element to bin and assume `bin` has `Fp2_BYTES` allocated bytes. +void Fp2_write_bytes(byte *out, const Fp2 *a) { + Fp_write_bytes(out, &real(a)); + Fp_write_bytes(out + Fp_BYTES, &imag(a)); +} + +// ------------------- E1 utilities + +void E1_copy(E1 *res, const E1 *p) { + if ((uptr_t)p == (uptr_t)res) { + return; + } + vec_copy(res, p, sizeof(E1)); +} + +// checks p1 == p2 +bool E1_is_equal(const E1 *p1, const E1 *p2) { + // `POINTonE1_is_equal` includes the infinity case + return POINTonE1_is_equal((const POINTonE1 *)p1, (const POINTonE1 *)p2); +} + +// compare `p` to infinity +bool E1_is_infty(const E1 *p) { + // BLST infinity points are defined by Z=0 + return vec_is_zero(p->z, sizeof(p->z)); +} + +// set `p` to infinity +void E1_set_infty(E1 *p) { + // BLST infinity points are defined by Z=0 + vec_zero(p->z, sizeof(p->z)); +} + +// converts an E1 point from Jacobian into affine coordinates (z=1) +void E1_to_affine(E1 *res, const E1 *p) { + // optimize in case coordinates are already affine + if (vec_is_equal(p->z, BLS12_381_pR, Fp_BYTES)) { + E1_copy(res, p); + return; + } + // convert from Jacobian + POINTonE1_from_Jacobian((POINTonE1 *)res, (const POINTonE1 *)p); +} + +// checks affine point `p` is in E1 +bool E1_affine_on_curve(const E1 *p) { + // BLST's `POINTonE1_affine_on_curve` does not include the infinity case! + return POINTonE1_affine_on_curve((POINTonE1_affine *)p) | E1_is_infty(p); +} + +// checks if input E1 point is on the subgroup G1. +// It assumes input `p` is on E1. +bool E1_in_G1(const E1 *p) { + // currently uses Scott method + return POINTonE1_in_G1((const POINTonE1 *)p); +} + +// E1_read_bytes imports a E1(Fp) point from a buffer in a compressed or +// uncompressed form. The resulting point is guaranteed to be on curve E1 (no G1 +// check is included). Expected serialization follows: +// https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) +// +// returns: +// - BAD_ENCODING if the length is invalid or serialization header bits are +// invalid +// - BAD_VALUE if Fp coordinates couldn't deserialize +// - POINT_NOT_ON_CURVE if deserialized point isn't on E1 +// - VALID if deserialization is valid + +// Note: could use POINTonE1_Deserialize_BE and POINTonE1_Uncompress_Z, +// but needs to update the logic around G2 subgroup check +ERROR E1_read_bytes(E1 *a, const byte *in, const int in_len) { + // check the length + if (in_len != G1_SER_BYTES) { + return BAD_ENCODING; + } + + // check the compression bit + int compressed = in[0] >> 7; + if ((compressed == 1) != (G1_SERIALIZATION == COMPRESSED)) { + return BAD_ENCODING; + } + + // check if the point in infinity + int is_infinity = in[0] & 0x40; + if (is_infinity) { + // the remaining bits need to be cleared + if (in[0] & 0x3F) { + return BAD_ENCODING; } - return RLC_ERR; -} - -// computes the sum of the array elements x and writes the sum in jointx -// the sum is computed in Zr -void bn_sum_vector(bn_t jointx, const bn_st* x, const int len) { - bn_t r; - bn_new(r); - g2_get_ord(r); - bn_set_dig(jointx, 0); - bn_new_size(jointx, BITS_TO_DIGITS(Fr_BITS+1)); - for (int i=0; i> 5) & 1; + if (y_sign && (!compressed)) { + return BAD_ENCODING; + } + + // use a temporary buffer to mask the header bits and read a.x + byte temp[Fp_BYTES]; + memcpy(temp, in, Fp_BYTES); + temp[0] &= 0x1F; // clear the header bits + ERROR ret = Fp_read_bytes(&a->x, temp, sizeof(temp)); + if (ret != VALID) { + return ret; + } + Fp_to_montg(&a->x, &a->x); -// computes the sum of the G2 array elements y and writes the sum in jointy -void ep2_sum_vector(ep2_t jointy, ep2_st* y, const int len){ - ep2_set_infty(jointy); - for (int i=0; iz, &BLS12_381_pR); + + if (G1_SERIALIZATION == UNCOMPRESSED) { + ret = Fp_read_bytes(&a->y, in + Fp_BYTES, sizeof(a->y)); + if (ret != VALID) { + return ret; } - ep2_norm(jointy, jointy); // not necessary but left here to optimize the - // multiple pairing computations with the same - // public key -} - -// Verifies the validity of 2 SPoCK proofs and 2 public keys. -// Membership check in G1 of both proofs is verified in this function. -// Membership check in G2 of both keys is not verified in this function. -// the membership check in G2 is separated to allow optimizing multiple verifications -// using the same public keys. -int bls_spock_verify(const ep2_t pk1, const byte* sig1, const ep2_t pk2, const byte* sig2) { - ep_t elemsG1[2]; - ep2_t elemsG2[2]; - - // elemsG1[0] = s1 - ep_new(elemsG1[0]); - int read_ret = ep_read_bin_compact(elemsG1[0], sig1, SIGNATURE_LEN); - if (read_ret != RLC_OK) - return read_ret; - - // check s1 is in G1 - if (check_membership_G1(elemsG1[0]) != VALID) // only enabled if MEMBERSHIP_CHECK==1 - return INVALID; - - // elemsG1[1] = s2 - ep_new(elemsG1[1]); - read_ret = ep_read_bin_compact(elemsG1[1], sig2, SIGNATURE_LEN); - if (read_ret != RLC_OK) - return read_ret; - - // check s2 in G1 - if (check_membership_G1(elemsG1[1]) != VALID) // only enabled if MEMBERSHIP_CHECK==1 - return INVALID; - - // elemsG2[1] = pk1 - ep2_new(elemsG2[1]); - ep2_copy(elemsG2[1], (ep2_st*)pk1); - - // elemsG2[0] = pk2 - ep2_new(elemsG2[0]); - ep2_copy(elemsG2[0], (ep2_st*)pk2); - -#if DOUBLE_PAIRING - // elemsG2[0] = -pk2 - ep2_neg(elemsG2[0], elemsG2[0]); - - fp12_t pair; - fp12_new(&pair); - // double pairing with Optimal Ate - pp_map_sim_oatep_k12(pair, (ep_t*)(elemsG1) , (ep2_t*)(elemsG2), 2); - - // compare the result to 1 - int res = fp12_cmp_dig(pair, 1); - -#elif SINGLE_PAIRING - fp12_t pair1, pair2; - fp12_new(&pair1); fp12_new(&pair2); - pp_map_oatep_k12(pair1, elemsG1[0], elemsG2[0]); - pp_map_oatep_k12(pair2, elemsG1[1], elemsG2[1]); - - int res = fp12_cmp(pair1, pair2); -#endif - fp12_free(&one); - ep_free(elemsG1[0]); - ep_free(elemsG1[1]); - ep2_free(elemsG2[0]); - ep2_free(elemsG2[1]); - - if (core_get()->code == RLC_OK) { - if (res == RLC_EQ) return VALID; - return INVALID; + Fp_to_montg(&a->y, &a->y); + // check read point is on curve + if (!E1_affine_on_curve(a)) { + return POINT_NOT_ON_CURVE; } - return UNDEFINED; + return VALID; + } + + // compute the possible square root + Fp_squ_montg(&a->y, &a->x); + Fp_mul_montg(&a->y, &a->y, &a->x); // x^3 + Fp_add(&a->y, &a->y, &B_E1); // B_E1 is already in montg form + // check whether x^3+b is a quadratic residue + if (!Fp_sqrt_montg(&a->y, &a->y)) { + return POINT_NOT_ON_CURVE; + } + + // resulting (x,y) is guaranteed to be on curve (y is already in montg form) + if (Fp_get_sign(&a->y) != y_sign) { + Fp_neg(&a->y, &a->y); // flip y sign if needed + } + return VALID; } -// Subtracts the sum of a G2 array elements y from an element x and writes the -// result in res -void ep2_subtract_vector(ep2_t res, ep2_t x, ep2_st* y, const int len){ - ep2_sum_vector(res, y, len); - ep2_neg(res, res); - ep2_add_projc(res, x, res); +// E1_write_bytes exports a point in E1(Fp) to a buffer in a compressed or +// uncompressed form. It assumes buffer is of length G1_SER_BYTES The +// serialization follows: +// https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) +void E1_write_bytes(byte *out, const E1 *a) { + if (E1_is_infty(a)) { + memset(out, 0, G1_SER_BYTES); + // set the infinity bit + out[0] = (G1_SERIALIZATION << 7) | (1 << 6); + return; + } + E1 tmp; + E1_to_affine(&tmp, a); + + Fp_from_montg(&tmp.x, &tmp.x); + Fp_write_bytes(out, &tmp.x); + + if (G1_SERIALIZATION == COMPRESSED) { + out[0] |= (Fp_get_sign(&tmp.y) << 5); + } else { + Fp_from_montg(&tmp.y, &tmp.y); + Fp_write_bytes(out + Fp_BYTES, &tmp.y); + } + // compression bit + out[0] |= (G1_SERIALIZATION << 7); } -// computes the sum of the G1 array elements y and writes the sum in jointy -void ep_sum_vector(ep_t jointx, ep_st* x, const int len) { - ep_set_infty(jointx); - for (int i=0; iep_r); - if (!ep_is_infty(inf)){ - ep_free(inf); - return INVALID; - } - ep_free(inf); - return VALID; + return error; } -// uses a simple scalar multiplication by G1's order -// to check whether a point on the curve E2 is in G2. -int simple_subgroup_check_G2(const ep2_t p){ - ep2_t inf; - ep2_new(inf); - // check p^order == infinity - // use basic double & add as lwnaf reduces the expo modulo r - ep2_mul_basic(inf, (ep2_st*)p, &core_get()->ep_r); - if (!ep2_is_infty(inf)){ - ep2_free(inf); - return INVALID; - } - ep2_free(inf); - return VALID; +// Exponentiation of generator g1 of G1, res = expo.g1 +void G1_mult_gen(E1 *res, const Fr *expo) { + pow256 tmp; + pow256_from_Fr(tmp, expo); + POINTonE1_mult_glv((POINTonE1 *)res, &BLS12_381_G1, tmp); + vec_zero(&tmp, sizeof(tmp)); +} + +// Reads a scalar bytes and maps it to Fp using modular reduction. +// output is in Montgomery form. +// `in_len` must be less or equal to 96 bytes and must be a multiple of 8. +// This function is only used by `map_to_G1` where input is 64 bytes. +// input `in_len` is not checked to satisfy the conditions above. +static void map_96_bytes_to_Fp(Fp *a, const byte *in, int in_len) { + vec768 tmp; + vec_zero(&tmp, sizeof(tmp)); + limbs_from_be_bytes((limb_t *)tmp, in, in_len); + redc_mont_384((limb_t *)a, tmp, BLS12_381_P, p0); // aR^(-2) + Fp_mul_montg(a, a, (Fp *)BLS12_381_RRRR); // aR +} + +// maps bytes input `hash` to G1. +// `hash` must be `MAP_TO_G1_INPUT_LEN` (128 bytes) +// It uses construction 2 from section 5 in https://eprint.iacr.org/2019/403.pdf +int map_to_G1(E1 *h, const byte *hash, const int hash_len) { + // sanity check of length + if (hash_len != MAP_TO_G1_INPUT_LEN) { + return INVALID; + } + // map to field elements + Fp u[2]; + const int half = MAP_TO_G1_INPUT_LEN / 2; + map_96_bytes_to_Fp(&u[0], hash, half); + map_96_bytes_to_Fp(&u[1], hash + half, half); + // map field elements to G1 + // inputs must be in Montgomery form + map_to_g1((POINTonE1 *)h, (limb_t *)&u[0], (limb_t *)&u[1]); + return VALID; +} + +// maps the bytes to a point in G1. +// `len` should be at least Fr_BYTES. +// this is a testing file only, should not be used in any protocol! +void unsafe_map_bytes_to_G1(E1 *p, const byte *bytes, int len) { + assert(len >= Fr_BYTES); + // map to Fr + Fr log; + map_bytes_to_Fr(&log, bytes, len); + // multiplies G1 generator by a random scalar + G1_mult_gen(p, &log); } -#if (MEMBERSHIP_CHECK_G1 == BOWE) -// beta such that beta^3 == 1 mod p -// beta is in the Montgomery form -const uint64_t beta_data[Fp_DIGITS] = { - 0xcd03c9e48671f071, 0x5dab22461fcda5d2, 0x587042afd3851b95, - 0x8eb60ebe01bacb9e, 0x03f97d6e83d050d2, 0x18f0206554638741, -}; - - -// (z^2-1)/3 with z being the parameter of bls12-381 -const uint64_t z2_1_by3_data[2] = { - 0x0000000055555555, 0x396c8c005555e156 -}; - -// uses Bowe's check from section 3.2 from https://eprint.iacr.org/2019/814.pdf -// to check whether a point on the curve E1 is in G1. -int bowe_subgroup_check_G1(const ep_t p){ - if (ep_is_infty(p) == 1) - return VALID; - fp_t b; - dv_copy(b, beta_data, Fp_DIGITS); - ep_t sigma, sigma2, p_inv; - ep_new(sigma); - ep_new(sigma2); - ep_new(p_inv); - - // si(p) - ep_copy(sigma, p); - fp_mul(sigma[0].x, sigma[0].x, b); - // -si^2(p) - ep_copy(sigma2, sigma); - fp_mul(sigma2[0].x, sigma2[0].x, b); - fp_neg(sigma2[0].y, sigma2[0].y); - ep_dbl(sigma, sigma); - // -p - ep_copy(p_inv, p); - fp_neg(p_inv[0].y, p_inv[0].y); - // (z^2-1)/3 (2*si(p) - p - si^2(p)) - si^2(p) - ep_add(sigma, sigma, p_inv); - ep_add(sigma, sigma, sigma2); - // TODO: multiplication using a chain? - ep_mul_lwnaf(sigma, sigma, &bls_prec->z2_1_by3); - ep_add(sigma, sigma, sigma2); - - ep_free(sigma2); - ep_free(p_inv); - // check result against infinity - if (!ep_is_infty(sigma)){ - ep_free(sigma); - return INVALID; +// maps bytes to a point in E1\G1. +// `len` must be at least 96 bytes. +// this is a testing function only, should not be used in any protocol! +void unsafe_map_bytes_to_G1complement(E1 *p, const byte *in, int in_len) { + assert(in_len >= 96); + Fp u; + map_96_bytes_to_Fp(&u, in, 96); + // map to E1's isogenous and then to E1 + map_to_isogenous_E1((POINTonE1 *)p, u); + isogeny_map_to_E1((POINTonE1 *)p, (POINTonE1 *)p); + // clear G1 order + E1_mult(p, p, (Fr *)&BLS12_381_r); +} + +// ------------------- E2 utilities + +const E2 *BLS12_381_g2 = (const E2 *)&BLS12_381_G2; +const E2 *BLS12_381_minus_g2 = (const E2 *)&BLS12_381_NEG_G2; + +// E2_read_bytes imports a E2(Fp^2) point from a buffer in a compressed or +// uncompressed form. The resulting point is guaranteed to be on curve E2 (no G2 +// check is included). +// E2 point is in affine coordinates. This avoids further conversions +// when the point is used in multiple pairing computation. +// +// returns: +// - BAD_ENCODING if the length is invalid or serialization header bits are +// invalid +// - BAD_VALUE if Fp^2 coordinates couldn't deserialize +// - POINT_NOT_ON_CURVE if deserialized point isn't on E2 +// - VALID if deserialization is valid +// +// Note: can use with POINTonE2_Deserialize_BE and POINTonE2_Uncompress_Z, +// and update the logic around G2 subgroup check. +ERROR E2_read_bytes(E2 *a, const byte *in, const int in_len) { + // check the length + if (in_len != G2_SER_BYTES) { + return BAD_ENCODING; + } + + // check the compression bit + int compressed = in[0] >> 7; + if ((compressed == 1) != (G2_SERIALIZATION == COMPRESSED)) { + return BAD_ENCODING; + } + + // check if the point in infinity + int is_infinity = in[0] & 0x40; + if (is_infinity) { + // the remaining bits need to be cleared + if (in[0] & 0x3F) { + return BAD_ENCODING; + } + for (int i = 1; i < G2_SER_BYTES - 1; i++) { + if (in[i]) { + return BAD_ENCODING; + } } - ep_free(sigma); + E2_set_infty(a); return VALID; + } + + // read the sign bit and check for consistency + int y_sign = (in[0] >> 5) & 1; + if (y_sign && (!compressed)) { + return BAD_ENCODING; + } + + // use a temporary buffer to mask the header bits and read a.x + byte temp[Fp2_BYTES]; + memcpy(temp, in, Fp2_BYTES); + temp[0] &= 0x1F; // clear the header bits + ERROR ret = Fp2_read_bytes(&a->x, temp, sizeof(temp)); + if (ret != VALID) { + return ret; + } + Fp2 *a_x = &(a->x); + Fp_to_montg(&real(a_x), &real(a_x)); + Fp_to_montg(&imag(a_x), &imag(a_x)); + + // set a.z to 1 + Fp2 *a_z = &(a->z); + Fp_copy(&real(a_z), &BLS12_381_pR); + Fp_set_zero(&imag(a_z)); + + Fp2 *a_y = &(a->y); + if (G2_SERIALIZATION == UNCOMPRESSED) { + ret = Fp2_read_bytes(a_y, in + Fp2_BYTES, sizeof(a->y)); + if (ret != VALID) { + return ret; + } + Fp_to_montg(&real(a_y), &real(a_y)); + Fp_to_montg(&imag(a_y), &imag(a_y)); + // check read point is on curve + if (!E2_affine_on_curve(a)) { + return POINT_NOT_ON_CURVE; + } + return VALID; + } + + // compute the possible square root + Fp2_squ_montg(a_y, a_x); + Fp2_mul_montg(a_y, a_y, a_x); // x^3 + Fp2_add(a_y, a_y, &B_E2); // B_E2 is already in Montg form + if (!Fp2_sqrt_montg(a_y, a_y)) // check whether x^3+b is a quadratic residue + return POINT_NOT_ON_CURVE; + + // resulting (x,y) is guaranteed to be on curve (y is already in Montg form) + if (Fp2_get_sign(a_y) != y_sign) { + Fp2_neg(a_y, a_y); // flip y sign if needed + } + return VALID; +} + +// E2_write_bytes exports a point in E2(Fp^2) to a buffer in a compressed or +// uncompressed form. It assumes buffer is of length G2_SER_BYTES The +// serialization follows: +// https://www.ietf.org/archive/id/draft-irtf-cfrg-pairing-friendly-curves-08.html#name-zcash-serialization-format-) +void E2_write_bytes(byte *out, const E2 *a) { + if (E2_is_infty(a)) { + // set the infinity bit + out[0] = (G2_SERIALIZATION << 7) | (1 << 6); + memset(out + 1, 0, G2_SER_BYTES - 1); + return; + } + E2 tmp; + E2_to_affine(&tmp, a); + + Fp2 *t_x = &(tmp.x); + Fp_from_montg(&real(t_x), &real(t_x)); + Fp_from_montg(&imag(t_x), &imag(t_x)); + Fp2_write_bytes(out, t_x); + + Fp2 *t_y = &(tmp.y); + if (G2_SERIALIZATION == COMPRESSED) { + out[0] |= (Fp2_get_sign(t_y) << 5); + } else { + Fp_from_montg(&real(t_y), &real(t_y)); + Fp_from_montg(&imag(t_y), &imag(t_y)); + Fp2_write_bytes(out + Fp2_BYTES, t_y); + } + + out[0] |= (G2_SERIALIZATION << 7); +} + +// set p to infinity +void E2_set_infty(E2 *p) { + // BLST infinity points are defined by Z=0 + vec_zero(p->z, sizeof(p->z)); +} + +// check if `p` is infinity +bool E2_is_infty(const E2 *p) { + // BLST infinity points are defined by Z=0 + return vec_is_zero(p->z, sizeof(p->z)); +} + +// checks affine point `p` is in E2 +bool E2_affine_on_curve(const E2 *p) { + // BLST's `POINTonE2_affine_on_curve` does not include the infinity case! + return POINTonE2_affine_on_curve((POINTonE2_affine *)p) | E2_is_infty(p); +} + +// checks p1 == p2 +bool E2_is_equal(const E2 *p1, const E2 *p2) { + // `POINTonE2_is_equal` includes the infinity case + return POINTonE2_is_equal((const POINTonE2 *)p1, (const POINTonE2 *)p2); +} + +// res = p +void E2_copy(E2 *res, const E2 *p) { + if ((uptr_t)p == (uptr_t)res) { + return; + } + vec_copy(res, p, sizeof(E2)); +} + +// converts an E2 point from Jacobian into affine coordinates (z=1) +void E2_to_affine(E2 *res, const E2 *p) { + // optimize in case coordinates are already affine + if (vec_is_equal(p->z, BLS12_381_Rx.p2, sizeof(p->z))) { + E2_copy(res, p); + return; + } + // convert from Jacobian + POINTonE2_from_Jacobian((POINTonE2 *)res, (const POINTonE2 *)p); +} + +// generic point addition that must handle doubling and points at infinity +void E2_add(E2 *res, const E2 *a, const E2 *b) { + POINTonE2_dadd((POINTonE2 *)res, (POINTonE2 *)a, (POINTonE2 *)b, NULL); +} + +// generic point double that must handle point at infinity +static void E2_double(E2 *res, const E2 *a) { + POINTonE2_double((POINTonE2 *)res, (POINTonE2 *)a); +} + +// Point negation: res = -a +void E2_neg(E2 *res, const E2 *a) { + E2_copy(res, a); + POINTonE2_cneg((POINTonE2 *)res, 1); } -#endif -// generates a random point in G1 and stores it in p -void ep_rand_G1(ep_t p) { - // multiplies G1 generator by a random scalar - ep_rand(p); -} - -// generates a random point in E1\G1 and stores it in p -void ep_rand_G1complement(ep_t p) { - // generate a random point in E1 - p->coord = BASIC; - fp_set_dig(p->z, 1); - do { - fp_rand(p->x); // set x to a random field element - byte r; - rand_bytes(&r, 1); - fp_zero(p->y); - fp_set_bit(p->y, 0, r&1); // set y randomly to 0 or 1 +// Exponentiation of a generic point `a` in E2, res = expo.a +void E2_mult(E2 *res, const E2 *p, const Fr *expo) { + pow256 tmp; + pow256_from_Fr(tmp, expo); + POINTonE2_mult_gls((POINTonE2 *)res, (POINTonE2 *)p, tmp); + vec_zero(&tmp, sizeof(tmp)); +} + +// Exponentiation of a generic point `a` in E2 by a byte exponent, +// using a classic double-and-add algorithm (non constant-time) +void E2_mult_small_expo(E2 *res, const E2 *p, const byte expo) { + // return early if expo is zero + if (expo == 0) { + E2_set_infty(res); + return; + } + // expo is non zero + + byte mask = 1 << 7; + // process the most significant zero bits + while ((expo & mask) == 0) { + mask >>= 1; + } + + // process the first `1` bit + E2 tmp; + E2_copy(&tmp, p); + mask >>= 1; + // scan the remaining bits + for (; mask != 0; mask >>= 1) { + E2_double(&tmp, &tmp); + if (expo & mask) { + E2_add(&tmp, &tmp, p); } - while (ep_upk(p, p) == 0); // make sure p is in E1 + } + E2_copy(res, &tmp); +} - // map the point to E1\G1 by clearing G1 order - ep_mul_basic(p, p, &core_get()->ep_r); +// Exponentiation of generator g2 of G2, res = expo.g2 +void G2_mult_gen(E2 *res, const Fr *expo) { + pow256 tmp; + pow256_from_Fr(tmp, expo); + POINTonE2_mult_gls((POINTonE2 *)res, (POINTonE2 *)BLS12_381_g2, tmp); + vec_zero(&tmp, sizeof(tmp)); +} + +// Exponentiation of generator g2 of G2, res = expo.g2. +// +// Result is converted to affine. This is useful for results being used multiple +// times in pairings. Conversion to affine saves later pre-pairing conversions. +void G2_mult_gen_to_affine(E2 *res, const Fr *expo) { + G2_mult_gen(res, expo); + E2_to_affine(res, res); +} - assert(ep_on_curve(p)); // sanity check to make sure p is in E1 +// checks if input E2 point is on the subgroup G2. +// It assumes input `p` is on E2. +bool E2_in_G2(const E2 *p) { + // currently uses Scott method + return POINTonE2_in_G2((const POINTonE2 *)p); } -// generates a random point in G2 and stores it in p -void ep2_rand_G2(ep2_t p) { - // multiplies G2 generator by a random scalar - ep2_rand(p); +// computes the sum of the E2 array elements `y[i]` and writes it in `sum` +void E2_sum_vector(E2 *sum, const E2 *y, const int y_len) { + E2_set_infty(sum); + for (int i = 0; i < y_len; i++) { + E2_add(sum, sum, &y[i]); + } } -// generates a random point in E2\G2 and stores it in p -void ep2_rand_G2complement(ep2_t p) { - // generate a random point in E2 - p->coord = BASIC; - fp_set_dig(p->z[0], 1); - fp_zero(p->z[1]); - do { - fp2_rand(p->x); // set x to a random field element - byte r; - rand_bytes(&r, 1); - fp2_zero(p->y); - fp_set_bit(p->y[0], 0, r&1); // set y randomly to 0 or 1 +// computes the sum of the E2 array elements `y[i]`, converts it +// to affine coordinates, and writes it in `sum`. +// +// Result is converted to affine. This is useful for results being used multiple +// times in pairings. Conversion to affine saves later pre-pairing conversions. +void E2_sum_vector_to_affine(E2 *sum, const E2 *y, const int y_len) { + E2_sum_vector(sum, y, y_len); + E2_to_affine(sum, sum); +} + +// Subtracts all G2 array elements `y` from an element `x` and writes the +// result in res. +void E2_subtract_vector(E2 *res, const E2 *x, const E2 *y, const int y_len) { + E2_sum_vector(res, y, y_len); + E2_neg(res, res); + E2_add(res, x, res); +} + +// maps the bytes to a point in G2. +// `in_len` should be at least Fr_BYTES. +// this is a testing tool only, it should not be used in any protocol! +void unsafe_map_bytes_to_G2(E2 *p, const byte *in, int in_len) { + assert(in_len >= Fr_BYTES); + // map to Fr + Fr log; + map_bytes_to_Fr(&log, in, in_len); + // multiplies G2 generator by a random scalar + G2_mult_gen(p, &log); +} + +// maps `in` to a point in E2\G2 and stores it in p. +// `len` should be at least 192. +// this is a testing tool only, it should not be used in any protocol! +void unsafe_map_bytes_to_G2complement(E2 *p, const byte *in, int in_len) { + assert(in_len >= 192); + Fp2 u; + map_96_bytes_to_Fp(&real(&u), in, 96); + map_96_bytes_to_Fp(&imag(&u), in + 96, 96); + // map to E2's isogenous and then to E2 + map_to_isogenous_E2((POINTonE2 *)p, u); + isogeny_map_to_E2((POINTonE2 *)p, (POINTonE2 *)p); + // clear G2 order + E2_mult(p, p, (Fr *)&BLS12_381_r); +} + +// ------------------- Pairing utilities + +bool Fp12_is_one(Fp12 *a) { + return vec_is_equal(a, BLS12_381_Rx.p12, sizeof(Fp12)); +} + +void Fp12_set_one(Fp12 *a) { vec_copy(a, BLS12_381_Rx.p12, sizeof(Fp12)); } + +// computes e(p[0], q[0]) * ... * e(q[len-1], q[len-1]) +// by optimizing a common final exponentiation for all pairings. +// result is stored in `res`. +// It assumes `p` and `q` are correctly initialized and all +// p[i] and q[i] are respectively on G1 and G2 (it does not +// check their memberships). +void Fp12_multi_pairing(Fp12 *res, const E1 *p, const E2 *q, const int len) { + // easier access pointer + vec384fp6 *res_vec = (vec384fp6 *)res; + // N_MAX is defined within BLST. It should represent a good tradeoff of the + // max number of miller loops to be batched in one call to `miller_loop_n`. + // miller_loop_n expects an array of `POINTonEx_affine`. + POINTonE1_affine p_aff[N_MAX]; + POINTonE2_affine q_aff[N_MAX]; + int n = 0; // the number of couples (p,q) held in p_aff and q_aff + int init_flag = 0; + + for (int i = 0; i < len; i++) { + if (E1_is_infty(p + i) || E2_is_infty(q + i)) { + continue; + } + // `miller_loop_n` expects affine coordinates in a `POINTonEx_affine` array. + // `POINTonEx_affine` has a different size than `POINTonEx` and `Ex` ! + E1 tmp1; + E1_to_affine(&tmp1, p + i); + vec_copy(p_aff + n, &tmp1, sizeof(POINTonE1_affine)); + E2 tmp2; + E2_to_affine(&tmp2, q + i); + vec_copy(q_aff + n, &tmp2, sizeof(POINTonE2_affine)); + n++; + // if p_aff and q_aff are filled, batch `N_MAX` miller loops + if (n == N_MAX) { + if (!init_flag) { + miller_loop_n(res_vec, q_aff, p_aff, N_MAX); + init_flag = 1; + } else { + vec384fp12 tmp; + miller_loop_n(tmp, q_aff, p_aff, N_MAX); + mul_fp12(res_vec, res_vec, tmp); + } + n = 0; } - while (ep2_upk(p, p) == 0); // make sure p is in E1 + } + // if p_aff and q_aff aren't empty, + // the remaining couples are also batched in `n` miller loops + if (n > 0) { + if (!init_flag) { + miller_loop_n(res_vec, q_aff, p_aff, n); + init_flag = 1; + } else { + vec384fp12 tmp; + miller_loop_n(tmp, q_aff, p_aff, n); + mul_fp12(res_vec, res_vec, tmp); + } + } + + // check if no miller loop was computed + if (!init_flag) { + Fp12_set_one(res); + } + final_exp(res_vec, res_vec); +} + +// ------------------- Other utilities + +// This is a testing function and is not used in exported functions +// It uses an expand message XMD based on SHA2-256. +void xmd_sha256(byte *hash, int len_hash, byte *msg, int len_msg, byte *dst, + int len_dst) { + expand_message_xmd(hash, len_hash, NULL, 0, msg, len_msg, dst, len_dst); +} + +// DEBUG printing functions +#ifdef DEBUG +void bytes_print_(char *s, byte *data, int len) { + if (strlen(s)) + printf("[%s]:\n", s); + for (int i = 0; i < len; i++) + printf("%02X,", data[i]); + printf("\n"); +} + +void Fr_print_(char *s, Fr *a) { + if (strlen(s)) + printf("[%s]:\n", s); + limb_t *p = (limb_t *)(a) + Fr_LIMBS; + for (int i = 0; i < Fr_LIMBS; i++) + printf("%016llX", *(--p)); + printf("\n"); +} - // map the point to E1\G1 by clearing G1 order - ep2_mul_basic(p, p, &core_get()->ep_r); +void Fp_print_(char *s, const Fp *a) { + if (strlen(s)) + printf("[%s]:\n", s); + Fp tmp; + Fp_from_montg(&tmp, a); + limb_t *p = (limb_t *)(&tmp) + Fp_LIMBS; + for (int i = 0; i < Fp_LIMBS; i++) + printf("%016llX ", *(--p)); + printf("\n"); +} - assert(ep2_on_curve(p)); // sanity check to make sure p is in E1 +void Fp2_print_(char *s, const Fp2 *a) { + if (strlen(s)) + printf("[%s]:\n", s); + Fp_print_("", &real(a)); + Fp_print_("", &imag(a)); } -// This is a testing function. -// It wraps a call to a Relic macro since cgo can't call macros. -void xmd_sha256(uint8_t *hash, int len_hash, uint8_t *msg, int len_msg, uint8_t *dst, int len_dst){ - md_xmd_sh256(hash, len_hash, msg, len_msg, dst, len_dst); +void Fp12_print_(char *s, const Fp12 *a) { + if (strlen(s)) + printf("[%s]:\n", s); + for (int i = 0; i < 2; i++) { + vec384fp6 *a_ = (vec384fp6 *)a + i; + for (int j = 0; j < 3; j++) { + vec384fp2 *a__ = (vec384fp2 *)a_ + j; + Fp2_print_("", a__); + } + } } + +void E1_print_(char *s, const E1 *p, const int jacob) { + E1 a; + E1_copy(&a, p); + if (!jacob) + E1_to_affine(&a, &a); + if (strlen(s)) + printf("[%s]:\n", s); + Fp_print_(".x", &(a.x)); + Fp_print_(".y", &(a.y)); + if (jacob) + Fp_print_(".z", &(a.z)); +} + +void E2_print_(char *s, const E2 *p, const int jacob) { + E2 a; + E2_copy(&a, p); + if (!jacob) + E2_to_affine(&a, &a); + if (strlen(s)) + printf("[%s]:\n", s); + Fp2_print_("", &(a.x)); + Fp2_print_("", &(a.y)); + if (jacob) + Fp2_print_("", &(a.z)); +} + +#endif diff --git a/crypto/bls12381_utils.go b/crypto/bls12381_utils.go index 50676fc2c04..65a54bb9dd4 100644 --- a/crypto/bls12381_utils.go +++ b/crypto/bls12381_utils.go @@ -1,239 +1,316 @@ -//go:build relic -// +build relic - package crypto // this file contains utility functions for the curve BLS 12-381 // these tools are shared by the BLS signature scheme, the BLS based threshold signature // and the BLS distributed key generation protocols -// #cgo CFLAGS: -g -Wall -std=c99 -I${SRCDIR}/ -I${SRCDIR}/relic/build/include -I${SRCDIR}/relic/include -I${SRCDIR}/relic/include/low -// #cgo LDFLAGS: -L${SRCDIR}/relic/build/lib -l relic_s +// #cgo CFLAGS: -I${SRCDIR}/ -I${SRCDIR}/blst_src -I${SRCDIR}/blst_src/build -D__BLST_CGO__ -Wall -fno-builtin-memcpy -fno-builtin-memset -Wno-unused-function -Wno-unused-macros -Wno-unused-variable +// #cgo amd64 CFLAGS: -D__ADX__ -mno-avx +// #cgo mips64 mips64le ppc64 ppc64le riscv64 s390x CFLAGS: -D__BLST_NO_ASM__ // #include "bls12381_utils.h" -// #include "bls_include.h" +// +// #if defined(__x86_64__) && (defined(__unix__) || defined(__APPLE__)) +// # include +// # include +// # include +// static void handler(int signum) +// { char text[1024] = "Caught SIGILL in blst_cgo_init, BLST library (used by flow-go/crypto) requires ADX support, build with CGO_CFLAGS=\"-O -D__BLST_PORTABLE__\"\n"; +// ssize_t n = write(2, &text, strlen(text)); +// _exit(128+SIGILL); +// (void)n; +// } +// __attribute__((constructor)) static void flow_crypto_cgo_init() +// { Fp temp = { 0 }; +// struct sigaction act = {{ handler }}, oact; +// sigaction(SIGILL, &act, &oact); +// Fp_squ_montg(&temp, &temp); +// sigaction(SIGILL, &oact, NULL); +// } +// #endif +// import "C" import ( "errors" + "fmt" + + "github.com/onflow/flow-go/crypto/random" ) -// Go wrappers to Relic C types -// Relic is compiled with ALLOC=AUTO -type pointG1 C.ep_st -type pointG2 C.ep2_st -type scalar C.bn_st +// Go wrappers around BLST C types +type pointE1 C.E1 +type pointE2 C.E2 +type scalar C.Fr + +// Note that scalars and field elements F_r are represented in Go by the same type +// called `scalar`, which is internally represented by C type `Fr`. Scalars used by the +// Go layer are all reduced modulo the curve order `r`. + +const ( + // BLS12-381 related lengths imported from the C layer + frBytesLen = int(C.Fr_BYTES) + fpBytesLen = int(C.Fp_BYTES) + g1BytesLen = int(C.G1_SER_BYTES) + g2BytesLen = int(C.G2_SER_BYTES) + + // error constants imported from the C layer + valid = C.VALID + invalid = C.INVALID + badEncoding = C.BAD_ENCODING + badValue = C.BAD_VALUE + pointNotOnCurve = C.POINT_NOT_ON_CURVE +) -// context required for the BLS set-up -type ctx struct { - relicCtx *C.ctx_t - precCtx *C.prec_st -} +// header of the point at infinity serializations +var g1SerHeader byte // g1 (G1 identity) +var g2SerHeader byte // g2 (G2 identity) -// get some constants from the C layer -// (Cgo does not export C macros) -var valid = C.get_valid() -var invalid = C.get_invalid() +// `g1` serialization +var g1Serialization []byte -// initContext sets relic B12_381 parameters and precomputes some data in the C layer -func (ct *ctx) initContext() error { - c := C.relic_init_BLS12_381() - if c == nil { - return errors.New("Relic core init failed") - } - ct.relicCtx = c - ct.precCtx = C.init_precomputed_data_BLS12_381() - return nil -} +var g2PublicKey pubKeyBLSBLS12381 + +// initialization of BLS12-381 curve +func initBLS12381() { + C.types_sanity() -// seeds the internal relic random function. -// relic context must be initialized before seeding. -func seedRelic(seed []byte) error { - if len(seed) < (securityBits / 8) { - return invalidInputsErrorf( - "seed length needs to be larger than %d", - securityBits/8) + if isG1Compressed() { + g1SerHeader = 0xC0 + } else { + g1SerHeader = 0x40 } - if len(seed) > maxRelicPrgSeed { - return invalidInputsErrorf( - "seed length needs to be less than %x", - maxRelicPrgSeed) + g1Serialization = append([]byte{g1SerHeader}, make([]byte, g1BytesLen-1)...) + if isG2Compressed() { + g2SerHeader = 0xC0 + } else { + g2SerHeader = 0x40 } - C.seed_relic((*C.uchar)(&seed[0]), (C.int)(len(seed))) - return nil + // set a global point to infinity + C.E2_set_infty((*C.E2)(&g2PublicKey.point)) + g2PublicKey.isIdentity = true } -// setContext sets the context (previously initialized) of the C layer with -// pre-saved data. -func (ct *ctx) setContext() { - C.core_set(ct.relicCtx) - C.precomputed_data_set(ct.precCtx) +// String returns a hex-encoded representation of the scalar. +func (a *scalar) String() string { + encoding := make([]byte, frBytesLen) + writeScalar(encoding, a) + return fmt.Sprintf("%#x", encoding) } -// Exponentiation in G1 (scalar point multiplication) -func (p *pointG1) scalarMultG1(res *pointG1, expo *scalar) { - C.ep_mult((*C.ep_st)(res), (*C.ep_st)(p), (*C.bn_st)(expo)) +// String returns a hex-encoded representation of the E2 point. +func (p *pointE2) String() string { + encoding := make([]byte, g2BytesLen) + writePointE2(encoding, p) + return fmt.Sprintf("%#x", encoding) } -// This function is for TEST only -// Exponentiation of g1 in G1 -func generatorScalarMultG1(res *pointG1, expo *scalar) { - C.ep_mult_gen_bench((*C.ep_st)(res), (*C.bn_st)(expo)) +// Scalar multiplication of a generic point `p` in E1 +func (p *pointE1) scalarMultE1(res *pointE1, expo *scalar) { + C.E1_mult((*C.E1)(res), (*C.E1)(p), (*C.Fr)(expo)) } -// This function is for TEST only -// Generic Exponentiation G1 -func genericScalarMultG1(res *pointG1, expo *scalar) { - C.ep_mult_generic_bench((*C.ep_st)(res), (*C.bn_st)(expo)) +// Scalar multiplication of generator g1 in G1 +func generatorScalarMultG1(res *pointE1, expo *scalar) { + C.G1_mult_gen((*C.E1)(res), (*C.Fr)(expo)) } -// Exponentiation of g2 in G2 -func generatorScalarMultG2(res *pointG2, expo *scalar) { - C.ep2_mult_gen((*C.ep2_st)(res), (*C.bn_st)(expo)) +// Scalar multiplication of generator g2 in G2 +// +// This often results in a public key that is used in +// multiple pairing computation. Therefore, convert the +// resulting point to affine coordinate to save pre-pairing +// conversions. +func generatorScalarMultG2(res *pointE2, expo *scalar) { + C.G2_mult_gen_to_affine((*C.E2)(res), (*C.Fr)(expo)) } -// comparison in Zr where r is the group order of G1/G2 +// comparison in Fr where r is the group order of G1/G2 // (both scalars should be reduced mod r) func (x *scalar) equals(other *scalar) bool { - return C.bn_cmp((*C.bn_st)(x), (*C.bn_st)(other)) == valid + return bool(C.Fr_is_equal((*C.Fr)(x), (*C.Fr)(other))) +} + +// comparison in E1 +func (p *pointE1) equals(other *pointE1) bool { + return bool(C.E1_is_equal((*C.E1)(p), (*C.E1)(other))) } -// comparison in G2 -func (p *pointG2) equals(other *pointG2) bool { - return C.ep2_cmp((*C.ep2_st)(p), (*C.ep2_st)(other)) == valid +// comparison in E2 +func (p *pointE2) equals(other *pointE2) bool { + return bool(C.E2_is_equal((*C.E2)(p), (*C.E2)(other))) } -// Comparison to zero in Zr. +// Comparison to zero in Fr. // Scalar must be already reduced modulo r func (x *scalar) isZero() bool { - return C.bn_is_zero((*C.bn_st)(x)) == 1 + return bool(C.Fr_is_zero((*C.Fr)(x))) } // Comparison to point at infinity in G2. -func (p *pointG2) isInfinity() bool { - return C.ep2_is_infty((*C.ep2_st)(p)) == 1 +func (p *pointE2) isInfinity() bool { + return bool(C.E2_is_infty((*C.E2)(p))) } -// returns a random number in Zr -func randZr(x *scalar) { - C.bn_randZr((*C.bn_st)(x)) +// generates a random element in F_r using input random source, +// and saves the random in `x`. +// returns `true` if generated element is zero. +func randFr(x *scalar, rand random.Rand) bool { + // use extra 128 bits to reduce the modular reduction bias + bytes := make([]byte, frBytesLen+securityBits/8) + rand.Read(bytes) + // modular reduction + return mapToFr(x, bytes) } -// returns a random non-zero number in Zr -func randZrStar(x *scalar) { - C.bn_randZr_star((*C.bn_st)(x)) +// generates a random element in F_r* using input random source, +// and saves the random in `x`. +func randFrStar(x *scalar, rand random.Rand) { + isZero := true + // extremely unlikely this loop runs more than once, + // but force the output to be non-zero instead of propagating an error. + for isZero { + isZero = randFr(x, rand) + } } -// mapToZr reads a scalar from a slice of bytes and maps it to Zr. -// The resulting scalar `k` satisfies 0 <= k < r. +// mapToFr reads a scalar from a slice of bytes and maps it to Fr using modular reduction. +// The resulting element `k` therefore satisfies 0 <= k < r. // It returns true if scalar is zero and false otherwise. -func mapToZr(x *scalar, src []byte) bool { - isZero := C.bn_map_to_Zr((*C.bn_st)(x), +func mapToFr(x *scalar, src []byte) bool { + isZero := C.map_bytes_to_Fr((*C.Fr)(x), (*C.uchar)(&src[0]), (C.int)(len(src))) - return isZero == valid + return bool(isZero) } -// writeScalar writes a G2 point in a slice of bytes +// writeScalar writes a scalar in a slice of bytes func writeScalar(dest []byte, x *scalar) { - C.bn_write_bin((*C.uchar)(&dest[0]), - (C.ulong)(prKeyLengthBLSBLS12381), - (*C.bn_st)(x), - ) + C.Fr_write_bytes((*C.uchar)(&dest[0]), (*C.Fr)(x)) } -// readScalar reads a scalar from a slice of bytes -func readScalar(x *scalar, src []byte) { - C.bn_read_bin((*C.bn_st)(x), - (*C.uchar)(&src[0]), - (C.ulong)(len(src)), - ) +// writePointE2 writes a G2 point in a slice of bytes +// The slice should be of size g2BytesLen and the serialization +// follows the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves +func writePointE2(dest []byte, a *pointE2) { + C.E2_write_bytes((*C.uchar)(&dest[0]), (*C.E2)(a)) } -// writePointG2 writes a G2 point in a slice of bytes -// The slice should be of size PubKeyLenBLSBLS12381 and the serialization will -// follow the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves -func writePointG2(dest []byte, a *pointG2) { - C.ep2_write_bin_compact((*C.uchar)(&dest[0]), - (*C.ep2_st)(a), - (C.int)(pubKeyLengthBLSBLS12381), - ) +// writePointE1 writes a G1 point in a slice of bytes +// The slice should be of size g1BytesLen and the serialization +// follows the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves +func writePointE1(dest []byte, a *pointE1) { + C.E1_write_bytes((*C.uchar)(&dest[0]), (*C.E1)(a)) } -// writePointG1 writes a G1 point in a slice of bytes -// The slice should be of size SignatureLenBLSBLS12381 and the serialization will -// follow the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves -func writePointG1(dest []byte, a *pointG1) { - C.ep_write_bin_compact((*C.uchar)(&dest[0]), - (*C.ep_st)(a), - (C.int)(signatureLengthBLSBLS12381), - ) +// read an Fr* element from a byte slice +// and stores it into a `scalar` type element. +func readScalarFrStar(a *scalar, src []byte) error { + read := C.Fr_star_read_bytes( + (*C.Fr)(a), + (*C.uchar)(&src[0]), + (C.int)(len(src))) + + switch read { + case valid: + return nil + case badEncoding: + return invalidInputsErrorf("input length must be %d, got %d", + frBytesLen, len(src)) + case badValue: + return invalidInputsErrorf("scalar is not in the correct range") + default: + return invalidInputsErrorf("reading the scalar failed") + } } -// readPointG2 reads a G2 point from a slice of bytes -// The slice is expected to be of size PubKeyLenBLSBLS12381 and the deserialization will -// follow the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves -func readPointG2(a *pointG2, src []byte) error { - switch C.ep2_read_bin_compact((*C.ep2_st)(a), +// readPointE2 reads a E2 point from a slice of bytes +// The slice is expected to be of size g2BytesLen and the deserialization +// follows the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves. +// No G2 membership check is performed. +func readPointE2(a *pointE2, src []byte) error { + read := C.E2_read_bytes((*C.E2)(a), (*C.uchar)(&src[0]), - (C.int)(len(src))) { + (C.int)(len(src))) + + switch read { case valid: return nil - case invalid: - return invalidInputsErrorf("input is not a G2 point") + case badEncoding, badValue: + return invalidInputsErrorf("input could not deserialize to an E2 point") + case pointNotOnCurve: + return invalidInputsErrorf("input is not a point on curve E2") default: - return errors.New("reading a G2 point failed") + return errors.New("reading E2 point failed") } } -// readPointG1 reads a G1 point from a slice of bytes -// The slice should be of size SignatureLenBLSBLS12381 and the deserialization will -// follow the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves -func readPointG1(a *pointG1, src []byte) error { - switch C.ep_read_bin_compact((*C.ep_st)(a), +// readPointE1 reads a E1 point from a slice of bytes +// The slice should be of size g1BytesLen and the deserialization +// follows the Zcash format specified in draft-irtf-cfrg-pairing-friendly-curves. +// No G1 membership check is performed. +func readPointE1(a *pointE1, src []byte) error { + read := C.E1_read_bytes((*C.E1)(a), (*C.uchar)(&src[0]), - (C.int)(len(src))) { + (C.int)(len(src))) + + switch read { case valid: return nil - case invalid: - return invalidInputsErrorf("input is not a G1 point") + case badEncoding, badValue: + return invalidInputsErrorf("input could not deserialize to a E1 point") + case pointNotOnCurve: + return invalidInputsErrorf("input is not a point on curve E1") default: - return errors.New("reading a G1 point failed") + return errors.New("reading E1 point failed") } } // checkMembershipG1 wraps a call to a subgroup check in G1 since cgo can't be used // in go test files. -func checkMembershipG1(pt *pointG1) int { - return int(C.check_membership_G1((*C.ep_st)(pt))) +func checkMembershipG1(pt *pointE1) bool { + return bool(C.E1_in_G1((*C.E1)(pt))) } // checkMembershipG2 wraps a call to a subgroup check in G2 since cgo can't be used // in go test files. -func checkMembershipG2(pt *pointG2) int { - return int(C.check_membership_G2((*C.ep2_st)(pt))) +func checkMembershipG2(pt *pointE2) bool { + return bool(C.E2_in_G2((*C.E2)(pt))) +} + +// This is only a TEST/DEBUG/BENCH function. +// It returns the hash-to-G1 point from a slice of 128 bytes +func mapToG1(data []byte) *pointE1 { + l := len(data) + var h pointE1 + if C.map_to_G1((*C.E1)(&h), (*C.uchar)(&data[0]), (C.int)(l)) != valid { + return nil + } + return &h } -// randPointG1 wraps a call to C since cgo can't be used in go test files. -// It generates a random point in G1 and stores it in input point. -func randPointG1(pt *pointG1) { - C.ep_rand_G1((*C.ep_st)(pt)) +// mapToG1 is a test function, it wraps a call to C since cgo can't be used in go test files. +// It maps input bytes to a point in G2 and stores it in input point. +// THIS IS NOT the kind of mapping function that is used in BLS signature. +func unsafeMapToG1(pt *pointE1, seed []byte) { + C.unsafe_map_bytes_to_G1((*C.E1)(pt), (*C.uchar)(&seed[0]), (C.int)(len(seed))) } -// randPointG1Complement wraps a call to C since cgo can't be used in go test files. -// It generates a random point in E1\G1 and stores it in input point. -func randPointG1Complement(pt *pointG1) { - C.ep_rand_G1complement((*C.ep_st)(pt)) +// unsafeMapToG1Complement is a test function, it wraps a call to C since cgo can't be used in go test files. +// It generates a random point in E2\G2 and stores it in input point. +func unsafeMapToG1Complement(pt *pointE1, seed []byte) { + C.unsafe_map_bytes_to_G1complement((*C.E1)(pt), (*C.uchar)(&seed[0]), (C.int)(len(seed))) } -// randPointG2 wraps a call to C since cgo can't be used in go test files. -// It generates a random point in G2 and stores it in input point. -func randPointG2(pt *pointG2) { - C.ep2_rand_G2((*C.ep2_st)(pt)) +// unsafeMapToG2 is a test function, it wraps a call to C since cgo can't be used in go test files. +// It maps input bytes to a point in G2 and stores it in input point. +// THIS IS NOT the kind of mapping function that is used in BLS signature. +func unsafeMapToG2(pt *pointE2, seed []byte) { + C.unsafe_map_bytes_to_G2((*C.E2)(pt), (*C.uchar)(&seed[0]), (C.int)(len(seed))) } -// randPointG1Complement wraps a call to C since cgo can't be used in go test files. +// unsafeMapToG2Complement is a test function, it wraps a call to C since cgo can't be used in go test files. // It generates a random point in E2\G2 and stores it in input point. -func randPointG2Complement(pt *pointG2) { - C.ep2_rand_G2complement((*C.ep2_st)(pt)) +func unsafeMapToG2Complement(pt *pointE2, seed []byte) { + C.unsafe_map_bytes_to_G2complement((*C.E2)(pt), (*C.uchar)(&seed[0]), (C.int)(len(seed))) } // This is only a TEST function. @@ -254,11 +331,21 @@ func hashToG1Bytes(data, dst []byte) []byte { (*C.uchar)(&dst[0]), (C.int)(len(dst))) // map the hash to G1 - var point pointG1 - C.map_to_G1((*C.ep_st)(&point), (*C.uchar)(&hash[0]), (C.int)(len(hash))) + var point pointE1 + if C.map_to_G1((*C.E1)(&point), (*C.uchar)(&hash[0]), (C.int)(len(hash))) != valid { + return nil + } // serialize the point - pointBytes := make([]byte, signatureLengthBLSBLS12381) - writePointG1(pointBytes, &point) + pointBytes := make([]byte, g1BytesLen) + writePointE1(pointBytes, &point) return pointBytes } + +func isG1Compressed() bool { + return g1BytesLen == fpBytesLen +} + +func isG2Compressed() bool { + return g2BytesLen == 2*fpBytesLen +} diff --git a/crypto/bls12381_utils.h b/crypto/bls12381_utils.h index 2c96503654c..923208ef3f3 100644 --- a/crypto/bls12381_utils.h +++ b/crypto/bls12381_utils.h @@ -1,143 +1,165 @@ -// +build relic - // this file contains utility functions for the curve BLS 12-381 -// these tools are shared by the BLS signature scheme, the BLS based threshold signature -// and the BLS distributed key generation protocols +// these tools are shared by the BLS signature scheme, the BLS based threshold +// signature, BLS-SPoCK and the BLS distributed key generation protocols -#ifndef _REL_MISC_INCLUDE_H -#define _REL_MISC_INCLUDE_H +#ifndef _BLS12_381_UTILS_H +#define _BLS12_381_UTILS_H -#include "relic.h" +#include "blst_include.h" +#include +#include typedef uint8_t byte; - -#define VALID RLC_OK -#define INVALID RLC_ERR -#define UNDEFINED (((VALID&1)^1) | ((INVALID&2)^2)) // different value than RLC_OK and RLC_ERR - -#define BITS_TO_BYTES(x) ((x+7)>>3) -#define BITS_TO_DIGITS(x) ((x+63)>>6) -#define BYTES_TO_DIGITS(x) ((x+7)>>3) -#define MIN(a,b) ((a)>(b)?(b):(a)) +typedef _Bool bool; // assuming cgo is using a modern enough compiler + +// minimum targeted security level +#define SEC_BITS 128 + +typedef enum { + VALID = 0, + INVALID, + BAD_ENCODING, + BAD_VALUE, + POINT_NOT_ON_CURVE, + POINT_NOT_IN_GROUP, + UNDEFINED, +} ERROR; + +#define BITS_TO_BYTES(x) ((x + 7) >> 3) +#define BITS_TO_LIMBS(x) ((x + 63) >> 6) +#define BYTES_TO_LIMBS(x) ((x + 7) >> 3) +#define LIMBS_TO_BYTES(x) ((x) << 3) +#define MIN(a, b) ((a) > (b) ? (b) : (a)) // Fields and Group serialization lengths -#define SEC_BITS 128 -#define Fp_BITS 381 -#define Fr_BITS 255 -#define Fp_BYTES BITS_TO_BYTES(Fp_BITS) -#define Fp2_BYTES (2*Fp_BYTES) -#define Fp_DIGITS BITS_TO_DIGITS(Fp_BITS) -#define Fr_BYTES BITS_TO_BYTES(Fr_BITS) +#define Fp_BITS 381 +#define Fp2_BYTES (2 * Fp_BYTES) +#define Fp_LIMBS BITS_TO_LIMBS(Fp_BITS) +#define Fp_BYTES LIMBS_TO_BYTES(Fp_LIMBS) // BLST implements Fp as a limb array +#define Fr_BITS 255 +#define Fr_LIMBS BITS_TO_LIMBS(Fr_BITS) +#define Fr_BYTES LIMBS_TO_BYTES(Fr_LIMBS) // BLST implements Fr as a limb array -#define G1_BYTES (2*Fp_BYTES) -#define G2_BYTES (2*Fp2_BYTES) +#define G1_BYTES (2 * Fp_BYTES) +#define G2_BYTES (2 * Fp2_BYTES) // Compressed and uncompressed points -#define COMPRESSED 1 -#define UNCOMPRESSED 0 -#define G1_SERIALIZATION COMPRESSED -#define G2_SERIALIZATION COMPRESSED - -// Subgroup membership check method -#define EXP_ORDER 0 -#define BOWE 1 -#define MEMBERSHIP_CHECK_G1 BOWE -#define MEMBERSHIP_CHECK_G2 EXP_ORDER - - -// constants used in the optimized SWU hash to curve -#if (hashToPoint == LOCAL_SSWU) - #define ELLP_Nx_LEN 12 - #define ELLP_Dx_LEN 10 - #define ELLP_Ny_LEN 16 - #define ELLP_Dy_LEN 15 -#endif - - -// Structure of precomputed data -typedef struct prec_ { - #if (hashToPoint == LOCAL_SSWU) - // constants needed in optimized SSWU - bn_st p_3div4; - fp_st sqrt_z; - // related hardcoded constants for faster access, - // where a1 is the coefficient of isogenous curve E1 - fp_st minus_a1; - fp_st a1z; - // coefficients of the isogeny map - fp_st iso_Nx[ELLP_Nx_LEN]; - fp_st iso_Ny[ELLP_Ny_LEN]; - #endif - #if (MEMBERSHIP_CHECK_G1 == BOWE) - bn_st beta; - bn_st z2_1_by3; - #endif - // other field-related constants - bn_st p_1div2; - fp_t r; // Montgomery multiplication constant -} prec_st; - -// BLS based SPoCK -int bls_spock_verify(const ep2_t, const byte*, const ep2_t, const byte*); - -// hash to curve functions (functions in bls12381_hashtocurve.c) -void map_to_G1(ep_t, const byte*, const int); - -// Utility functions -int get_valid(); -int get_invalid(); -void bn_new_wrapper(bn_t a); - -ctx_t* relic_init_BLS12_381(); -prec_st* init_precomputed_data_BLS12_381(); -void precomputed_data_set(const prec_st* p); -void seed_relic(byte*, int); - -int ep_read_bin_compact(ep_t, const byte *, const int); -void ep_write_bin_compact(byte *, const ep_t, const int); -int ep2_read_bin_compact(ep2_t, const byte *, const int); -void ep2_write_bin_compact(byte *, const ep2_t, const int); -int bn_read_Zr_bin(bn_t, const uint8_t *, int ); - -void ep_mult_gen_bench(ep_t, const bn_t); -void ep_mult_generic_bench(ep_t, const bn_t); -void ep_mult(ep_t, const ep_t, const bn_t); -void ep2_mult_gen(ep2_t, const bn_t); - -void bn_randZr(bn_t); -void bn_randZr_star(bn_t); -int bn_map_to_Zr(bn_t, const uint8_t*, int); -void bn_map_to_Zr_star(bn_t, const uint8_t*, int); - -void bn_sum_vector(bn_t, const bn_st*, const int); -void ep_sum_vector(ep_t, ep_st*, const int); -void ep2_sum_vector(ep2_t, ep2_st*, const int); -int ep_sum_vector_byte(byte*, const byte*, const int); -void ep2_subtract_vector(ep2_t res, ep2_t x, ep2_st* y, const int len); - -// membership checks -int check_membership_G1(const ep_t); -int check_membership_G2(const ep2_t); -int check_membership_Zr_star(const bn_t); - -int simple_subgroup_check_G1(const ep_t); -int simple_subgroup_check_G2(const ep2_t); -void ep_rand_G1(ep_t); -void ep_rand_G1complement( ep_t); -void ep2_rand_G2(ep2_t); -void ep2_rand_G2complement( ep2_t); -#if (MEMBERSHIP_CHECK_G1 == BOWE) -int bowe_subgroup_check_G1(const ep_t); -#endif +#define UNCOMPRESSED 0 +#define COMPRESSED (UNCOMPRESSED ^ 1) +#define G1_SERIALIZATION (COMPRESSED) +#define G2_SERIALIZATION (COMPRESSED) +#define G1_SER_BYTES \ + (G1_SERIALIZATION == UNCOMPRESSED ? G1_BYTES : (G1_BYTES / 2)) +#define G2_SER_BYTES \ + (G2_SERIALIZATION == UNCOMPRESSED ? G2_BYTES : (G2_BYTES / 2)) + +// init-related functions +void types_sanity(void); + +// Fr utilities +extern const Fr BLS12_381_rR; +bool Fr_is_zero(const Fr *a); +bool Fr_is_equal(const Fr *a, const Fr *b); +void Fr_set_limb(Fr *, const limb_t); +void Fr_copy(Fr *, const Fr *); +void Fr_set_zero(Fr *); +void Fr_add(Fr *res, const Fr *a, const Fr *b); +void Fr_sub(Fr *res, const Fr *a, const Fr *b); +void Fr_neg(Fr *res, const Fr *a); +void Fr_sum_vector(Fr *, const Fr x[], const int); +void Fr_mul_montg(Fr *res, const Fr *a, const Fr *b); +void Fr_squ_montg(Fr *res, const Fr *a); +void Fr_to_montg(Fr *res, const Fr *a); +void Fr_from_montg(Fr *res, const Fr *a); +void Fr_inv_montg_eucl(Fr *res, const Fr *a); +ERROR Fr_read_bytes(Fr *a, const byte *bin, int len); +ERROR Fr_star_read_bytes(Fr *a, const byte *bin, int len); +void Fr_write_bytes(byte *bin, const Fr *a); +bool map_bytes_to_Fr(Fr *, const byte *, int); + +// Fp utilities +void Fp_mul_montg(Fp *, const Fp *, const Fp *); +void Fp_squ_montg(Fp *, const Fp *); + +// E1 and G1 utilities +void E1_copy(E1 *, const E1 *); +bool E1_is_equal(const E1 *, const E1 *); +void E1_set_infty(E1 *); +bool E1_is_infty(const E1 *); +void E1_to_affine(E1 *, const E1 *); +bool E1_affine_on_curve(const E1 *); +bool E1_in_G1(const E1 *); +void E1_mult(E1 *, const E1 *, const Fr *); +void E1_add(E1 *, const E1 *, const E1 *); +void E1_neg(E1 *, const E1 *); +void E1_sum_vector(E1 *, const E1 *, const int); +int E1_sum_vector_byte(byte *, const byte *, const int); +void G1_mult_gen(E1 *, const Fr *); +ERROR E1_read_bytes(E1 *, const byte *, const int); +void E1_write_bytes(byte *, const E1 *); +void unsafe_map_bytes_to_G1(E1 *, const byte *, int); +void unsafe_map_bytes_to_G1complement(E1 *, const byte *, int); + +#define MAP_TO_G1_INPUT_LEN (2 * (Fp_BYTES + SEC_BITS / 8)) +int map_to_G1(E1 *, const byte *, const int); + +// E2 and G2 utilities +void E2_set_infty(E2 *p); +bool E2_is_infty(const E2 *); +bool E2_affine_on_curve(const E2 *); +bool E2_is_equal(const E2 *, const E2 *); +void E2_copy(E2 *, const E2 *); +void E2_to_affine(E2 *, const E2 *); +ERROR E2_read_bytes(E2 *, const byte *, const int); +void E2_write_bytes(byte *, const E2 *); +void G2_mult_gen(E2 *, const Fr *); +void G2_mult_gen_to_affine(E2 *, const Fr *); +void E2_mult(E2 *, const E2 *, const Fr *); +void E2_mult_small_expo(E2 *, const E2 *, const byte); +void E2_add(E2 *res, const E2 *a, const E2 *b); +void E2_neg(E2 *, const E2 *); +void E2_sum_vector(E2 *, const E2 *, const int); +void E2_sum_vector_to_affine(E2 *, const E2 *, const int); +void E2_subtract_vector(E2 *res, const E2 *x, const E2 *y, const int len); +bool E2_in_G2(const E2 *); +void unsafe_map_bytes_to_G2(E2 *, const byte *, int); +void unsafe_map_bytes_to_G2complement(E2 *, const byte *, int); + +// pairing and Fp12 +bool Fp12_is_one(Fp12 *); +void Fp12_set_one(Fp12 *); +void Fp12_multi_pairing(Fp12 *, const E1 *, const E2 *, const int); // utility testing function -void xmd_sha256(uint8_t *, int, uint8_t *, int, uint8_t *, int); +void xmd_sha256(byte *, int, byte *, int, byte *, int); // Debugging related functions -void bytes_print_(char*, byte*, int); -void fp_print_(char*, fp_t); -void bn_print_(char*, bn_st*); -void ep_print_(char*, ep_st*); -void ep2_print_(char*, ep2_st*); - -#endif \ No newline at end of file +// DEBUG can be enabled directly from the Go command: CC="clang -DDEBUG" go test +#ifdef DEBUG +#include +void bytes_print_(char *, byte *, int); +void Fr_print_(char *, Fr *); +void Fp_print_(char *, const Fp *); +void Fp2_print_(char *, const Fp2 *); +void Fp12_print_(char *, const Fp12 *); +void E1_print_(char *, const E1 *, const int); +void E2_print_(char *, const E2 *, const int); + +#endif /* DEBUG */ + +// memory sanitization disabler +#define NO_MSAN +#ifdef MSAN +/* add NO_MSAN to a function defintion to disable MSAN in that function ( void + * NO_MSAN f(..) {} ) */ +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +// disable memory sanitization in this function because of a +// use-of-uninitialized-value false positive. +#undef NO_MSAN +#define NO_MSAN __attribute__((no_sanitize("memory"))) +#endif /* __has_feature(memory_sanitizer) */ +#endif /* __has_feature*/ +#endif /*MSAN*/ + +#endif /* BLS12_381_UTILS */ \ No newline at end of file diff --git a/crypto/bls12381_utils_test.go b/crypto/bls12381_utils_test.go index f8278414e4a..a528e240363 100644 --- a/crypto/bls12381_utils_test.go +++ b/crypto/bls12381_utils_test.go @@ -1,10 +1,7 @@ -//go:build relic -// +build relic - package crypto import ( - crand "crypto/rand" + "crypto/rand" "encoding/hex" "testing" @@ -12,85 +9,96 @@ import ( "github.com/stretchr/testify/require" ) -func TestDeterministicKeyGen(t *testing.T) { - // 2 keys generated with the same seed should be equal - seed := make([]byte, KeyGenSeedMinLen) - n, err := crand.Read(seed) - require.Equal(t, n, KeyGenSeedMinLen) +// Sanity check of G1 and G2 scalar multiplication +func TestScalarMultBLS12381(t *testing.T) { + expoBytes, err := hex.DecodeString("444465cb6cc2dba9474e6beeb6a9013fbf1260d073429fb14a31e63e89129390") require.NoError(t, err) - sk1, err := GeneratePrivateKey(BLSBLS12381, seed) - require.Nil(t, err) - sk2, err := GeneratePrivateKey(BLSBLS12381, seed) - require.Nil(t, err) - assert.True(t, sk1.Equals(sk2), "private keys should be equal") -} -// test the deterministicity of the relic PRG (used by the DKG polynomials) -func TestPRGseeding(t *testing.T) { - blsInstance.reInit() - // 2 scalars generated with the same seed should be equal - seed := make([]byte, KeyGenSeedMinLen) - n, err := crand.Read(seed) - require.Equal(t, n, KeyGenSeedMinLen) - require.NoError(t, err) - // 1st scalar (wrapped in a private key) - err = seedRelic(seed) - require.Nil(t, err) - var sk1 prKeyBLSBLS12381 - randZr(&sk1.scalar) - // 2nd scalar (wrapped in a private key) - err = seedRelic(seed) - require.Nil(t, err) - var sk2 prKeyBLSBLS12381 - randZr(&sk2.scalar) - // compare the 2 scalars (by comparing the private keys) - assert.True(t, sk1.Equals(&sk2), "private keys should be equal") + var expo scalar + isZero := mapToFr(&expo, expoBytes) + require.False(t, isZero) + + // G1 generator multiplication + // Note that generator and random point multiplications + // are implemented with the same algorithm + t.Run("G1", func(t *testing.T) { + if !isG1Compressed() { + t.Skip() + } + var p pointE1 + generatorScalarMultG1(&p, &expo) + expected, err := hex.DecodeString("96484ca50719f5d2533047960878b6bae8289646c0f00a942a1e6992be9981a9e0c7a51e9918f9b19d178cf04a8018a4") + require.NoError(t, err) + pBytes := make([]byte, g1BytesLen) + writePointE1(pBytes, &p) + assert.Equal(t, pBytes, expected) + }) + + // G2 generator multiplication + // Note that generator and random point multiplications + // are implemented with the same algorithm + t.Run("G2", func(t *testing.T) { + if !isG2Compressed() { + t.Skip() + } + var p pointE2 + generatorScalarMultG2(&p, &expo) + expected, err := hex.DecodeString("b35f5043f166848805b98da62dcb9c5d2f25e497bd0d9c461d4a00d19e4e67cc1e813de3c99479d5a2c62fb754fd7df40c4fd60c46834c8ae665343a3ff7dc3cc929de34ad62b7b55974f4e3fd20990d3e564b96e4d33de87716052d58cf823e") + require.NoError(t, err) + pBytes := make([]byte, g2BytesLen) + writePointE2(pBytes, &p) + assert.Equal(t, pBytes, expected) + }) } // G1 and G2 scalar multiplication -func BenchmarkScalarMultG1G2(b *testing.B) { - blsInstance.reInit() +func BenchmarkScalarMult(b *testing.B) { seed := make([]byte, securityBits/8) - _, err := crand.Read(seed) + _, err := rand.Read(seed) require.NoError(b, err) - _ = seedRelic(seed) + var expo scalar - randZr(&expo) + _ = mapToFr(&expo, seed) // G1 generator multiplication + // Note that generator and random point multiplications + // are implemented with the same algorithm + var res pointE1 b.Run("G1 gen", func(b *testing.B) { - var res pointG1 b.ResetTimer() for i := 0; i < b.N; i++ { generatorScalarMultG1(&res, &expo) } - b.StopTimer() }) - // G1 base point multiplication - b.Run("G1 generic", func(b *testing.B) { - var res pointG1 + // E1 random point multiplication + // Note that generator and random point multiplications + // are implemented with the same algorithm + b.Run("E1 rand", func(b *testing.B) { + var res pointE1 b.ResetTimer() for i := 0; i < b.N; i++ { - genericScalarMultG1(&res, &expo) + res.scalarMultE1(&res, &expo) } - b.StopTimer() }) - // G2 base point multiplication + // G2 generator multiplication + // Note that generator and random point multiplications + // are implemented with the same algorithm b.Run("G2 gen", func(b *testing.B) { - var res pointG2 + var res pointE2 b.ResetTimer() for i := 0; i < b.N; i++ { generatorScalarMultG2(&res, &expo) } - b.StopTimer() }) } // Sanity-check of the map-to-G1 with regards to the IETF draft hash-to-curve func TestMapToG1(t *testing.T) { - + if !isG1Compressed() { + t.Skip() + } // test vectors from https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-hash-to-curve-14#appendix-J.9.1 dst := []byte("QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_") @@ -112,6 +120,7 @@ func TestMapToG1(t *testing.T) { for i, msg := range msgs { pointBytes := hashToG1Bytes(msg, dst) + require.NotNil(t, pointBytes) expectedPointBytes, err := hex.DecodeString(expectedPointString[i]) require.NoError(t, err) @@ -123,69 +132,142 @@ func TestMapToG1(t *testing.T) { // Hashing to G1 bench func BenchmarkMapToG1(b *testing.B) { - blsInstance.reInit() input := make([]byte, expandMsgOutput) for i := 0; i < len(input); i++ { input[i] = byte(i) } b.ResetTimer() + var p *pointE1 for i := 0; i < b.N; i++ { - mapToG1(input) + p = mapToG1(input) } - b.StopTimer() + require.NotNil(b, p) } // test subgroup membership check in G1 and G2 func TestSubgroupCheck(t *testing.T) { - blsInstance.reInit() - // seed Relic PRG - seed := make([]byte, securityBits/8) - _, err := crand.Read(seed) + prg := getPRG(t) + seed := make([]byte, 192) + _, err := prg.Read(seed) require.NoError(t, err) - _ = seedRelic(seed) t.Run("G1", func(t *testing.T) { - var p pointG1 - randPointG1(&p) // point in G1 - res := checkMembershipG1(&p) - assert.Equal(t, res, int(valid)) - randPointG1Complement(&p) // point in E1\G1 - res = checkMembershipG1(&p) - assert.Equal(t, res, int(invalid)) + var p pointE1 + unsafeMapToG1(&p, seed) // point in G1 + assert.True(t, checkMembershipG1(&p)) + + unsafeMapToG1Complement(&p, seed) // point in E2\G2 + assert.False(t, checkMembershipG1(&p)) }) t.Run("G2", func(t *testing.T) { - var p pointG2 - randPointG2(&p) // point in G2 - res := checkMembershipG2(&p) - assert.Equal(t, res, int(valid)) - randPointG2Complement(&p) // point in E2\G2 - res = checkMembershipG2(&p) - assert.Equal(t, res, int(invalid)) + var p pointE2 + unsafeMapToG2(&p, seed) // point in G2 + assert.True(t, checkMembershipG2(&p)) + + unsafeMapToG2Complement(&p, seed) // point in E2\G2 + assert.False(t, checkMembershipG2(&p)) }) } // subgroup membership check bench func BenchmarkSubgroupCheck(b *testing.B) { - blsInstance.reInit() + seed := make([]byte, g2BytesLen) + _, err := rand.Read(seed) + require.NoError(b, err) b.Run("G1", func(b *testing.B) { - var p pointG1 - randPointG1(&p) + var p pointE1 + unsafeMapToG1(&p, seed) // point in G1 b.ResetTimer() for i := 0; i < b.N; i++ { _ = checkMembershipG1(&p) // G1 } - b.StopTimer() }) b.Run("G2", func(b *testing.B) { - var p pointG2 - randPointG2(&p) + var p pointE2 + unsafeMapToG2(&p, seed) // point in G2 b.ResetTimer() for i := 0; i < b.N; i++ { _ = checkMembershipG2(&p) // G2 } - b.StopTimer() }) } + +// specific test of G1 points Encode and decode (BLS signature since the library is set for min_sig). +// G2 points read and write are implicitly tested by public keys Encode/Decode. +func TestReadWriteG1(t *testing.T) { + prg := getPRG(t) + seed := make([]byte, frBytesLen) + bytes := make([]byte, g1BytesLen) + // generate a random G1 point, encode it, decode it, + // and compare it the original point + t.Run("random points", func(t *testing.T) { + iterations := 50 + for i := 0; i < iterations; i++ { + var p, q pointE1 + _, err := prg.Read(seed) + unsafeMapToG1(&p, seed) + require.NoError(t, err) + writePointE1(bytes, &p) + err = readPointE1(&q, bytes) + require.NoError(t, err) + assert.True(t, p.equals(&q)) + } + }) + + t.Run("infinity", func(t *testing.T) { + var p, q pointE1 + seed := make([]byte, frBytesLen) + unsafeMapToG1(&p, seed) // this results in the infinity point given how `unsafeMapToG1` works with an empty scalar + writePointE1(bytes, &p) + require.True(t, IsBLSSignatureIdentity(bytes)) // sanity check + err := readPointE1(&q, bytes) + require.NoError(t, err) + assert.True(t, p.equals(&q)) + }) +} + +// test some edge cases of MapToFr to validate modular reduction and endianness: +// - inputs `0` and curve order `r` +// - inputs `1` and `r+1` +func TestMapToFr(t *testing.T) { + var x scalar + offset := 10 + bytes := make([]byte, frBytesLen+offset) + expectedEncoding := make([]byte, frBytesLen) + // zero bytes + isZero := mapToFr(&x, bytes) + assert.True(t, isZero) + assert.True(t, x.isZero()) + assert.Equal(t, expectedEncoding, newPrKeyBLSBLS12381(&x).Encode()) + // curve order bytes + copy(bytes[offset:], BLS12381Order) + isZero = mapToFr(&x, bytes) + assert.True(t, isZero) + assert.True(t, x.isZero()) + assert.Equal(t, expectedEncoding, newPrKeyBLSBLS12381(&x).Encode()) + // curve order + 1 + g1, err := hex.DecodeString("824aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb813e02b6052719f607dacd3a088274f65596bd0d09920b61ab5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e") + require.NoError(t, err) + bytes[len(bytes)-1] += 1 + isZero = mapToFr(&x, bytes) + assert.False(t, isZero) + assert.False(t, x.isZero()) + expectedEncoding[frBytesLen-1] = 1 + sk := newPrKeyBLSBLS12381(&x) + assert.Equal(t, expectedEncoding, sk.Encode()) + // check scalar is equal to "1" in the lower layer (scalar multiplication) + assert.Equal(t, sk.PublicKey().Encode(), g1, "scalar should be 1, check endianness in the C layer") + // 1 + copy(bytes[offset:], expectedEncoding) + isZero = mapToFr(&x, bytes) + assert.False(t, isZero) + assert.False(t, x.isZero()) + expectedEncoding[frBytesLen-1] = 1 + sk = newPrKeyBLSBLS12381(&x) + assert.Equal(t, expectedEncoding, sk.Encode()) + // check scalar is equal to "1" in the lower layer (scalar multiplication) + assert.Equal(t, sk.PublicKey().Encode(), g1, "scalar should be 1, check endianness in the C layer") +} diff --git a/crypto/bls_core.c b/crypto/bls_core.c index 32b56a5d03d..65f510f5987 100644 --- a/crypto/bls_core.c +++ b/crypto/bls_core.c @@ -1,541 +1,506 @@ -// +build relic - #include "bls_include.h" // this file is about the core functions required by the BLS signature scheme -// The functions are tested for ALLOC=AUTO (not for ALLOC=DYNAMIC) - -// functions to export macros to the Go layer (because cgo does not import macros) -int get_signature_len() { - return SIGNATURE_LEN; +// Compute a BLS signature from a G1 point (not checked) and writes it in `out`. +// `out` must be allocated properly with `G1_SER_BYTES` bytes. +static void bls_sign_E1(byte *out, const Fr *sk, const E1 *h) { + // s = h^sk + E1 s; + E1_mult(&s, h, sk); + E1_write_bytes(out, &s); } -int get_pk_len() { - return PK_LEN; +// Computes a BLS signature from a hash and writes it in `out`. +// `hash` represents the hashed message with length `hash_len` equal to +// `MAP_TO_G1_INPUT_LEN`. +// `out` must be allocated properly with `G1_SER_BYTES` bytes. +int bls_sign(byte *out, const Fr *sk, const byte *hash, const int hash_len) { + // hash to G1 + E1 h; + if (map_to_G1(&h, hash, hash_len) != VALID) { + return INVALID; + } + // s = h^sk + bls_sign_E1(out, sk, &h); + return VALID; } -int get_sk_len() { - return SK_LEN; -} - -// checks an input scalar a satisfies 0 < a < r -// where (r) is the order of G1/G2 -int check_membership_Zr_star(const bn_t a){ - if (bn_cmp(a, &core_get()->ep_r) != RLC_LT || bn_cmp_dig(a, 0) != RLC_GT) { - return INVALID; - } - return VALID; -} - -// Checks if input point p is in the subgroup G1. -// The function assumes the input is known to be on the curve E1. -int check_membership_G1(const ep_t p){ -#if MEMBERSHIP_CHECK - #if MEMBERSHIP_CHECK_G1 == EXP_ORDER - return simple_subgroup_check_G1(p); - #elif MEMBERSHIP_CHECK_G1 == BOWE - // section 3.2 from https://eprint.iacr.org/2019/814.pdf - return bowe_subgroup_check_G1(p); - #else - return UNDEFINED; - #endif -#endif - return VALID; -} - -// checks if input point s is on the curve E2 -// and is in the subgroup G2. -// -// membership check in G2 is using a scalar multiplication by the group order. -// TODO: switch to the faster Bowe check -int check_membership_G2(const ep2_t p){ -#if MEMBERSHIP_CHECK - // check p is on curve - if (!ep2_on_curve((ep2_st*)p)) - return INVALID; - // check p is in G2 - #if MEMBERSHIP_CHECK_G2 == EXP_ORDER - return simple_subgroup_check_G2(p); - #elif MEMBERSHIP_CHECK_G2 == BOWE - // TODO: implement Bowe's check - return UNDEFINED; - #else - return UNDEFINED; - #endif -#endif - return VALID; -} - -// Computes a BLS signature from a G1 point -static void bls_sign_ep(byte* s, const bn_t sk, const ep_t h) { - ep_t p; - ep_new(p); - // s = h^sk - ep_mult(p, h, sk); - ep_write_bin_compact(s, p, SIGNATURE_LEN); - ep_free(p); -} - -// Computes a BLS signature from a hash -void bls_sign(byte* s, const bn_t sk, const byte* data, const int len) { - ep_t h; - ep_new(h); - // hash to G1 - map_to_G1(h, data, len); - // s = h^sk - bls_sign_ep(s, sk, h); - ep_free(h); -} +extern const E2 *BLS12_381_minus_g2; // Verifies a BLS signature (G1 point) against a public key (G2 point) -// and a message data. -// The signature and public key are assumed to be in G1 and G2 respectively. This -// function only checks the pairing equality. -static int bls_verify_ep(const ep2_t pk, const ep_t s, const byte* data, const int len) { - - ep_t elemsG1[2]; - ep2_t elemsG2[2]; - - // elemsG1[0] = s - ep_new(elemsG1[0]); - ep_copy(elemsG1[0], (ep_st*)s); - - // elemsG1[1] = h - ep_new(elemsG1[1]); - // hash to G1 - map_to_G1(elemsG1[1], data, len); - - // elemsG2[1] = pk - ep2_new(elemsG2[1]); - ep2_copy(elemsG2[1], (ep2_st*)pk); - ep2_new(&elemsG2[0]); - - int ret = UNDEFINED; - -#if DOUBLE_PAIRING - // elemsG2[0] = -g2 - ep2_neg(elemsG2[0], core_get()->ep2_g); // could be hardcoded - - fp12_t pair; - fp12_new(&pair); - // double pairing with Optimal Ate - pp_map_sim_oatep_k12(pair, (ep_t*)(elemsG1) , (ep2_t*)(elemsG2), 2); - - // compare the result to 1 - int res = fp12_cmp_dig(pair, 1); - -#elif SINGLE_PAIRING - fp12_t pair1, pair2; - fp12_new(&pair1); fp12_new(&pair2); - pp_map_oatep_k12(pair1, elemsG1[0], core_get()->ep2_g); - pp_map_oatep_k12(pair2, elemsG1[1], elemsG2[1]); - - int res = fp12_cmp(pair1, pair2); -#endif - if (core_get()->code == RLC_OK) { - if (res == RLC_EQ) { - ret = VALID; - goto out; - } else { - ret = INVALID; - goto out; - } - } - -out: - ep_free(elemsG1[0]); - ep_free(elemsG1[1]); - ep2_free(elemsG2[0]); - ep2_free(elemsG2[1]); - - return ret; +// and a message hash `h` (G1 point). +// Hash, signature and public key are assumed to be in G1, G1 and G2 +// respectively. +// This function only checks the pairing equality. +static int bls_verify_E1(const E2 *pk, const E1 *s, const E1 *h) { + E1 elemsG1[2]; + E2 elemsG2[2]; + + // elemsG1[0] = s, elemsG1[1] = h + E1_copy(&elemsG1[0], s); + E1_copy(&elemsG1[1], h); + + // elemsG2[0] = -g2, elemsG2[1] = pk + E2_copy(&elemsG2[0], BLS12_381_minus_g2); + E2_copy(&elemsG2[1], pk); + + // double pairing + Fp12 e; + Fp12_multi_pairing(&e, elemsG1, elemsG2, 2); + if (Fp12_is_one(&e)) { + return VALID; + } + return INVALID; } - // Verifies the validity of an aggregated BLS signature under distinct messages. // -// Each message is mapped to a set of public keys, so that the verification equation is -// optimized to compute one pairing per message. +// Each message is mapped to a set of public keys, so that the verification +// equation is optimized to compute one pairing per message. // - sig is the signature. // - nb_hashes is the number of the messages (hashes) in the map -// - hashes is pointer to all flattened hashes in order where the hash at index i has a byte length len_hashes[i], -// is mapped to pks_per_hash[i] public keys. +// - hashes is pointer to all flattened hashes in order where the hash at index +// i has a byte length len_hashes[i], +// is mapped to pks_per_hash[i] public keys. // - the keys are flattened in pks in the same hashes order. // // membership check of the signature in G1 is verified in this function // membership check of pks in G2 is not verified in this function -// the membership check is separated to allow optimizing multiple verifications using the same pks -int bls_verifyPerDistinctMessage(const byte* sig, - const int nb_hashes, const byte* hashes, const uint32_t* len_hashes, - const uint32_t* pks_per_hash, const ep2_st* pks) { - - int ret = UNDEFINED; // return value - - ep_t* elemsG1 = (ep_t*)malloc((nb_hashes + 1) * sizeof(ep_t)); - if (!elemsG1) goto outG1; - ep2_t* elemsG2 = (ep2_t*)malloc((nb_hashes + 1) * sizeof(ep2_t)); - if (!elemsG2) goto outG2; - - for (int i=0; i < nb_hashes+1; i++) { - ep_new(elemsG1[i]); - ep2_new(elemsG2[i]); - } - - // elemsG1[0] = sig - ret = ep_read_bin_compact(elemsG1[0], sig, SIGNATURE_LEN); - if (ret != RLC_OK) goto out; - - // check s is in G1 - ret = check_membership_G1(elemsG1[0]); // only enabled if MEMBERSHIP_CHECK==1 - if (ret != VALID) goto out; - - // elemsG2[0] = -g2 - ep2_neg(elemsG2[0], core_get()->ep2_g); // could be hardcoded - - // map all hashes to G1 - int offset = 0; - for (int i=1; i < nb_hashes+1; i++) { - // elemsG1[i] = h - // hash to G1 - map_to_G1(elemsG1[i], &hashes[offset], len_hashes[i-1]); - offset += len_hashes[i-1]; - } - - // aggregate public keys mapping to the same hash - offset = 0; - for (int i=1; i < nb_hashes+1; i++) { - // elemsG2[i] = agg_pk[i] - ep2_sum_vector(elemsG2[i], (ep2_st*) &pks[offset] , pks_per_hash[i-1]); - offset += pks_per_hash[i-1]; - } - - fp12_t pair; - fp12_new(&pair); - // double pairing with Optimal Ate - pp_map_sim_oatep_k12(pair, (ep_t*)(elemsG1) , (ep2_t*)(elemsG2), nb_hashes+1); - - // compare the result to 1 - int cmp_res = fp12_cmp_dig(pair, 1); - - if (core_get()->code == RLC_OK) { - if (cmp_res == RLC_EQ) ret = VALID; - else ret = INVALID; - } else { - ret = UNDEFINED; - } +// the membership check is separated to allow optimizing multiple verifications +// using the same pks +int bls_verifyPerDistinctMessage(const byte *sig, const int nb_hashes, + const byte *hashes, const uint32_t *len_hashes, + const uint32_t *pks_per_hash, const E2 *pks) { + + int ret = UNDEFINED; // return value + + E1 *elemsG1 = (E1 *)malloc((nb_hashes + 1) * sizeof(E1)); + if (!elemsG1) + goto outG1; + E2 *elemsG2 = (E2 *)malloc((nb_hashes + 1) * sizeof(E2)); + if (!elemsG2) + goto outG2; + + // elemsG1[0] = sig + if (E1_read_bytes(&elemsG1[0], sig, G1_SER_BYTES) != VALID) { + ret = INVALID; + goto out; + } + + // check signature is in G1 + if (!E1_in_G1(&elemsG1[0])) { + ret = INVALID; + goto out; + } + + // elemsG2[0] = -g2 + E2_copy(&elemsG2[0], BLS12_381_minus_g2); + + // map all hashes to G1 + int offset = 0; + for (int i = 1; i < nb_hashes + 1; i++) { + // elemsG1[i] = h + // hash to G1 + map_to_G1(&elemsG1[i], &hashes[offset], len_hashes[i - 1]); + offset += len_hashes[i - 1]; + } + + // aggregate public keys mapping to the same hash + offset = 0; + for (int i = 1; i < nb_hashes + 1; i++) { + // elemsG2[i] = agg_pk[i] + E2_sum_vector(&elemsG2[i], &pks[offset], pks_per_hash[i - 1]); + offset += pks_per_hash[i - 1]; + } + + // multi pairing + Fp12 e; + Fp12_multi_pairing(&e, elemsG1, elemsG2, nb_hashes + 1); + if (Fp12_is_one(&e)) { + ret = VALID; + } else { + ret = INVALID; + } out: - for (int i=0; i < nb_hashes+1; i++) { - ep_free(elemsG1[i]); - ep2_free(elemsG2[i]); - } - free(elemsG2); + free(elemsG2); outG2: - free(elemsG1); + free(elemsG1); outG1: - return ret; + return ret; } - -// Verifies the validity of an aggregated BLS signature under distinct public keys. +// Verifies the validity of an aggregated BLS signature under distinct public +// keys. // -// Each key is mapped to a set of messages, so that the verification equation is -// optimized to compute one pairing per public key. +// Each key is mapped to a set of messages, so that the verification equation is +// optimized to compute one pairing per public key. // - nb_pks is the number of the public keys in the map. // - pks is pointer to all pks in order where the key at index i -// is mapped to hashes_per_pk[i] hashes. +// is mapped to hashes_per_pk[i] hashes. // - the messages (hashes) are flattened in hashes in the same public key order, // each with a length in len_hashes. // // membership check of the signature in G1 is verified in this function // membership check of pks in G2 is not verified in this function -// the membership check is separated to allow optimizing multiple verifications using the same pks -int bls_verifyPerDistinctKey(const byte* sig, - const int nb_pks, const ep2_st* pks, const uint32_t* hashes_per_pk, - const byte* hashes, const uint32_t* len_hashes){ - - int ret = UNDEFINED; // return value - - ep_t* elemsG1 = (ep_t*)malloc((nb_pks + 1) * sizeof(ep_t)); - if (!elemsG1) goto outG1; - ep2_t* elemsG2 = (ep2_t*)malloc((nb_pks + 1) * sizeof(ep2_t)); - if (!elemsG2) goto outG2; - for (int i=0; i < nb_pks+1; i++) { - ep_new(elemsG1[i]); - ep2_new(elemsG2[i]); +// the membership check is separated to allow optimizing multiple verifications +// using the same pks +int bls_verifyPerDistinctKey(const byte *sig, const int nb_pks, const E2 *pks, + const uint32_t *hashes_per_pk, const byte *hashes, + const uint32_t *len_hashes) { + + int ret = UNDEFINED; // return value + + E1 *elemsG1 = (E1 *)malloc((nb_pks + 1) * sizeof(E1)); + if (!elemsG1) + goto outG1; + E2 *elemsG2 = (E2 *)malloc((nb_pks + 1) * sizeof(E2)); + if (!elemsG2) + goto outG2; + + // elemsG1[0] = s + if (E1_read_bytes(&elemsG1[0], sig, G1_SER_BYTES) != VALID) { + ret = INVALID; + goto out; + } + + // check s in G1 + if (!E1_in_G1(&elemsG1[0])) { + ret = INVALID; + goto out; + } + + // elemsG2[0] = -g2 + E2_copy(&elemsG2[0], BLS12_381_minus_g2); + + // set the public keys + for (int i = 1; i < nb_pks + 1; i++) { + E2_copy(&elemsG2[i], &pks[i - 1]); + } + + // map all hashes to G1 and aggregate the ones with the same public key + + // tmp_hashes is a temporary array of all hashes under a same key mapped to a + // G1 point. tmp_hashes size is set to the maximum possible size to minimize + // malloc calls. + int tmp_hashes_size = hashes_per_pk[0]; + for (int i = 1; i < nb_pks; i++) { + if (hashes_per_pk[i] > tmp_hashes_size) { + tmp_hashes_size = hashes_per_pk[i]; } - - // elemsG1[0] = s - ret = ep_read_bin_compact(elemsG1[0], sig, SIGNATURE_LEN); - if (ret != RLC_OK) goto out; - - // check s in G1 - ret = check_membership_G1(elemsG1[0]); // only enabled if MEMBERSHIP_CHECK==1 - if (ret != VALID) goto out; - - // elemsG2[0] = -g2 - ep2_neg(elemsG2[0], core_get()->ep2_g); // could be hardcoded - - // set the public keys - for (int i=1; i < nb_pks+1; i++) { - ep2_copy(elemsG2[i], (ep2_st*) &pks[i-1]); + } + E1 *tmp_hashes = (E1 *)malloc(tmp_hashes_size * sizeof(E1)); + if (!tmp_hashes) { + ret = UNDEFINED; + goto out; + } + + // sum hashes under the same key + int data_offset = 0; + int index_offset = 0; + for (int i = 1; i < nb_pks + 1; i++) { + for (int j = 0; j < hashes_per_pk[i - 1]; j++) { + // map the hash to G1 + map_to_G1(&tmp_hashes[j], &hashes[data_offset], len_hashes[index_offset]); + data_offset += len_hashes[index_offset]; + index_offset++; } + // aggregate all the points of the array + E1_sum_vector(&elemsG1[i], tmp_hashes, hashes_per_pk[i - 1]); + } + free(tmp_hashes); - // map all hashes to G1 and aggregate the ones with the same public key - - // tmp_hashes is a temporary array of all hashes under a same key mapped to a G1 point. - // tmp_hashes size is set to the maximum possible size to minimize malloc calls. - int tmp_hashes_size = hashes_per_pk[0]; - for (int i=1; i tmp_hashes_size) - tmp_hashes_size = hashes_per_pk[i]; - ep_st* tmp_hashes = (ep_st*)malloc(tmp_hashes_size * sizeof(ep_st)); - if (!tmp_hashes) { - ret = UNDEFINED; - goto out; - } + // multi pairing + Fp12 e; + Fp12_multi_pairing(&e, elemsG1, elemsG2, nb_pks + 1); - // sum hashes under the same key - for (int i=0; icode == RLC_OK) { - if (cmp_res == RLC_EQ) ret = VALID; - else ret = INVALID; - } else { - ret = UNDEFINED; - } + if (Fp12_is_one(&e)) { + ret = VALID; + } else { + ret = INVALID; + } out: - for (int i=0; i < nb_pks+1; i++) { - ep_free(elemsG1[i]); - ep2_free(elemsG2[i]); - } - free(elemsG2); + free(elemsG2); outG2: - free(elemsG1); + free(elemsG1); outG1: - return ret; + return ret; } // Verifies a BLS signature in a byte buffer. // membership check of the signature in G1 is verified. // membership check of pk in G2 is not verified in this function. -// the membership check in G2 is separated to allow optimizing multiple verifications using the same key. -int bls_verify(const ep2_t pk, const byte* sig, const byte* data, const int len) { - ep_t s; - ep_new(s); - - // deserialize the signature into a curve point - int read_ret = ep_read_bin_compact(s, sig, SIGNATURE_LEN); - if (read_ret != RLC_OK) - return read_ret; - - // check s is in G1 - if (check_membership_G1(s) != VALID) // only enabled if MEMBERSHIP_CHECK==1 - return INVALID; - - return bls_verify_ep(pk, s, data, len); +// the membership check in G2 is separated to optimize multiple verifications +// using the same key. `hash` represents the hashed message with length +// `hash_len` equal to `MAP_TO_G1_INPUT_LEN`. +int bls_verify(const E2 *pk, const byte *sig, const byte *hash, + const int hash_len) { + E1 s, h; + // deserialize the signature into a curve point + if (E1_read_bytes(&s, sig, G1_SER_BYTES) != VALID) { + return INVALID; + } + + // check s is in G1 + if (!E1_in_G1(&s)) { + return INVALID; + } + + if (map_to_G1(&h, hash, hash_len) != VALID) { + return INVALID; + } + + return bls_verify_E1(pk, &s, &h); } // binary tree structure to be used by bls_batch verify. -// Each node contains a signature and a public key, the signature (resp. the public key) -// being the aggregated signature of the two children's signature (resp. public keys). -// The leaves contain the initial signatures and public keys. -typedef struct st_node { - ep_st* sig; - ep2_st* pk; - struct st_node* left; - struct st_node* right; +// Each node contains a signature and a public key, the signature (resp. the +// public key) being the aggregated signature of the two children's signature +// (resp. public keys). The leaves contain the initial signatures and public +// keys. +typedef struct st_node { + E1 *sig; + E2 *pk; + struct st_node *left; + struct st_node *right; } node; -static node* new_node(const ep2_st* pk, const ep_st* sig){ - node* t = (node*) malloc(sizeof(node)); - if (t) { - t->pk = (ep2_st*)pk; - t->sig = (ep_st*)sig; - t->right = t->left = NULL; - } - return t; +static node *new_node(const E2 *pk, const E1 *sig) { + node *t = (node *)malloc(sizeof(node)); + if (t) { + t->pk = (E2 *)pk; + t->sig = (E1 *)sig; + t->right = t->left = NULL; + } + return t; } -static void free_tree(node* root) { - if (!root) return; - - // only free pks and sigs of non-leafs, data of leafs are allocated - // as an entire array in `bls_batchVerify`. - if (root->left) { // no need to check the right child for the leaf check because - // the recursive build starts with the left side first - // relic free - if (root->sig) ep_free(root->sig); - if (root->pk) ep2_free(root->pk); - // pointer free - free(root->sig); - free(root->pk); - // free the children nodes - free_tree(root->left); - free_tree(root->right); - } - free(root); +static void free_tree(node *root) { + if (!root) + return; + + // only free pks and sigs of non-leafs, data of leafs are allocated + // as an entire array in `bls_batch_verify`. + if (root->left) { // no need to check the right child for the leaf check + // because + // the recursive build starts with the left side first + // pointer free + free(root->sig); + free(root->pk); + // free the children nodes + free_tree(root->left); + free_tree(root->right); + } + free(root); } -// builds a binary tree of aggregation of signatures and public keys recursively. -static node* build_tree(const int len, const ep2_st* pks, const ep_st* sigs) { - // check if a leaf is reached - if (len == 1) { - return new_node(&pks[0], &sigs[0]); // use the first element of the arrays - } - - // a leaf is not reached yet, - int right_len = len/2; - int left_len = len - right_len; - - // create a new node with new points - ep2_st* new_pk = (ep2_st*)malloc(sizeof(ep2_st)); - if (!new_pk) goto error; - ep_st* new_sig = (ep_st*)malloc(sizeof(ep_st)); - if (!new_sig) goto error_sig; - - node* t = new_node(new_pk, new_sig); - if (!t) goto error_node; - ep_new(t->sig); - ep2_new(t->pk); - - // build the tree in a top-down way - t->left = build_tree(left_len, &pks[0], &sigs[0]); - if (!t->left) { free_tree(t); goto error; } - - t->right = build_tree(right_len, &pks[left_len], &sigs[left_len]); - if (!t->right) { free_tree(t); goto error; } - // sum the children - ep_add_jacob(t->sig, t->left->sig, t->right->sig); - ep2_add_projc(t->pk, t->left->pk, t->right->pk); - return t; +// builds a binary tree of aggregation of signatures and public keys +// recursively. +static node *build_tree(const int len, const E2 *pks, const E1 *sigs) { + // check if a leaf is reached + if (len == 1) { + return new_node(&pks[0], &sigs[0]); // use the first element of the arrays + } + + // a leaf is not reached yet, + int right_len = len / 2; + int left_len = len - right_len; + + // create a new node with new points + E2 *new_pk = (E2 *)malloc(sizeof(E2)); + if (!new_pk) { + goto error; + } + E1 *new_sig = (E1 *)malloc(sizeof(E1)); + if (!new_sig) { + goto error_sig; + } + + node *t = new_node(new_pk, new_sig); + if (!t) + goto error_node; + + // build the tree in a top-down way + t->left = build_tree(left_len, &pks[0], &sigs[0]); + if (!t->left) { + free_tree(t); + goto error; + } + + t->right = build_tree(right_len, &pks[left_len], &sigs[left_len]); + if (!t->right) { + free_tree(t); + goto error; + } + // sum the children + E1_add(t->sig, t->left->sig, t->right->sig); + E2_add(t->pk, t->left->pk, t->right->pk); + return t; error_node: - free(new_sig); + free(new_sig); error_sig: - free(new_pk); + free(new_pk); error: - return NULL; + return NULL; } -// verify the binary tree and fill the results using recursive batch verifications. -static void bls_batchVerify_tree(const node* root, const int len, byte* results, - const byte* data, const int data_len) { - - // verify the aggregated signature against the aggregated public key. - int res = bls_verify_ep(root->pk, root->sig, data, data_len); - - // if the result is valid, all the subtree signatures are valid. - if (res == VALID) { - for (int i=0; i < len; i++) { - if (results[i] == UNDEFINED) results[i] = VALID; // do not overwrite invalid results - } - return; +// verify the binary tree and fill the results using recursive batch +// verifications. +static void bls_batch_verify_tree(const node *root, const int len, + byte *results, const E1 *h) { + // verify the aggregated signature against the aggregated public key. + int res = bls_verify_E1(root->pk, root->sig, h); + + // if the result is valid, all the subtree signatures are valid. + if (res == VALID) { + for (int i = 0; i < len; i++) { + if (results[i] == UNDEFINED) + results[i] = VALID; // do not overwrite invalid results } - - // check if root is a leaf - if (root->left == NULL) { // no need to check the right side - *results = INVALID; - return; - } - - // otherwise, at least one of the subtree signatures is invalid. - // use the binary tree structure to find the invalid signatures. - int right_len = len/2; - int left_len = len - right_len; - bls_batchVerify_tree(root->left, left_len, &results[0], data, data_len); - bls_batchVerify_tree(root->right, right_len, &results[left_len], data, data_len); + return; + } + + // check if root is a leaf + if (root->left == NULL) { // no need to check the right side + *results = INVALID; + return; + } + + // otherwise, at least one of the subtree signatures is invalid. + // use the binary tree structure to find the invalid signatures. + int right_len = len / 2; + int left_len = len - right_len; + bls_batch_verify_tree(root->left, left_len, &results[0], h); + bls_batch_verify_tree(root->right, right_len, &results[left_len], h); } -// Batch verifies the validity of a multiple BLS signatures of the -// same message under multiple public keys. +// Batch verifies the validity of a multiple BLS signatures of the +// same message under multiple public keys. Each signature at index `i` is +// verified against the public key at index `i`. `seed` is used as the entropy +// source for randoms required by the computation. The function assumes the +// source size is at least (16*sigs_len) of random bytes of entropy at least 128 +// bits. // // - membership checks of all signatures is verified upfront. -// - use random coefficients for signatures and public keys at the same index. -// - optimize the verification by verifying an aggregated signature against an aggregated -// public key, and use a recursive verification to find invalid signatures. -void bls_batchVerify(const int sigs_len, byte* results, const ep2_st* pks_input, - const byte* sigs_bytes, const byte* data, const int data_len) { - - // initialize results to undefined - memset(results, UNDEFINED, sigs_len); - - // build the arrays of G1 and G2 elements to verify - ep2_st* pks = (ep2_st*) malloc(sigs_len * sizeof(ep2_st)); - if (!pks) return; - ep_st* sigs = (ep_st*) malloc(sigs_len * sizeof(ep_st)); - if (!sigs) goto out_sigs; - for (int i=0; i < sigs_len; i++) { - ep_new(sigs[i]); - ep2_new(pks[i]); - } - bn_t r; bn_new(r); - - for (int i=0; i < sigs_len; i++) { - // convert the signature points: - // - invalid points are stored as infinity points with an invalid result, so that - // the tree aggregations remain valid. - // - valid points are multiplied by a random scalar (same for public keys at same index) - // to make sure a signature at index (i) is verified against the public key at the same index. - int read_ret = ep_read_bin_compact(&sigs[i], &sigs_bytes[SIGNATURE_LEN*i], SIGNATURE_LEN); - if ( read_ret != RLC_OK || check_membership_G1(&sigs[i]) != VALID) { - if (read_ret == UNDEFINED) // unexpected error case - goto out; - // set signature and key to infinity (no effect on the aggregation tree) - // and set result to invalid - ep_set_infty(&sigs[i]); - ep2_set_infty(&pks[i]); - results[i] = INVALID; - // multiply signatures and public keys at the same index by random coefficients - } else { - // random non-zero coefficient of a least 128 bits - bn_rand(r, RLC_POS, SEC_BITS); - bn_add_dig(r, r, 1); - ep_mul_lwnaf(&sigs[i], &sigs[i], r); - ep2_mul_lwnaf(&pks[i], (ep2_st*) &pks_input[i], r); - } +// - use random coefficients for signatures and public keys at the same index to +// prevent +// indices mixup. +// - optimize the verification by verifying an aggregated signature against an +// aggregated +// public key, and use a top-down recursive verification to find invalid +// signatures. +void bls_batch_verify(const int sigs_len, byte *results, const E2 *pks_input, + const byte *sigs_bytes, const byte *data, + const int data_len, const byte *seed) { + + // initialize results to undefined + memset(results, UNDEFINED, sigs_len); + + // build the arrays of G1 and G2 elements to verify + E2 *pks = (E2 *)malloc(sigs_len * sizeof(E2)); + if (!pks) { + return; + } + E1 *sigs = (E1 *)malloc(sigs_len * sizeof(E1)); + if (!sigs) { + goto out_sigs; + } + + E1 h; + if (map_to_G1(&h, data, data_len) != VALID) { + goto out; + } + + for (int i = 0; i < sigs_len; i++) { + // convert the signature points: + // - invalid points are stored as infinity points with an invalid result, so + // that the tree aggregations remain valid. + // - valid points are multiplied by a random scalar (same for public keys at + // same index) to make sure a signature at index (i) is verified against the + // public key at the same index. + int read_ret = + E1_read_bytes(&sigs[i], &sigs_bytes[G1_SER_BYTES * i], G1_SER_BYTES); + if (read_ret != VALID || !E1_in_G1(&sigs[i])) { + // set signature and key to infinity (no effect on the aggregation tree) + // and set result to invalid (result won't be overwritten) + E2_set_infty(&pks[i]); + E1_set_infty(&sigs[i]); + results[i] = INVALID; + } else { + // choose a random non-zero coefficient of at least 128 bits + Fr r, one; + // r = random, i-th seed is used for i-th signature + Fr_set_zero(&r); + const int seed_len = SEC_BITS / 8; + limbs_from_be_bytes((limb_t *)&r, seed + (seed_len * i), + seed_len); // faster shortcut than Fr_map_bytes + // r = random + 1 + Fr_set_limb(&one, 1); + Fr_add(&r, &r, &one); + // multiply public key and signature by the same random exponent r + E2_mult(&pks[i], &pks_input[i], &r); + E1_mult(&sigs[i], &sigs[i], &r); } - - // build a binary tree of aggreagtions - node* root = build_tree(sigs_len, &pks[0], &sigs[0]); - if (!root) goto out; - - // verify the binary tree and fill the results using batch verification - bls_batchVerify_tree(root, sigs_len, &results[0], data, data_len); - // free the allocated tree - free_tree(root); - + } + // build a binary tree of aggregations + node *root = build_tree(sigs_len, &pks[0], &sigs[0]); + if (!root) { + goto out; + } + + // verify the binary tree and fill the results using batch verification + bls_batch_verify_tree(root, sigs_len, &results[0], &h); + // free the allocated tree + free_tree(root); out: - bn_free(r); - for (int i=0; i < sigs_len; i++) { - ep_free(sigs[i]); - ep2_free(pks[i]); - } - free(sigs); + free(sigs); out_sigs: - free(pks); + free(pks); +} + +// Verifies the validity of 2 SPoCK proofs and 2 public keys. +// Membership check in G1 of both proofs is verified in this function. +// Membership check in G2 of both keys is not verified in this function. +// the membership check in G2 is separated to allow optimizing multiple +// verifications using the same public keys. +int bls_spock_verify(const E2 *pk1, const byte *sig1, const E2 *pk2, + const byte *sig2) { + E1 elemsG1[2]; + E2 elemsG2[2]; + + // elemsG1[0] = s1 + if (E1_read_bytes(&elemsG1[0], sig1, G1_SER_BYTES) != VALID) { + return INVALID; + }; + // check s1 is in G1 + if (!E1_in_G1(&elemsG1[0])) { + return INVALID; + } + + // elemsG1[1] = s2 + if (E1_read_bytes(&elemsG1[1], sig2, G1_SER_BYTES) != VALID) { + return INVALID; + }; + // check s2 is in G1 + if (!E1_in_G1(&elemsG1[1])) { + return INVALID; + } + + // elemsG2[1] = pk1 + E2_copy(&elemsG2[1], pk1); + + // elemsG2[0] = -pk2 + E2_neg(&elemsG2[0], pk2); + + // double pairing + Fp12 e; + Fp12_multi_pairing(&e, elemsG1, elemsG2, 2); + + if (Fp12_is_one(&e)) { + return VALID; + } + return INVALID; } diff --git a/crypto/bls_crossBLST_test.go b/crypto/bls_crossBLST_test.go index 5ac9e996cc1..3b3939eaf6c 100644 --- a/crypto/bls_crossBLST_test.go +++ b/crypto/bls_crossBLST_test.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package crypto // This file contains tests against the library BLST (https://github.com/supranational/blst). @@ -21,8 +18,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - blst "github.com/supranational/blst/bindings/go" "pgregory.net/rapid" + + "github.com/onflow/flow-go/crypto/internal/blst" ) // validPrivateKeyBytesFlow generates bytes of a valid private key in Flow library @@ -82,7 +80,7 @@ func validSignatureBytesBLST(t *rapid.T) []byte { // testEncodeDecodePrivateKeyCrossBLST tests encoding and decoding of private keys are consistent with BLST. // This test assumes private key serialization is identical to the one in BLST. func testEncodeDecodePrivateKeyCrossBLST(t *rapid.T) { - randomSlice := rapid.SliceOfN(rapid.Byte(), prKeyLengthBLSBLS12381, prKeyLengthBLSBLS12381) + randomSlice := rapid.SliceOfN(rapid.Byte(), PrKeyLenBLSBLS12381, PrKeyLenBLSBLS12381) validSliceFlow := rapid.Custom(validPrivateKeyBytesFlow) validSliceBLST := rapid.Custom(validPrivateKeyBytesBLST) // skBytes are bytes of either a valid or a random private key @@ -129,39 +127,36 @@ func testEncodeDecodePublicKeyCrossBLST(t *rapid.T) { if flowPass && blstPass { pkFlowOutBytes := pkFlow.Encode() pkBLSTOutBytes := pkBLST.Compress() - assert.Equal(t, pkFlowOutBytes, pkBLSTOutBytes) } } -// testEncodeDecodeSignatureCrossBLST tests encoding and decoding of signatures are consistent with BLST. -// This test assumes signature serialization is identical to the one in BLST. -func testEncodeDecodeSignatureCrossBLST(t *rapid.T) { - randomSlice := rapid.SliceOfN(rapid.Byte(), SignatureLenBLSBLS12381, SignatureLenBLSBLS12381) +// testEncodeDecodeG1CrossBLST tests encoding and decoding of G1 points are consistent with BLST. +// This test assumes signature serialization is identical to BLST. +func testEncodeDecodeG1CrossBLST(t *rapid.T) { + randomSlice := rapid.SliceOfN(rapid.Byte(), g1BytesLen, g1BytesLen) validSignatureFlow := rapid.Custom(validSignatureBytesFlow) validSignatureBLST := rapid.Custom(validSignatureBytesBLST) - // sigBytes are bytes of either a valid or a random signature + // sigBytes are bytes of either a valid serialization of a E1/G1 point, or random bytes sigBytes := rapid.OneOf(randomSlice, validSignatureFlow, validSignatureBLST).Example().([]byte) // check decoding results are consistent - var pointFlow pointG1 - // here we test readPointG1 rather than the simple Signature type alias - err := readPointG1(&pointFlow, sigBytes) - flowPass := (err == nil) && (checkMembershipG1(&pointFlow) == int(valid)) + var pointFlow pointE1 + err := readPointE1(&pointFlow, sigBytes) + flowPass := (err == nil) && (checkMembershipG1(&pointFlow)) var pointBLST blst.P1Affine + // res is non-nil iff point is in G1 res := pointBLST.Uncompress(sigBytes) - // flow validation has no infinity rejection for G1 blstPass := (res != nil) && pointBLST.SigValidate(false) - require.Equal(t, flowPass, blstPass, "deserialization of signature %x differs", sigBytes) + require.Equal(t, flowPass, blstPass, "deserialization of G1 %x differs", sigBytes) - // check both signatures (G1 points) are equal + // check both serializations of G1 points are equal if flowPass && blstPass { - sigFlowOutBytes := make([]byte, signatureLengthBLSBLS12381) - writePointG1(sigFlowOutBytes, &pointFlow) + sigFlowOutBytes := make([]byte, g1BytesLen) + writePointE1(sigFlowOutBytes, &pointFlow) sigBLSTOutBytes := pointBLST.Compress() - assert.Equal(t, sigFlowOutBytes, sigBLSTOutBytes) } } @@ -177,10 +172,10 @@ func testEncodeDecodeSignatureCrossBLST(t *rapid.T) { // // The test also assumes Flow signature serialization is identical to the one in BLST. func testSignHashCrossBLST(t *rapid.T) { - // generate two private keys from the same seed + // decode two private keys from the same bytes skBytes := rapid.Custom(validPrivateKeyBytesFlow).Example().([]byte) - skFlow, err := DecodePrivateKey(BLSBLS12381, skBytes) + require.NoError(t, err) var skBLST blst.Scalar res := skBLST.Deserialize(skBytes) @@ -195,7 +190,7 @@ func testSignHashCrossBLST(t *rapid.T) { sigBytesBLST := sigBLST.Compress() skFlowBLS, ok := skFlow.(*prKeyBLSBLS12381) - require.True(t, ok, "incoherent key type assertion") + require.True(t, ok) sigFlow := skFlowBLS.signWithXMDSHA256(message) sigBytesFlow := sigFlow.Bytes() @@ -214,10 +209,10 @@ func testKeyGenCrossBLST(t *rapid.T) { assert.Equal(t, skFlow.Encode(), skBLST.Serialize()) } -func TestAgainstBLST(t *testing.T) { +func TestCrossBLST(t *testing.T) { rapid.Check(t, testKeyGenCrossBLST) rapid.Check(t, testEncodeDecodePrivateKeyCrossBLST) rapid.Check(t, testEncodeDecodePublicKeyCrossBLST) - rapid.Check(t, testEncodeDecodeSignatureCrossBLST) + rapid.Check(t, testEncodeDecodeG1CrossBLST) rapid.Check(t, testSignHashCrossBLST) } diff --git a/crypto/bls_include.h b/crypto/bls_include.h index 016845719e1..af380735237 100644 --- a/crypto/bls_include.h +++ b/crypto/bls_include.h @@ -1,48 +1,22 @@ -// +build relic - // this file is about the core functions required by the BLS signature scheme -#ifndef _REL_BLS_INCLUDE_H -#define _REL_BLS_INCLUDE_H +#ifndef _BLS_INCLUDE_H +#define _BLS_INCLUDE_H -#include "relic.h" #include "bls12381_utils.h" -// Signature, Public key and Private key lengths -#define FULL_SIGNATURE_LEN G1_BYTES -#define FULL_PK_LEN G2_BYTES -#define SIGNATURE_LEN (FULL_SIGNATURE_LEN/(G1_SERIALIZATION+1)) -#define PK_LEN (FULL_PK_LEN/(G2_SERIALIZATION+1)) -#define SK_BITS (Fr_BITS) -#define SK_LEN BITS_TO_BYTES(SK_BITS) - -// Simultaneous Pairing in verification -#define DOUBLE_PAIRING 1 -#define SINGLE_PAIRING (DOUBLE_PAIRING^1) - -// Signature and public key membership check -#define MEMBERSHIP_CHECK 1 - -// algorithm choice for the hashing to G1 -// both methods are similar implementations of the same optimzed SSWU -// but offer different timings. -#define RELIC_SSWU 1 // relic library implementation -#define LOCAL_SSWU 2 // local implementation -#define hashToPoint LOCAL_SSWU - -// bls core (functions in bls_core.c) -int get_signature_len(); -int get_pk_len(); -int get_sk_len(); - -void bls_sign(byte*, const bn_t, const byte*, const int); -int bls_verify(const ep2_t, const byte*, const byte*, const int); -int bls_verifyPerDistinctMessage(const byte*, const int, const byte*, const uint32_t*, - const uint32_t*, const ep2_st*); -int bls_verifyPerDistinctKey(const byte*, - const int, const ep2_st*, const uint32_t*, - const byte*, const uint32_t*); -void bls_batchVerify(const int, byte*, const ep2_st*, - const byte*, const byte*, const int); +// BLS signature core (functions in bls_core.c) +int bls_sign(byte *, const Fr *, const byte *, const int); +int bls_verify(const E2 *, const byte *, const byte *, const int); +int bls_verifyPerDistinctMessage(const byte *, const int, const byte *, + const uint32_t *, const uint32_t *, + const E2 *); +int bls_verifyPerDistinctKey(const byte *, const int, const E2 *, + const uint32_t *, const byte *, const uint32_t *); +void bls_batch_verify(const int, byte *, const E2 *, const byte *, const byte *, + const int, const byte *); + +// BLS based SPoCK +int bls_spock_verify(const E2 *, const byte *, const E2 *, const byte *); #endif diff --git a/crypto/bls_multisig.go b/crypto/bls_multisig.go index af2c6ce2f3c..ea534f790f1 100644 --- a/crypto/bls_multisig.go +++ b/crypto/bls_multisig.go @@ -1,9 +1,7 @@ -//go:build relic -// +build relic - package crypto import ( + "crypto/rand" "errors" "fmt" @@ -12,23 +10,23 @@ import ( // BLS multi-signature using BLS12-381 curve // ([zcash]https://github.com/zkcrypto/pairing/blob/master/src/bls12_381/README.md#bls12-381) -// Pairing, ellipic curve and modular arithmetic is using Relic library. -// This implementation does not include any security against side-channel attacks. +// Pairing, ellipic curve and modular arithmetic are using [BLST](https://github.com/supranational/blst/tree/master/src) +// tools underneath. +// This implementation does not include any security against side-channel side-channel or fault attacks. -// existing features: +// Existing features: // - the same BLS set-up in bls.go // - Use the proof of possession scheme (PoP) to prevent against rogue public-key attack. -// - Non-interactive aggregation of private keys, public keys and signatures. -// - Non-interactive subtraction of multiple public keys from an (aggregated) public key. +// - Aggregation of private keys, public keys and signatures. +// - Subtraction of multiple public keys from an (aggregated) public key. // - Multi-signature verification of an aggregated signature of a single message // under multiple public keys. // - Multi-signature verification of an aggregated signature of multiple messages under // multiple public keys. // - batch verification of multiple signatures of a single message under multiple -// public keys: use a binary tree of aggregations to find the invalid signatures. +// public keys, using a binary tree of aggregations. -// #cgo CFLAGS: -g -Wall -std=c99 -// #cgo LDFLAGS: -L${SRCDIR}/relic/build/lib -l relic_s +// #include "bls12381_utils.h" // #include "bls_include.h" import "C" @@ -92,29 +90,26 @@ func BLSVerifyPOP(pk PublicKey, s Signature) (bool, error) { // - (nil, error) if an unexpected error occurs // - (aggregated_signature, nil) otherwise func AggregateBLSSignatures(sigs []Signature) (Signature, error) { - // set BLS context - blsInstance.reInit() - // check for empty list if len(sigs) == 0 { return nil, blsAggregateEmptyListError } // flatten the shares (required by the C layer) - flatSigs := make([]byte, 0, signatureLengthBLSBLS12381*len(sigs)) + flatSigs := make([]byte, 0, SignatureLenBLSBLS12381*len(sigs)) for i, sig := range sigs { - if len(sig) != signatureLengthBLSBLS12381 { + if len(sig) != SignatureLenBLSBLS12381 { return nil, fmt.Errorf("signature at index %d has an invalid length: %w", i, invalidSignatureError) } flatSigs = append(flatSigs, sig...) } - aggregatedSig := make([]byte, signatureLengthBLSBLS12381) + aggregatedSig := make([]byte, SignatureLenBLSBLS12381) // add the points in the C layer - result := C.ep_sum_vector_byte( + result := C.E1_sum_vector_byte( (*C.uchar)(&aggregatedSig[0]), (*C.uchar)(&flatSigs[0]), - (C.int)(len(sigs))) + (C.int)(len(flatSigs))) switch result { case valid: @@ -139,9 +134,6 @@ func AggregateBLSSignatures(sigs []Signature) (Signature, error) { // - (nil, blsAggregateEmptyListError) if no keys are provided (input slice is empty) // - (aggregated_key, nil) otherwise func AggregateBLSPrivateKeys(keys []PrivateKey) (PrivateKey, error) { - // set BLS context - blsInstance.reInit() - // check for empty list if len(keys) == 0 { return nil, blsAggregateEmptyListError @@ -157,8 +149,7 @@ func AggregateBLSPrivateKeys(keys []PrivateKey) (PrivateKey, error) { } var sum scalar - C.bn_new_wrapper((*C.bn_st)(&sum)) - C.bn_sum_vector((*C.bn_st)(&sum), (*C.bn_st)(&scalars[0]), + C.Fr_sum_vector((*C.Fr)(&sum), (*C.Fr)(&scalars[0]), (C.int)(len(scalars))) return newPrKeyBLSBLS12381(&sum), nil } @@ -177,15 +168,13 @@ func AggregateBLSPrivateKeys(keys []PrivateKey) (PrivateKey, error) { // - (nil, blsAggregateEmptyListError) no keys are provided (input slice is empty) // - (aggregated_key, nil) otherwise func AggregateBLSPublicKeys(keys []PublicKey) (PublicKey, error) { - // set BLS context - blsInstance.reInit() // check for empty list if len(keys) == 0 { return nil, blsAggregateEmptyListError } - points := make([]pointG2, 0, len(keys)) + points := make([]pointE2, 0, len(keys)) for i, pk := range keys { pkBLS, ok := pk.(*pubKeyBLSBLS12381) if !ok { @@ -194,8 +183,8 @@ func AggregateBLSPublicKeys(keys []PublicKey) (PublicKey, error) { points = append(points, pkBLS.point) } - var sum pointG2 - C.ep2_sum_vector((*C.ep2_st)(&sum), (*C.ep2_st)(&points[0]), + var sum pointE2 + C.E2_sum_vector_to_affine((*C.E2)(&sum), (*C.E2)(&points[0]), (C.int)(len(points))) sumKey := newPubKeyBLSBLS12381(&sum) @@ -203,16 +192,9 @@ func AggregateBLSPublicKeys(keys []PublicKey) (PublicKey, error) { } // IdentityBLSPublicKey returns an identity public key which corresponds to the point -// at infinity in G2 (identity element of G2). +// at infinity in G2 (identity element g2). func IdentityBLSPublicKey() PublicKey { - // set BLS context - blsInstance.reInit() - - identity := *newPubKeyBLSBLS12381(nil) - // set the point to infinity - C.ep2_set_infty((*C.ep2_st)(&identity.point)) - identity.isIdentity = true - return &identity + return &g2PublicKey } // RemoveBLSPublicKeys removes multiple BLS public keys from a given (aggregated) public key. @@ -230,15 +212,13 @@ func IdentityBLSPublicKey() PublicKey { // - (nil, notBLSKeyError) if at least one input key is not of type BLS BLS12-381 // - (remaining_key, nil) otherwise func RemoveBLSPublicKeys(aggKey PublicKey, keysToRemove []PublicKey) (PublicKey, error) { - // set BLS context - blsInstance.reInit() aggPKBLS, ok := aggKey.(*pubKeyBLSBLS12381) if !ok { return nil, notBLSKeyError } - pointsToSubtract := make([]pointG2, 0, len(keysToRemove)) + pointsToSubtract := make([]pointE2, 0, len(keysToRemove)) for i, pk := range keysToRemove { pkBLS, ok := pk.(*pubKeyBLSBLS12381) if !ok { @@ -252,9 +232,9 @@ func RemoveBLSPublicKeys(aggKey PublicKey, keysToRemove []PublicKey) (PublicKey, return aggKey, nil } - var resultPoint pointG2 - C.ep2_subtract_vector((*C.ep2_st)(&resultPoint), (*C.ep2_st)(&aggPKBLS.point), - (*C.ep2_st)(&pointsToSubtract[0]), (C.int)(len(pointsToSubtract))) + var resultPoint pointE2 + C.E2_subtract_vector((*C.E2)(&resultPoint), (*C.E2)(&aggPKBLS.point), + (*C.E2)(&pointsToSubtract[0]), (C.int)(len(pointsToSubtract))) resultKey := newPubKeyBLSBLS12381(&resultPoint) return resultKey, nil @@ -330,11 +310,9 @@ func VerifyBLSSignatureOneMessage( func VerifyBLSSignatureManyMessages( pks []PublicKey, s Signature, messages [][]byte, kmac []hash.Hasher, ) (bool, error) { - // set BLS context - blsInstance.reInit() // check signature length - if len(s) != signatureLengthBLSBLS12381 { + if len(s) != SignatureLenBLSBLS12381 { return false, nil } // check the list lengths @@ -363,13 +341,13 @@ func VerifyBLSSignatureManyMessages( // The comparison of the maps length minimizes the number of pairings to // compute by aggregating either public keys or the message hashes in // the verification equation. - mapPerHash := make(map[string][]pointG2) - mapPerPk := make(map[pointG2][][]byte) + mapPerHash := make(map[string][]pointE2) + mapPerPk := make(map[pointE2][][]byte) // Note: mapPerPk is using a cgo structure as map keys which may lead to 2 equal public keys // being considered distinct. This does not make the verification equation wrong but leads to // computing extra pairings. This case is considered unlikely to happen since a caller is likely // to use the same struct for a same public key. - // One way to fix this is to use the public key encoding as the map keys and store the "pointG2" + // One way to fix this is to use the public key encoding as the map keys and store the "pointE2" // structure with the map value, which adds more complexity and processing time. // fill the 2 maps @@ -397,7 +375,7 @@ func VerifyBLSSignatureManyMessages( flatDistinctHashes := make([]byte, 0) lenHashes := make([]uint32, 0) pkPerHash := make([]uint32, 0, len(mapPerHash)) - allPks := make([]pointG2, 0) + allPks := make([]pointE2, 0) for hash, pksVal := range mapPerHash { flatDistinctHashes = append(flatDistinctHashes, []byte(hash)...) lenHashes = append(lenHashes, uint32(len([]byte(hash)))) @@ -410,13 +388,13 @@ func VerifyBLSSignatureManyMessages( (*C.uchar)(&flatDistinctHashes[0]), (*C.uint32_t)(&lenHashes[0]), (*C.uint32_t)(&pkPerHash[0]), - (*C.ep2_st)(&allPks[0]), + (*C.E2)(&allPks[0]), ) } else { // aggregate hashes per distinct key // using the linearity of the pairing on the G1 variables. - distinctPks := make([]pointG2, 0, len(mapPerPk)) + distinctPks := make([]pointE2, 0, len(mapPerPk)) hashPerPk := make([]uint32, 0, len(mapPerPk)) flatHashes := make([]byte, 0) lenHashes := make([]uint32, 0) @@ -432,7 +410,7 @@ func VerifyBLSSignatureManyMessages( verif = C.bls_verifyPerDistinctKey( (*C.uchar)(&s[0]), (C.int)(len(mapPerPk)), - (*C.ep2_st)(&distinctPks[0]), + (*C.E2)(&distinctPks[0]), (*C.uint32_t)(&hashPerPk[0]), (*C.uchar)(&flatHashes[0]), (*C.uint32_t)(&lenHashes[0])) @@ -482,9 +460,6 @@ func VerifyBLSSignatureManyMessages( func BatchVerifyBLSSignaturesOneMessage( pks []PublicKey, sigs []Signature, message []byte, kmac hash.Hasher, ) ([]bool, error) { - // set BLS context - blsInstance.reInit() - // boolean array returned when errors occur falseSlice := make([]bool, len(sigs)) @@ -505,10 +480,10 @@ func BatchVerifyBLSSignaturesOneMessage( } // flatten the shares (required by the C layer) - flatSigs := make([]byte, 0, signatureLengthBLSBLS12381*len(sigs)) - pkPoints := make([]pointG2, 0, len(pks)) + flatSigs := make([]byte, 0, SignatureLenBLSBLS12381*len(sigs)) + pkPoints := make([]pointE2, 0, len(pks)) - getIdentityPoint := func() pointG2 { + getIdentityPoint := func() pointE2 { pk, _ := IdentityBLSPublicKey().(*pubKeyBLSBLS12381) // second value is guaranteed to be true return pk.point } @@ -520,13 +495,13 @@ func BatchVerifyBLSSignaturesOneMessage( return falseSlice, fmt.Errorf("key at index %d is invalid: %w", i, notBLSKeyError) } - if len(sigs[i]) != signatureLengthBLSBLS12381 || pkBLS.isIdentity { + if len(sigs[i]) != SignatureLenBLSBLS12381 || pkBLS.isIdentity { // case of invalid signature: set the signature and public key at index `i` // to identities so that there is no effect on the aggregation tree computation. // However, the boolean return for index `i` is set to `false` and won't be overwritten. returnBool[i] = false pkPoints = append(pkPoints, getIdentityPoint()) - flatSigs = append(flatSigs, identityBLSSignature...) + flatSigs = append(flatSigs, g1Serialization...) } else { returnBool[i] = true // default to true pkPoints = append(pkPoints, pkBLS.point) @@ -537,14 +512,22 @@ func BatchVerifyBLSSignaturesOneMessage( // hash the input to 128 bytes h := kmac.ComputeHash(message) verifInt := make([]byte, len(sigs)) + // internal non-determministic entropy source required by bls_batch_verify + // specific length of the seed is required by bls_batch_verify. + seed := make([]byte, (securityBits/8)*len(verifInt)) + _, err := rand.Read(seed) + if err != nil { + return falseSlice, fmt.Errorf("generating randoms failed: %w", err) + } - C.bls_batchVerify( + C.bls_batch_verify( (C.int)(len(verifInt)), (*C.uchar)(&verifInt[0]), - (*C.ep2_st)(&pkPoints[0]), + (*C.E2)(&pkPoints[0]), (*C.uchar)(&flatSigs[0]), (*C.uchar)(&h[0]), (C.int)(len(h)), + (*C.uchar)(&seed[0]), ) for i, v := range verifInt { diff --git a/crypto/bls_no_relic.go b/crypto/bls_no_relic.go deleted file mode 100644 index fed6c216398..00000000000 --- a/crypto/bls_no_relic.go +++ /dev/null @@ -1,156 +0,0 @@ -//go:build !relic -// +build !relic - -package crypto - -import ( - "github.com/onflow/flow-go/crypto/hash" -) - -// The functions below are the non-Relic versions of the public APIs -// requiring the Relic library. -// All BLS functionalities in the package require the Relic dependency, -// and therefore the "relic" build tag. -// Building without the "relic" tag is successful, but and calling one of the -// BLS functions results in a runtime panic. This allows projects depending on the -// crypto library to build successfully with or without the "relic" tag. - -const relic_panic = "function is not supported when building without \"relic\" Go build tag" - -const ( - SignatureLenBLSBLS12381 = 48 -) - -// bls.go functions -func NewExpandMsgXOFKMAC128(tag string) hash.Hasher { - panic(relic_panic) -} - -func BLSInvalidSignature() Signature { - panic(relic_panic) -} - -// bls_multisig.go functions -func BLSGeneratePOP(sk PrivateKey) (Signature, error) { - panic(relic_panic) -} - -func BLSVerifyPOP(pk PublicKey, s Signature) (bool, error) { - panic(relic_panic) -} - -func AggregateBLSSignatures(sigs []Signature) (Signature, error) { - panic(relic_panic) -} - -func AggregateBLSPrivateKeys(keys []PrivateKey) (PrivateKey, error) { - panic(relic_panic) -} - -func AggregateBLSPublicKeys(keys []PublicKey) (PublicKey, error) { - panic(relic_panic) -} - -func IdentityBLSPublicKey() PublicKey { - panic(relic_panic) -} - -func IsBLSAggregateEmptyListError(err error) bool { - panic(relic_panic) -} - -func IsInvalidSignatureError(err error) bool { - panic(relic_panic) -} - -func IsNotBLSKeyError(err error) bool { - panic(relic_panic) -} - -func IsBLSSignatureIdentity(s Signature) bool { - panic(relic_panic) -} - -func RemoveBLSPublicKeys(aggKey PublicKey, keysToRemove []PublicKey) (PublicKey, error) { - panic(relic_panic) -} - -func VerifyBLSSignatureOneMessage(pks []PublicKey, s Signature, - message []byte, kmac hash.Hasher) (bool, error) { - panic(relic_panic) -} - -func VerifyBLSSignatureManyMessages(pks []PublicKey, s Signature, - messages [][]byte, kmac []hash.Hasher) (bool, error) { - panic(relic_panic) -} - -func BatchVerifyBLSSignaturesOneMessage(pks []PublicKey, sigs []Signature, - message []byte, kmac hash.Hasher) ([]bool, error) { - panic(relic_panic) -} - -func SPOCKProve(sk PrivateKey, data []byte, kmac hash.Hasher) (Signature, error) { - panic(relic_panic) -} - -func SPOCKVerifyAgainstData(pk PublicKey, proof Signature, data []byte, kmac hash.Hasher) (bool, error) { - panic(relic_panic) -} - -func SPOCKVerify(pk1 PublicKey, proof1 Signature, pk2 PublicKey, proof2 Signature) (bool, error) { - panic(relic_panic) -} - -// bls_threshold.go functions -func NewBLSThresholdSignatureParticipant( - groupPublicKey PublicKey, - sharePublicKeys []PublicKey, - threshold int, - myIndex int, - myPrivateKey PrivateKey, - message []byte, - dsTag string, -) (ThresholdSignatureParticipant, error) { - panic(relic_panic) -} - -func NewBLSThresholdSignatureInspector( - groupPublicKey PublicKey, - sharePublicKeys []PublicKey, - threshold int, - message []byte, - dsTag string, -) (ThresholdSignatureInspector, error) { - panic(relic_panic) -} - -func BLSReconstructThresholdSignature(size int, threshold int, - shares []Signature, signers []int) (Signature, error) { - panic(relic_panic) -} - -func EnoughShares(threshold int, sharesNumber int) (bool, error) { - panic(relic_panic) -} - -func BLSThresholdKeyGen(size int, threshold int, seed []byte) ([]PrivateKey, - []PublicKey, PublicKey, error) { - panic(relic_panic) -} - -// dkg.go functions -func NewFeldmanVSS(size int, threshold int, myIndex int, - processor DKGProcessor, dealerIndex int) (DKGState, error) { - panic(relic_panic) -} - -func NewFeldmanVSSQual(size int, threshold int, myIndex int, - processor DKGProcessor, dealerIndex int) (DKGState, error) { - panic(relic_panic) -} - -func NewJointFeldman(size int, threshold int, myIndex int, - processor DKGProcessor) (DKGState, error) { - panic(relic_panic) -} diff --git a/crypto/bls_no_relic_test.go b/crypto/bls_no_relic_test.go deleted file mode 100644 index 47f8120060f..00000000000 --- a/crypto/bls_no_relic_test.go +++ /dev/null @@ -1,42 +0,0 @@ -//go:build !relic -// +build !relic - -package crypto - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -// Test for all public APIs requiring relic build tag. -// These functions should panic if build without the relic tag. -func TestNoRelicPanic(t *testing.T) { - assert.PanicsWithValue(t, relic_panic, func() { NewExpandMsgXOFKMAC128("") }) - assert.PanicsWithValue(t, relic_panic, func() { BLSInvalidSignature() }) - assert.PanicsWithValue(t, relic_panic, func() { BLSGeneratePOP(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { BLSVerifyPOP(nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { AggregateBLSSignatures(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { AggregateBLSPrivateKeys(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { AggregateBLSPublicKeys(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { IdentityBLSPublicKey() }) - assert.PanicsWithValue(t, relic_panic, func() { IsBLSAggregateEmptyListError(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { IsInvalidSignatureError(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { IsNotBLSKeyError(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { IsBLSSignatureIdentity(nil) }) - assert.PanicsWithValue(t, relic_panic, func() { RemoveBLSPublicKeys(nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { VerifyBLSSignatureOneMessage(nil, nil, nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { VerifyBLSSignatureManyMessages(nil, nil, nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { BatchVerifyBLSSignaturesOneMessage(nil, nil, nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { SPOCKProve(nil, nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { SPOCKVerify(nil, nil, nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { SPOCKVerifyAgainstData(nil, nil, nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { NewBLSThresholdSignatureParticipant(nil, nil, 0, 0, nil, nil, "") }) - assert.PanicsWithValue(t, relic_panic, func() { NewBLSThresholdSignatureInspector(nil, nil, 0, nil, "") }) - assert.PanicsWithValue(t, relic_panic, func() { BLSReconstructThresholdSignature(0, 0, nil, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { EnoughShares(0, 0) }) - assert.PanicsWithValue(t, relic_panic, func() { BLSThresholdKeyGen(0, 0, nil) }) - assert.PanicsWithValue(t, relic_panic, func() { NewFeldmanVSS(0, 0, 0, nil, 0) }) - assert.PanicsWithValue(t, relic_panic, func() { NewFeldmanVSSQual(0, 0, 0, nil, 0) }) - assert.PanicsWithValue(t, relic_panic, func() { NewJointFeldman(0, 0, 0, nil) }) -} diff --git a/crypto/bls_test.go b/crypto/bls_test.go index c967546f640..4fa02958496 100644 --- a/crypto/bls_test.go +++ b/crypto/bls_test.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package crypto import ( @@ -30,7 +27,11 @@ func TestBLSMainMethods(t *testing.T) { // This test checks that: // - signature decoding handles input x-coordinates larger than p (doesn't result in an exception) // - signature decoding only accepts reduced x-coordinates to avoid signature malleability + t.Run("invalid x coordinate larger than p", func(t *testing.T) { + if !isG1Compressed() || !isG2Compressed() { + t.Skip() + } msg, err := hex.DecodeString("7f26ba692dc2da7ff828ef4675ff1cd6ab855fca0637b6dab295f1df8e51bc8bb1b8f0c6610aabd486cf1f098f2ddbc6691d94e10f928816f890a3d366ce46249836a595c7ea1828af52e899ba2ab627ab667113bb563918c5d5a787c414399487b4e3a7") require.NoError(t, err) validSig, err := hex.DecodeString("80b0cac2a0f4f8881913edf2b29065675dfed6f6f4e17e9b5d860a845d4e7d476b277d06a493b81482e63d8131f9f2fa") @@ -74,8 +75,7 @@ func TestBLSMainMethods(t *testing.T) { // test a valid signature result, err := pk.Verify(s, input, hasher) assert.NoError(t, err) - assert.True(t, result, - "Verification should succeed:\n signature:%s\n message:%x\n private key:%s", s, input, sk) + assert.True(t, result) } }) } @@ -187,26 +187,35 @@ func TestBLSEncodeDecode(t *testing.T) { // specific tests for BLS // zero private key - skBytes := make([]byte, PrKeyLenBLSBLS12381) - sk, err := DecodePrivateKey(BLSBLS12381, skBytes) - require.Error(t, err, "decoding identity private key should fail") - assert.True(t, IsInvalidInputsError(err)) - assert.Nil(t, sk) + t.Run("zero private key", func(t *testing.T) { + skBytes := make([]byte, PrKeyLenBLSBLS12381) + sk, err := DecodePrivateKey(BLSBLS12381, skBytes) + require.Error(t, err, "decoding identity private key should fail") + assert.True(t, IsInvalidInputsError(err)) + assert.Nil(t, sk) + }) // identity public key - pkBytes := make([]byte, PubKeyLenBLSBLS12381) - pkBytes[0] = infinityPointHeader - pk, err := DecodePublicKey(BLSBLS12381, pkBytes) - require.NoError(t, err, "decoding identity public key should succeed") - assert.True(t, pk.Equals(IdentityBLSPublicKey())) + t.Run("infinity public key", func(t *testing.T) { + // decode an identity public key + pkBytes := make([]byte, PubKeyLenBLSBLS12381) + pkBytes[0] = g2SerHeader + pk, err := DecodePublicKey(BLSBLS12381, pkBytes) + require.NoError(t, err, "decoding identity public key should succeed") + assert.True(t, pk.Equals(IdentityBLSPublicKey())) + // encode an identity public key + assert.Equal(t, pk.Encode(), pkBytes) + }) // invalid point - pkBytes = make([]byte, PubKeyLenBLSBLS12381) - pkBytes[0] = invalidBLSSignatureHeader - pk, err = DecodePublicKey(BLSBLS12381, pkBytes) - require.Error(t, err, "the key decoding should fail - key value is invalid") - assert.True(t, IsInvalidInputsError(err)) - assert.Nil(t, pk) + t.Run("invalid public key", func(t *testing.T) { + pkBytes := make([]byte, PubKeyLenBLSBLS12381) + pkBytes[0] = invalidBLSSignatureHeader + pk, err := DecodePublicKey(BLSBLS12381, pkBytes) + require.Error(t, err, "the key decoding should fail - key value is invalid") + assert.True(t, IsInvalidInputsError(err)) + assert.Nil(t, pk) + }) // Test a public key serialization with a point encoded with a coordinate x with // x[0] or x[1] not reduced mod p. @@ -217,21 +226,26 @@ func TestBLSEncodeDecode(t *testing.T) { // Although uniqueness of public key respresentation isn't a security property, some implementations // may implicitely rely on the property. - // valid pk with x[0] < p and x[1] < p - validPk, err := hex.DecodeString("818d72183e3e908af5bd6c2e37494c749b88f0396d3fbc2ba4d9ea28f1c50d1c6a540ec8fe06b6d860f72ec9363db3b8038360809700d36d761cb266af6babe9a069dc7364d3502e84536bd893d5f09ec2dd4f07cae1f8a178ffacc450f9b9a2") - require.NoError(t, err) - _, err = DecodePublicKey(BLSBLS12381, validPk) - assert.NoError(t, err) - // invalidpk1 with x[0]+p and same x[1] - invalidPk1, err := hex.DecodeString("9B8E840277BE772540D913E47A94F94C00003BBE60C4CEEB0C0ABCC9E876034089000EC7AF5AB6D81AF62EC9363D5E63038360809700d36d761cb266af6babe9a069dc7364d3502e84536bd893d5f09ec2dd4f07cae1f8a178ffacc450f9b9a2") - require.NoError(t, err) - _, err = DecodePublicKey(BLSBLS12381, invalidPk1) - assert.Error(t, err) - // invalidpk1 with same x[0] and x[1]+p - invalidPk2, err := hex.DecodeString("818d72183e3e908af5bd6c2e37494c749b88f0396d3fbc2ba4d9ea28f1c50d1c6a540ec8fe06b6d860f72ec9363db3b81D84726AD080BA07C1385A1CF2B758C104E127F8585862EDEB843E798A86E6C2E1894F067C35F8A132FEACC450F9644D") - require.NoError(t, err) - _, err = DecodePublicKey(BLSBLS12381, invalidPk2) - assert.Error(t, err) + t.Run("public key with non-reduced coordinates", func(t *testing.T) { + if !isG2Compressed() { + t.Skip() + } + // valid pk with x[0] < p and x[1] < p + validPk, err := hex.DecodeString("818d72183e3e908af5bd6c2e37494c749b88f0396d3fbc2ba4d9ea28f1c50d1c6a540ec8fe06b6d860f72ec9363db3b8038360809700d36d761cb266af6babe9a069dc7364d3502e84536bd893d5f09ec2dd4f07cae1f8a178ffacc450f9b9a2") + require.NoError(t, err) + _, err = DecodePublicKey(BLSBLS12381, validPk) + assert.NoError(t, err) + // invalidpk1 with x[0]+p and same x[1] + invalidPk1, err := hex.DecodeString("9B8E840277BE772540D913E47A94F94C00003BBE60C4CEEB0C0ABCC9E876034089000EC7AF5AB6D81AF62EC9363D5E63038360809700d36d761cb266af6babe9a069dc7364d3502e84536bd893d5f09ec2dd4f07cae1f8a178ffacc450f9b9a2") + require.NoError(t, err) + _, err = DecodePublicKey(BLSBLS12381, invalidPk1) + assert.Error(t, err) + // invalidpk1 with same x[0] and x[1]+p + invalidPk2, err := hex.DecodeString("818d72183e3e908af5bd6c2e37494c749b88f0396d3fbc2ba4d9ea28f1c50d1c6a540ec8fe06b6d860f72ec9363db3b81D84726AD080BA07C1385A1CF2B758C104E127F8585862EDEB843E798A86E6C2E1894F067C35F8A132FEACC450F9644D") + require.NoError(t, err) + _, err = DecodePublicKey(BLSBLS12381, invalidPk2) + assert.Error(t, err) + }) } // TestBLSEquals tests equal for BLS keys @@ -273,7 +287,7 @@ func TestBLSPOP(t *testing.T) { // test a valid PoP result, err := BLSVerifyPOP(pk, s) require.NoError(t, err) - assert.True(t, result, "Verification should succeed:\n signature:%s\n private key:%s", s, sk) + assert.True(t, result) // test with a valid but different key seed[0] ^= 1 @@ -281,7 +295,7 @@ func TestBLSPOP(t *testing.T) { require.NoError(t, err) result, err = BLSVerifyPOP(wrongSk.PublicKey(), s) require.NoError(t, err) - assert.False(t, result, "Verification should fail:\n signature:%s\n private key:%s", s, sk) + assert.False(t, result) } }) @@ -300,7 +314,7 @@ func TestBLSPOP(t *testing.T) { } // BLS multi-signature -// signature aggregation sanity check +// signature aggregation with the same message sanity check // // Aggregate n signatures of the same message under different keys, and compare // it against the signature of the message under an aggregated private key. @@ -315,7 +329,7 @@ func TestBLSAggregateSignatures(t *testing.T) { // hasher kmac := NewExpandMsgXOFKMAC128("test tag") // number of signatures to aggregate - sigsNum := mrand.Intn(100) + 1 + sigsNum := rand.Intn(100) + 1 sigs := make([]Signature, 0, sigsNum) sks := make([]PrivateKey, 0, sigsNum) pks := make([]PublicKey, 0, sigsNum) @@ -342,40 +356,34 @@ func TestBLSAggregateSignatures(t *testing.T) { aggSig, err := AggregateBLSSignatures(sigs) require.NoError(t, err) // First check: check the signatures are equal - assert.Equal(t, aggSig, expectedSig, - "incorrect signature %s, should be %s, private keys are %s, input is %x", - aggSig, expectedSig, sks, input) + assert.Equal(t, aggSig, expectedSig) // Second check: Verify the aggregated signature valid, err := VerifyBLSSignatureOneMessage(pks, aggSig, input, kmac) require.NoError(t, err) - assert.True(t, valid, - "Verification of %s failed, signature should be %s private keys are %s, input is %x", - aggSig, expectedSig, sks, input) + assert.True(t, valid) }) // check if one signature is not correct t.Run("one invalid signature", func(t *testing.T) { input[0] ^= 1 - randomIndex := mrand.Intn(sigsNum) - sigs[randomIndex], err = sks[randomIndex].Sign(input, kmac) + randomIndex := rand.Intn(sigsNum) + sigs[randomIndex], err = sks[randomIndex].Sign(input, kmac) // sign a different message input[0] ^= 1 aggSig, err = AggregateBLSSignatures(sigs) require.NoError(t, err) - assert.NotEqual(t, aggSig, expectedSig, - "signature %s shouldn't be %s private keys are %s, input is %x", - aggSig, expectedSig, sks, input) + // First check: check the signatures are not equal + assert.NotEqual(t, aggSig, expectedSig) + // Second check: multi-verification should fail valid, err := VerifyBLSSignatureOneMessage(pks, aggSig, input, kmac) require.NoError(t, err) - assert.False(t, valid, - "verification of signature %s should fail, it shouldn't be %s private keys are %s, input is %x", - aggSig, expectedSig, sks, input) - sigs[randomIndex], err = sks[randomIndex].Sign(input, kmac) + assert.False(t, valid) + sigs[randomIndex], err = sks[randomIndex].Sign(input, kmac) // rebuild the correct signature require.NoError(t, err) }) // check if one the public keys is not correct t.Run("one invalid public key", func(t *testing.T) { - randomIndex := mrand.Intn(sigsNum) + randomIndex := rand.Intn(sigsNum) newSk := randomSK(t, rand) sks[randomIndex] = newSk pks[randomIndex] = newSk.PublicKey() @@ -383,14 +391,10 @@ func TestBLSAggregateSignatures(t *testing.T) { require.NoError(t, err) expectedSig, err = aggSk.Sign(input, kmac) require.NoError(t, err) - assert.NotEqual(t, aggSig, expectedSig, - "signature %s shouldn't be %s, private keys are %s, input is %x, wrong key is of index %d", - aggSig, expectedSig, sks, input, randomIndex) + assert.NotEqual(t, aggSig, expectedSig) valid, err := VerifyBLSSignatureOneMessage(pks, aggSig, input, kmac) require.NoError(t, err) - assert.False(t, valid, - "signature %s should fail, shouldn't be %s, private keys are %s, input is %x, wrong key is of index %d", - aggSig, expectedSig, sks, input, randomIndex) + assert.False(t, valid) }) t.Run("invalid inputs", func(t *testing.T) { @@ -407,7 +411,7 @@ func TestBLSAggregateSignatures(t *testing.T) { assert.False(t, result) // test with a signature of a wrong length - shortSig := sigs[0][:signatureLengthBLSBLS12381-1] + shortSig := sigs[0][:SignatureLenBLSBLS12381-1] aggSig, err = AggregateBLSSignatures([]Signature{shortSig}) assert.Error(t, err) assert.True(t, IsInvalidSignatureError(err)) @@ -441,10 +445,10 @@ func TestBLSAggregateSignatures(t *testing.T) { // Aggregate n public keys and their respective private keys and compare // the public key of the aggregated private key is equal to the aggregated // public key -func TestBLSAggregatePubKeys(t *testing.T) { +func TestBLSAggregatePublicKeys(t *testing.T) { rand := getPRG(t) // number of keys to aggregate - pkNum := mrand.Intn(100) + 1 + pkNum := rand.Intn(100) + 1 pks := make([]PublicKey, 0, pkNum) sks := make([]PrivateKey, 0, pkNum) @@ -490,9 +494,7 @@ func TestBLSAggregatePubKeys(t *testing.T) { keys := []PublicKey{pks[0], IdentityBLSPublicKey()} aggPkWithIdentity, err := AggregateBLSPublicKeys(keys) assert.NoError(t, err) - assert.True(t, aggPkWithIdentity.Equals(pks[0]), - "incorrect public key %s, should be %s", - aggPkWithIdentity, pks[0]) + assert.True(t, aggPkWithIdentity.Equals(pks[0])) }) t.Run("invalid inputs", func(t *testing.T) { @@ -512,8 +514,8 @@ func TestBLSAggregatePubKeys(t *testing.T) { // check that the public key corresponding to the zero private key is indeed identity // The package doesn't allow to generate a zero private key. One way to obtain a zero - // private key is via aggrgeting opposite private keys - t.Run("public key of zero private key", func(t *testing.T) { + // private key is via aggregating opposite private keys + t.Run("Identity public key from identity private key", func(t *testing.T) { // sk1 is group order of bls12-381 minus one groupOrderMinus1 := []byte{0x73, 0xED, 0xA7, 0x53, 0x29, 0x9D, 0x7D, 0x48, 0x33, 0x39, 0xD8, 0x08, 0x09, 0xA1, 0xD8, 0x05, 0x53, 0xBD, 0xA4, 0x02, 0xFF, 0xFE, @@ -525,9 +527,42 @@ func TestBLSAggregatePubKeys(t *testing.T) { one[PrKeyLenBLSBLS12381-1] = 1 sk2, err := DecodePrivateKey(BLSBLS12381, one) require.NoError(t, err) + // public key of aggregated private keys aggSK, err := AggregateBLSPrivateKeys([]PrivateKey{sk1, sk2}) require.NoError(t, err) assert.True(t, aggSK.PublicKey().Equals(IdentityBLSPublicKey())) + // aggregated public keys + aggPK, err := AggregateBLSPublicKeys([]PublicKey{sk1.PublicKey(), sk2.PublicKey()}) + require.NoError(t, err) + assert.True(t, aggPK.Equals(IdentityBLSPublicKey())) + // check of internal identity flag + blsKey, ok := aggPK.(*pubKeyBLSBLS12381) + require.True(t, ok) + assert.True(t, blsKey.isIdentity) + // check of encoding header + pkBytes := aggPK.Encode() + assert.Equal(t, g2SerHeader, pkBytes[0]) + }) + + t.Run("Identity public key from opposite points", func(t *testing.T) { + if !isG2Compressed() { + t.Skip() + } + pkBytes := pks[0].Encode() + negateCompressedPoint(pkBytes) + minusPk, err := DecodePublicKey(BLSBLS12381, pkBytes) + require.NoError(t, err) + // aggregated public keys + aggPK, err := AggregateBLSPublicKeys([]PublicKey{pks[0], minusPk}) + require.NoError(t, err) + assert.True(t, aggPK.Equals(IdentityBLSPublicKey())) + // check of internal identity flag + blsKey, ok := aggPK.(*pubKeyBLSBLS12381) + require.True(t, ok) + assert.True(t, blsKey.isIdentity) + // check of encoding header + pkBytes = aggPK.Encode() + assert.Equal(t, g2SerHeader, pkBytes[0]) }) } @@ -536,7 +571,7 @@ func TestBLSAggregatePubKeys(t *testing.T) { func TestBLSRemovePubKeys(t *testing.T) { rand := getPRG(t) // number of keys to aggregate - pkNum := mrand.Intn(100) + 1 + pkNum := rand.Intn(100) + 1 pks := make([]PublicKey, 0, pkNum) // generate public keys @@ -549,7 +584,7 @@ func TestBLSRemovePubKeys(t *testing.T) { require.NoError(t, err) // random number of keys to remove (at least one key is left) - pkToRemoveNum := mrand.Intn(pkNum) + pkToRemoveNum := rand.Intn(pkNum) expectedPatrialPk, err := AggregateBLSPublicKeys(pks[pkToRemoveNum:]) require.NoError(t, err) @@ -561,9 +596,7 @@ func TestBLSRemovePubKeys(t *testing.T) { BLSkey, ok := expectedPatrialPk.(*pubKeyBLSBLS12381) require.True(t, ok) - assert.True(t, BLSkey.Equals(partialPk), - "incorrect key %s, should be %s, keys are %s, index is %d", - partialPk, BLSkey, pks, pkToRemoveNum) + assert.True(t, BLSkey.Equals(partialPk)) }) // remove an extra key and check inequality @@ -574,9 +607,7 @@ func TestBLSRemovePubKeys(t *testing.T) { BLSkey, ok := expectedPatrialPk.(*pubKeyBLSBLS12381) require.True(t, ok) - assert.False(t, BLSkey.Equals(partialPk), - "incorrect key %s, should not be %s, keys are %s, index is %d, extra key is %s", - partialPk, BLSkey, pks, pkToRemoveNum, extraPk) + assert.False(t, BLSkey.Equals(partialPk)) }) // specific test to remove all keys @@ -591,9 +622,7 @@ func TestBLSRemovePubKeys(t *testing.T) { BLSRandomPk, ok := randomPk.(*pubKeyBLSBLS12381) require.True(t, ok) - assert.True(t, BLSRandomPk.Equals(randomPkPlusIdentityPk), - "incorrect key %s, should be infinity point, keys are %s", - identityPk, pks) + assert.True(t, BLSRandomPk.Equals(randomPkPlusIdentityPk)) }) // specific test with an empty slice of keys to remove @@ -604,9 +633,7 @@ func TestBLSRemovePubKeys(t *testing.T) { aggBLSkey, ok := aggPk.(*pubKeyBLSBLS12381) require.True(t, ok) - assert.True(t, aggBLSkey.Equals(partialPk), - "incorrect key %s, should be %s", - partialPk, aggBLSkey) + assert.True(t, aggBLSkey.Equals(partialPk)) }) t.Run("invalid inputs", func(t *testing.T) { @@ -640,7 +667,6 @@ func TestBLSBatchVerify(t *testing.T) { // number of signatures to aggregate sigsNum := rand.Intn(100) + 2 sigs := make([]Signature, 0, sigsNum) - sks := make([]PrivateKey, 0, sigsNum) pks := make([]PublicKey, 0, sigsNum) expectedValid := make([]bool, 0, sigsNum) @@ -650,7 +676,6 @@ func TestBLSBatchVerify(t *testing.T) { s, err := sk.Sign(input, kmac) require.NoError(t, err) sigs = append(sigs, s) - sks = append(sks, sk) pks = append(pks, sk.PublicKey()) expectedValid = append(expectedValid, true) } @@ -659,9 +684,26 @@ func TestBLSBatchVerify(t *testing.T) { t.Run("all signatures are valid", func(t *testing.T) { valid, err := BatchVerifyBLSSignaturesOneMessage(pks, sigs, input, kmac) require.NoError(t, err) - assert.Equal(t, valid, expectedValid, - "Verification of %s failed, private keys are %s, input is %x, results is %v", - sigs, sks, input, valid) + assert.Equal(t, valid, expectedValid) + }) + + // valid signatures but indices aren't correct: sig[i] is correct under pks[j] + // and sig[j] is correct under pks[j]. + // implementations simply aggregating all signatures and keys would fail this test. + t.Run("valid signatures with incorrect indices", func(t *testing.T) { + i := rand.Intn(sigsNum-1) + 1 + j := rand.Intn(i) + // swap correct keys + pks[i], pks[j] = pks[j], pks[i] + + valid, err := BatchVerifyBLSSignaturesOneMessage(pks, sigs, input, kmac) + require.NoError(t, err) + expectedValid[i], expectedValid[j] = false, false + assert.Equal(t, valid, expectedValid) + + // restore keys + pks[i], pks[j] = pks[j], pks[i] + expectedValid[i], expectedValid[j] = true, true }) // valid signatures but indices aren't correct: sig[i] is correct under pks[j] @@ -676,9 +718,7 @@ func TestBLSBatchVerify(t *testing.T) { valid, err := BatchVerifyBLSSignaturesOneMessage(pks, sigs, input, kmac) require.NoError(t, err) expectedValid[i], expectedValid[j] = false, false - assert.Equal(t, valid, expectedValid, - "Verification of %s failed, private keys are %s, input is %x, results is %v", - sigs, sks, input, valid) + assert.Equal(t, valid, expectedValid) // restore keys pks[i], pks[j] = pks[j], pks[i] @@ -689,9 +729,7 @@ func TestBLSBatchVerify(t *testing.T) { t.Run("one valid signature", func(t *testing.T) { valid, err := BatchVerifyBLSSignaturesOneMessage(pks[:1], sigs[:1], input, kmac) require.NoError(t, err) - assert.Equal(t, valid, expectedValid[:1], - "Verification of %s failed, private keys are %s, input is %x, results is %v", - sigs, sks, input, valid) + assert.Equal(t, expectedValid[:1], valid) }) // pick a random number of invalid signatures @@ -715,9 +753,7 @@ func TestBLSBatchVerify(t *testing.T) { valid, err := BatchVerifyBLSSignaturesOneMessage(pks, sigs, input, kmac) require.NoError(t, err) - assert.Equal(t, expectedValid, valid, - "Verification of %s failed\n private keys are %s\n input is %x\n results is %v", - sigs, sks, input, valid) + assert.Equal(t, expectedValid, valid) }) // all signatures are invalid @@ -732,9 +768,7 @@ func TestBLSBatchVerify(t *testing.T) { valid, err := BatchVerifyBLSSignaturesOneMessage(pks, sigs, input, kmac) require.NoError(t, err) - assert.Equal(t, valid, expectedValid, - "Verification of %s failed, private keys are %s, input is %x, results is %v", - sigs, sks, input, valid) + assert.Equal(t, valid, expectedValid) }) // test the empty list case @@ -742,8 +776,7 @@ func TestBLSBatchVerify(t *testing.T) { valid, err := BatchVerifyBLSSignaturesOneMessage(pks[:0], sigs[:0], input, kmac) require.Error(t, err) assert.True(t, IsBLSAggregateEmptyListError(err)) - assert.Equal(t, valid, expectedValid[:0], - "verification should fail with empty list key, got %v", valid) + assert.Equal(t, valid, expectedValid[:0]) }) // test incorrect inputs @@ -754,8 +787,7 @@ func TestBLSBatchVerify(t *testing.T) { valid, err := BatchVerifyBLSSignaturesOneMessage(pks[:len(pks)-1], sigs, input, kmac) require.Error(t, err) assert.True(t, IsInvalidInputsError(err)) - assert.Equal(t, valid, expectedValid, - "verification should fail with incorrect input lenghts, got %v", valid) + assert.Equal(t, valid, expectedValid) }) // test wrong hasher @@ -767,8 +799,7 @@ func TestBLSBatchVerify(t *testing.T) { require.Error(t, err) assert.True(t, IsNilHasherError(err)) - assert.Equal(t, valid, expectedValid, - "verification should fail with nil hasher, got %v", valid) + assert.Equal(t, valid, expectedValid) }) // test wrong key @@ -781,11 +812,17 @@ func TestBLSBatchVerify(t *testing.T) { require.Error(t, err) assert.True(t, IsNotBLSKeyError(err)) - assert.Equal(t, valid, expectedValid, - "verification should fail with invalid key, got %v", valid) + assert.Equal(t, valid, expectedValid) }) } +// Utility function that flips a point sign bit to negate the point +// this is shortcut which works only for zcash BLS12-381 compressed serialization. +// Applicable to both signatures and public keys. +func negateCompressedPoint(pointbytes []byte) { + pointbytes[0] ^= 0x20 +} + // alter or fix a signature func alterSignature(s Signature) { // this causes the signature to remain in G1 and be invalid @@ -855,16 +892,15 @@ func BenchmarkBatchVerify(b *testing.B) { // // Aggregate n signatures of distinct messages under different keys, // and verify the aggregated signature using the multi-signature verification with -// many message. +// many messages. func TestBLSAggregateSignaturesManyMessages(t *testing.T) { rand := getPRG(t) - // number of signatures to aggregate - sigsNum := mrand.Intn(20) + 1 + sigsNum := rand.Intn(40) + 1 sigs := make([]Signature, 0, sigsNum) - // number of keys - keysNum := mrand.Intn(sigsNum) + 1 + // number of keys (less than the number of signatures) + keysNum := rand.Intn(sigsNum) + 1 sks := make([]PrivateKey, 0, keysNum) // generate the keys for i := 0; i < keysNum; i++ { @@ -873,7 +909,7 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { } // number of messages (could be larger or smaller than the number of keys) - msgsNum := mrand.Intn(sigsNum) + 1 + msgsNum := rand.Intn(sigsNum) + 1 messages := make([][20]byte, msgsNum) for i := 0; i < msgsNum; i++ { _, err := rand.Read(messages[i][:]) @@ -888,10 +924,10 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { for i := 0; i < sigsNum; i++ { kmac := NewExpandMsgXOFKMAC128("test tag") // pick a key randomly from the list - skRand := mrand.Intn(keysNum) + skRand := rand.Intn(keysNum) sk := sks[skRand] // pick a message randomly from the list - msgRand := mrand.Intn(msgsNum) + msgRand := rand.Intn(msgsNum) msg := messages[msgRand][:] // generate a signature s, err := sk.Sign(msg, kmac) @@ -912,15 +948,13 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { // Verify the aggregated signature valid, err := VerifyBLSSignatureManyMessages(inputPks, aggSig, inputMsgs, inputKmacs) require.NoError(t, err) - assert.True(t, valid, - "Verification of %s failed, should be valid, private keys are %s, inputs are %x, input public keys are %s", - aggSig, sks, inputMsgs, inputPks) + assert.True(t, valid) }) // check if one of the signatures is not correct t.Run("one signature is invalid", func(t *testing.T) { - randomIndex := mrand.Intn(sigsNum) // pick a random signature - messages[0][0] ^= 1 // make sure the signature is different + randomIndex := rand.Intn(sigsNum) // pick a random signature + messages[0][0] ^= 1 // make sure the signature is different var err error sigs[randomIndex], err = sks[0].Sign(messages[0][:], inputKmacs[0]) require.NoError(t, err) @@ -929,9 +963,7 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { require.NoError(t, err) valid, err := VerifyBLSSignatureManyMessages(inputPks, aggSig, inputMsgs, inputKmacs) require.NoError(t, err) - assert.False(t, valid, - "Verification of %s should fail, private keys are %s, inputs are %x, input public keys are %s", - aggSig, sks, inputMsgs, inputPks) + assert.False(t, valid) }) // test the empty keys case @@ -939,8 +971,7 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { valid, err := VerifyBLSSignatureManyMessages(inputPks[:0], aggSig, inputMsgs, inputKmacs) assert.Error(t, err) assert.True(t, IsBLSAggregateEmptyListError(err)) - assert.False(t, valid, - "verification should fail with an empty key list") + assert.False(t, valid) }) // test inconsistent input arrays @@ -949,13 +980,13 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { valid, err := VerifyBLSSignatureManyMessages(inputPks, aggSig, inputMsgs[:sigsNum-1], inputKmacs) assert.Error(t, err) assert.True(t, IsInvalidInputsError(err)) - assert.False(t, valid, "verification should fail with inconsistent messages and hashers") + assert.False(t, valid) // empty key list valid, err = VerifyBLSSignatureManyMessages(inputPks[:0], aggSig, inputMsgs, inputKmacs) assert.Error(t, err) assert.True(t, IsBLSAggregateEmptyListError(err)) - assert.False(t, valid, "verification should fail with empty list key") + assert.False(t, valid) // nil hasher tmp := inputKmacs[0] @@ -963,7 +994,7 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { valid, err = VerifyBLSSignatureManyMessages(inputPks, aggSig, inputMsgs, inputKmacs) assert.Error(t, err) assert.True(t, IsNilHasherError(err)) - assert.False(t, valid, "verification should fail with nil hasher") + assert.False(t, valid) inputKmacs[0] = tmp // wrong key @@ -972,9 +1003,48 @@ func TestBLSAggregateSignaturesManyMessages(t *testing.T) { valid, err = VerifyBLSSignatureManyMessages(inputPks, aggSig, inputMsgs, inputKmacs) assert.Error(t, err) assert.True(t, IsNotBLSKeyError(err)) - assert.False(t, valid, "verification should fail with nil hasher") + assert.False(t, valid) inputPks[0] = tmpPK }) + + t.Run("variable number of distinct keys and messages", func(t *testing.T) { + // use a specific PRG for easier reproduction + prg := getPRG(t) + // number of signatures to aggregate + N := 100 + sigs := make([]Signature, 0, N) + msgs := make([][]byte, 0, N) + pks := make([]PublicKey, 0, N) + kmacs := make([]hash.Hasher, 0, N) + kmac := NewExpandMsgXOFKMAC128("test tag") + for i := 0; i < N; i++ { + // distinct message + msg := make([]byte, 20) + msgs = append(msgs, msg) + _, err := prg.Read(msg) + require.NoError(t, err) + // distinct key + sk := randomSK(t, prg) + pks = append(pks, sk.PublicKey()) + // generate a signature + s, err := sk.Sign(msg, kmac) + require.NoError(t, err) + sigs = append(sigs, s) + kmacs = append(kmacs, kmac) + } + + // go through all numbers of couples (msg, key) + for i := 1; i < N; i++ { + // aggregate signatures + var err error + aggSig, err = AggregateBLSSignatures(sigs[:i]) + require.NoError(t, err) + // Verify the aggregated signature + valid, err := VerifyBLSSignatureManyMessages(pks[:i], aggSig, msgs[:i], kmacs[:i]) + require.NoError(t, err, "verification errored with %d couples (msg,key)", i) + assert.True(t, valid, "verification failed with %d couples (msg,key)", i) + } + }) } // TestBLSErrorTypes verifies working of error-type-detecting functions @@ -1111,17 +1181,22 @@ func TestBLSIdentity(t *testing.T) { hasher := NewExpandMsgXOFKMAC128("") t.Run("identity signature comparison", func(t *testing.T) { + if !isG1Compressed() { + t.Skip() + } // verify that constructed identity signatures are recognized as such by IsBLSSignatureIdentity. // construct identity signature by summing (aggregating) a random signature and its inverse. - assert.True(t, IsBLSSignatureIdentity(identityBLSSignature)) + + // sanity check to start + assert.True(t, IsBLSSignatureIdentity(g1Serialization)) // sum up a random signature and its inverse to get identity sk := randomSK(t, rand) sig, err := sk.Sign(msg, hasher) require.NoError(t, err) - oppositeSig := make([]byte, signatureLengthBLSBLS12381) + oppositeSig := make([]byte, SignatureLenBLSBLS12381) copy(oppositeSig, sig) - negatePoint(oppositeSig) + negateCompressedPoint(oppositeSig) aggSig, err := AggregateBLSSignatures([]Signature{sig, oppositeSig}) require.NoError(t, err) assert.True(t, IsBLSSignatureIdentity(aggSig)) diff --git a/crypto/bls_thresholdsign.go b/crypto/bls_thresholdsign.go index 4256af84ab9..412f06f962a 100644 --- a/crypto/bls_thresholdsign.go +++ b/crypto/bls_thresholdsign.go @@ -1,9 +1,5 @@ -//go:build relic -// +build relic - package crypto -// #cgo CFLAGS: -g -Wall -std=c99 // #include "bls_thresholdsign_include.h" import "C" @@ -46,6 +42,8 @@ type blsThresholdSignatureParticipant struct { myPrivateKey PrivateKey } +var _ ThresholdSignatureParticipant = (*blsThresholdSignatureParticipant)(nil) + // blsThresholdSignatureInspector implements ThresholdSignatureInspector // based on the BLS signature scheme type blsThresholdSignatureInspector struct { @@ -72,6 +70,8 @@ type blsThresholdSignatureInspector struct { lock sync.RWMutex } +var _ ThresholdSignatureInspector = (*blsThresholdSignatureInspector)(nil) + // NewBLSThresholdSignatureParticipant creates a new instance of Threshold signature Participant using BLS. // A participant is able to participate in a threshold signing protocol as well as following the // protocol. @@ -82,8 +82,8 @@ type blsThresholdSignatureInspector struct { // participant is indexed by `myIndex` and holds the input private key // where n is the length of the public key shares slice. // -// The function returns -// - (nil, invalidInputsError) if: +// The function returns: +// - (nil, invalidInputsError) if: // - n is not in [`ThresholdSignMinSize`, `ThresholdSignMaxSize`] // - threshold value is not in interval [1, n-1] // - input private key and public key at my index do not match @@ -138,8 +138,8 @@ func NewBLSThresholdSignatureParticipant( // Participants are defined by their public key share, and are indexed from 0 to n-1 // where n is the length of the public key shares slice. // -// The function returns -// - (nil, invalidInputsError) if: +// The function returns: +// - (nil, invalidInputsError) if: // - n is not in [`ThresholdSignMinSize`, `ThresholdSignMaxSize`] // - threshold value is not in interval [1, n-1] // - (nil, notBLSKeyError) at least one public key is not of type pubKeyBLSBLS12381 @@ -402,24 +402,21 @@ func (s *blsThresholdSignatureInspector) reconstructThresholdSignature() (Signat return nil, notEnoughSharesErrorf("number of signature shares %d is not enough, %d are required", len(s.shares), s.threshold+1) } - thresholdSignature := make([]byte, signatureLengthBLSBLS12381) + thresholdSignature := make([]byte, SignatureLenBLSBLS12381) // prepare the C layer inputs - shares := make([]byte, 0, len(s.shares)*signatureLengthBLSBLS12381) + shares := make([]byte, 0, len(s.shares)*SignatureLenBLSBLS12381) signers := make([]index, 0, len(s.shares)) for index, share := range s.shares { shares = append(shares, share...) - signers = append(signers, index) + signers = append(signers, index+1) } - // set BLS settings - blsInstance.reInit() - // Lagrange Interpolate at point 0 - result := C.G1_lagrangeInterpolateAtZero( + result := C.E1_lagrange_interpolate_at_zero_write( (*C.uchar)(&thresholdSignature[0]), (*C.uchar)(&shares[0]), - (*C.uint8_t)(&signers[0]), (C.int)(s.threshold+1)) + (*C.uint8_t)(&signers[0]), (C.int)(s.threshold)) if result != valid { return nil, invalidSignatureError @@ -443,10 +440,14 @@ func (s *blsThresholdSignatureInspector) reconstructThresholdSignature() (Signat // // size is the number of participants, it must be in the range [ThresholdSignMinSize..ThresholdSignMaxSize]. // threshold is the threshold value, it must be in the range [MinimumThreshold..size-1]. -// The function does not check the validity of the shares, and does not check -// the validity of the resulting signature. +// The function does not accept any input public key. Therefore, it does not check the validity of the +// shares against individual public keys, and does not check the validity of the resulting signature +// against the group public key. // BLSReconstructThresholdSignature returns: -// - (nil, error) if the inputs are not in the correct range, if the threshold is not reached +// - (nil, invalidInputsError) if : +// -- numbers of shares does not match the number of signers +// -- the inputs are not in the correct range. +// - (nil, notEnoughSharesError) if the threshold is not reached. // - (nil, duplicatedSignerError) if input signers are not distinct. // - (nil, invalidSignatureError) if at least one of the first (threshold+1) signatures. // does not serialize to a valid E1 point. @@ -456,8 +457,6 @@ func (s *blsThresholdSignatureInspector) reconstructThresholdSignature() (Signat // are considered to reconstruct the signature. func BLSReconstructThresholdSignature(size int, threshold int, shares []Signature, signers []int) (Signature, error) { - // set BLS settings - blsInstance.reInit() if size < ThresholdSignMinSize || size > ThresholdSignMaxSize { return nil, invalidInputsErrorf( @@ -478,15 +477,15 @@ func BLSReconstructThresholdSignature(size int, threshold int, } if len(shares) < threshold+1 { - return nil, invalidInputsErrorf( - "the number of signatures does not reach the threshold") + return nil, notEnoughSharesErrorf( + "the number of signatures %d is less than the minimum %d", len(shares), threshold+1) } // map to check signers are distinct m := make(map[index]bool) // flatten the shares (required by the C layer) - flatShares := make([]byte, 0, signatureLengthBLSBLS12381*(threshold+1)) + flatShares := make([]byte, 0, SignatureLenBLSBLS12381*(threshold+1)) indexSigners := make([]index, 0, threshold+1) for i, share := range shares { flatShares = append(flatShares, share...) @@ -501,15 +500,15 @@ func BLSReconstructThresholdSignature(size int, threshold int, "%d is a duplicate signer", index(signers[i])) } m[index(signers[i])] = true - indexSigners = append(indexSigners, index(signers[i])) + indexSigners = append(indexSigners, index(signers[i])+1) } - thresholdSignature := make([]byte, signatureLengthBLSBLS12381) + thresholdSignature := make([]byte, SignatureLenBLSBLS12381) // Lagrange Interpolate at point 0 - if C.G1_lagrangeInterpolateAtZero( + if C.E1_lagrange_interpolate_at_zero_write( (*C.uchar)(&thresholdSignature[0]), (*C.uchar)(&flatShares[0]), - (*C.uint8_t)(&indexSigners[0]), (C.int)(threshold+1), + (*C.uint8_t)(&indexSigners[0]), (C.int)(threshold), ) != valid { return nil, invalidSignatureError } @@ -536,13 +535,15 @@ func EnoughShares(threshold int, sharesNumber int) (bool, error) { // BLSThresholdKeyGen is a key generation for a BLS-based // threshold signature scheme with a trusted dealer. // -// The function returns : -// - (nil, nil, nil, invalidInputsErrorf) if: +// The function returns: +// - (nil, nil, nil, invalidInputsErrorf) if: +// - seed is too short // - n is not in [`ThresholdSignMinSize`, `ThresholdSignMaxSize`] // - threshold value is not in interval [1, n-1] // - (groupPrivKey, []pubKeyShares, groupPubKey, nil) otherwise func BLSThresholdKeyGen(size int, threshold int, seed []byte) ([]PrivateKey, []PublicKey, PublicKey, error) { + if size < ThresholdSignMinSize || size > ThresholdSignMaxSize { return nil, nil, nil, invalidInputsErrorf( "size should be between %d and %d, got %d", @@ -558,33 +559,23 @@ func BLSThresholdKeyGen(size int, threshold int, seed []byte) ([]PrivateKey, threshold) } - // set BLS settings - blsInstance.reInit() - // the scalars x and G2 points y x := make([]scalar, size) - y := make([]pointG2, size) - var X0 pointG2 - - // seed relic - if err := seedRelic(seed); err != nil { - return nil, nil, nil, fmt.Errorf("seeding relic failed: %w", err) - } - // Generate a polynomial P in Zr[X] of degree t - a := make([]scalar, threshold+1) - randZrStar(&a[0]) // non-identity key - if threshold > 0 { - for i := 1; i < threshold; i++ { - randZr(&a[i]) - } - randZrStar(&a[threshold]) // enforce the polynomial degree + y := make([]pointE2, size) + var X0 pointE2 + + // Generate a polynomial P in Fr[X] of degree t + a, err := generateFrPolynomial(seed, threshold) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to generate random polynomial: %w", err) } + // compute the shares for i := index(1); int(i) <= size; i++ { - C.Zr_polynomialImage( - (*C.bn_st)(&x[i-1]), - (*C.ep2_st)(&y[i-1]), - (*C.bn_st)(&a[0]), (C.int)(len(a)), + C.Fr_polynomial_image( + (*C.Fr)(&x[i-1]), + (*C.E2)(&y[i-1]), + (*C.Fr)(&a[0]), (C.int)(len(a)-1), (C.uint8_t)(i), ) } diff --git a/crypto/bls_thresholdsign_core.c b/crypto/bls_thresholdsign_core.c index dc57355df47..7c1d809d228 100644 --- a/crypto/bls_thresholdsign_core.c +++ b/crypto/bls_thresholdsign_core.c @@ -1,123 +1,118 @@ -// +build relic - #include "bls_thresholdsign_include.h" -// Computes the Lagrange coefficient L(i+1) at 0 with regards to the range [signers(0)+1..signers(t)+1] -// and stores it in res, where t is the degree of the polynomial P -static void Zr_lagrangeCoefficientAtZero(bn_t res, const int i, const uint8_t* signers, const int len){ - // r is the order of G1 and G2 - bn_t r, r_2; - bn_new(r); - g2_get_ord(r); - // (r-2) is needed to compute the inverse in Zr - // using little Fermat theorem - bn_new(r_2); - bn_sub_dig(r_2, r, 2); - //#define MOD_METHOD MONTY - #define MOD_METHOD BASIC +// the highest index of a threshold participant +#define MAX_IND 255 +#define MAX_IND_BITS 8 // equal to ceiling(log_2(MAX_IND)) + +// Computes the Lagrange coefficient L_i(0) in Fr with regards to the range +// [indices(0)..indices(t)] and stores it in `res`, where t is the degree of the +// polynomial P. +// `degree` is equal to the polynomial degree `t`. +static void Fr_lagrange_coeff_at_zero(Fr *res, const int i, + const byte indices[], const int degree) { - #if MOD_METHOD == MONTY - bn_t u; - bn_new(u) - // Montgomery reduction constant - // TODO: hardcode u - bn_mod_pre_monty(u, r); - #endif + // coefficient is computed as N * D^(-1) + Fr numerator; // eventually would represent N*R^k + Fr denominator; // eventually would represent D*R^k - // temp buffers - bn_t acc, inv, base, numerator; - bn_new(inv); - bn_new(base); - bn_new_size(base, BITS_TO_DIGITS(Fr_BITS)) - bn_new(acc); - bn_new(numerator); - bn_new_size(acc, BITS_TO_DIGITS(3*Fr_BITS)); + // Initialize N and D to Montgomery constant R + Fr_copy(&numerator, &BLS12_381_rR); + Fr_copy(&denominator, &BLS12_381_rR); - // the accumulator of the largarnge coeffiecient - // the sign (sign of acc) is equal to 1 if acc is positive, 0 otherwise - bn_set_dig(acc, 1); - int sign = 1; + // sign of D: 0 for positive and 1 for negative + int sign = 0; - // loops is the maximum number of loops that takes the accumulator to - // overflow modulo r, mainly the highest k such that fact(MAX_IND)/fact(MAX_IND-k) < r - const int loops = MAX_IND_LOOPS; - int k,j = 0; - while (j/src/*.c` and `/src/*.h` files (C source files) but `server.c`. +- `server.c` is replaced by `./blst_src.c` (which lists only the files needed by Flow crypto). +- all `/build` (assembly generated files). +- this `README` file. + +To upgrade the BLST version: +- [ ] audit all BLST updates, with focus on `/src`: https://github.com/supranational/blst/compare/v0.3.11... +- [ ] delete all files in this folder `./blst_src/` but `blst_src.c` and `README.md`. +- [ ] delete all files in `./internal/blst/`. +- [ ] open BLST repository on the new version. +- [ ] copy all `.c` and `.h` files from `/src/` into `./blst_src/`. +- [ ] delete newly copied `./blst_src/server.c`. +- [ ] copy the folder `/build/` into this folder `./blst_src`. +- [ ] copy `/bindings/blst.h`, `/bindings/blst_aux.h`, and `/bindings/go/blst.go` into `./internal/blst/.`. +- [ ] check that C flags in `./bls12381_utils.go` still include the C flags in `/bindings/go/blst.go`. +- [ ] update `./blst_src/blst_src.c` if needed. +- [ ] solve all breaking changes that may occur. +- [ ] update the commit version on this `./blst_src/README`. + +Note that Flow crypto is using non exported internal functions from BLST. Checking for interfaces breaking changes in BLST should be done along with auditing changes between the old and new versions. This includes checking logical changes and assumptions beyond interfaces, and assessing their security and performance impact on protocols implemented in Flow crypto. diff --git a/crypto/blst_src/aggregate.c b/crypto/blst_src/aggregate.c new file mode 100644 index 00000000000..ca78876acad --- /dev/null +++ b/crypto/blst_src/aggregate.c @@ -0,0 +1,673 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * Usage pattern on single-processor system is + * + * blst_pairing_init(ctx, hash_or_encode, DST); + * blst_pairing_aggregate_pk_in_g1(ctx, PK[0], aggregated_signature, msg[0]); + * blst_pairing_aggregate_pk_in_g1(ctx, PK[1], NULL, msg[1]); + * ... + * blst_pairing_commit(ctx); + * blst_pairing_finalverify(ctx, NULL); + * + *********************************************************************** + * Usage pattern on multi-processor system is + * + * blst_pairing_init(pk[0], hash_or_encode, DST); + * blst_pairing_init(pk[1], hash_or_encode, DST); + * ... + * start threads each processing an N/nthreads slice of PKs and messages: + * blst_pairing_aggregate_pk_in_g1(pk[i], PK[i*n+0], NULL, msg[i*n+0]); + * blst_pairing_aggregate_pk_in_g1(pk[i], PK[i*n+1], NULL, msg[i*n+1]); + * ... + * blst_pairing_commit(pkx); + * ... + * meanwhile in main thread + * blst_fp12 gtsig; + * blst_aggregated_in_g2(>sig, aggregated_signature); + * join threads and merge their contexts: + * blst_pairing_merge(pk[0], pk[1]); + * blst_pairing_merge(pk[0], pk[2]); + * ... + * blst_pairing_finalverify(pk[0], gtsig); + */ + +#ifndef N_MAX +# define N_MAX 8 +#endif + +typedef union { POINTonE1 e1; POINTonE2 e2; } AggregatedSignature; +typedef struct { + unsigned int ctrl; + unsigned int nelems; + const void *DST; + size_t DST_len; + vec384fp12 GT; + AggregatedSignature AggrSign; + POINTonE2_affine Q[N_MAX]; + POINTonE1_affine P[N_MAX]; +} PAIRING; + +enum { AGGR_UNDEFINED = 0, + AGGR_MIN_SIG = 1, + AGGR_MIN_PK = 2, + AGGR_SIGN_SET = 0x10, + AGGR_GT_SET = 0x20, + AGGR_HASH_OR_ENCODE = 0x40 }; +#define MIN_SIG_OR_PK (AGGR_MIN_SIG | AGGR_MIN_PK) + +static const size_t sizeof_pairing = (sizeof(PAIRING) + 7) & ~(size_t)7; + +size_t blst_pairing_sizeof(void) +{ return sizeof_pairing; } + +void blst_pairing_init(PAIRING *ctx, int hash_or_encode, + const void *DST, size_t DST_len) +{ + ctx->ctrl = AGGR_UNDEFINED | (hash_or_encode ? AGGR_HASH_OR_ENCODE : 0); + ctx->nelems = 0; + ctx->DST = (uptr_t)DST==(uptr_t)((byte *)ctx+sizeof_pairing) ? (void *)42 + : DST; + ctx->DST_len = DST_len; +} + +static const void *pairing_get_dst(const PAIRING *ctx) +{ return (uptr_t)ctx->DST==(uptr_t)42 ? (const byte *)ctx+sizeof_pairing + : ctx->DST; +} + +const void *blst_pairing_get_dst(const PAIRING *ctx) +{ return pairing_get_dst(ctx); } + +#define FROM_AFFINE(out,in) do { \ + vec_copy((out)->X, in->X, 2*sizeof(in->X)), \ + vec_select((out)->Z, in->X, BLS12_381_Rx.p, sizeof(in->X), \ + vec_is_zero(in->X, 2*sizeof(in->X))); } while(0) + +/* + * Optional |nbits|-wide |scalar| is used to facilitate multiple aggregated + * signature verification as discussed at + * https://ethresear.ch/t/fast-verification-of-multiple-bls-signatures/5407. + * Usage pattern is not finalized yet, because (sig != NULL) is better and + * will be handled separately... + */ +static BLST_ERROR PAIRING_Aggregate_PK_in_G2(PAIRING *ctx, + const POINTonE2_affine *PK, + size_t pk_groupcheck, + const POINTonE1_affine *sig, + size_t sig_groupcheck, + const byte *scalar, size_t nbits, + const void *msg, size_t msg_len, + const void *aug, size_t aug_len) +{ + if (ctx->ctrl & AGGR_MIN_PK) + return BLST_AGGR_TYPE_MISMATCH; + + ctx->ctrl |= AGGR_MIN_SIG; + + /* + * Since we don't know if the signature is individual or aggregated, + * the only sensible thing to do is to skip over infinite one and + * count on the corresponding infinite public key to be rejected, + * in case the signature is non-aggregated that is. + */ + if (sig != NULL && !vec_is_zero(sig, sizeof(*sig))) { + POINTonE1 *S = &ctx->AggrSign.e1; + POINTonE1 P[1]; + + FROM_AFFINE(P, sig); + + if (sig_groupcheck && !POINTonE1_in_G1(P)) + return BLST_POINT_NOT_IN_GROUP; + + if (ctx->ctrl & AGGR_SIGN_SET) { + if (nbits != 0 && scalar != NULL) { + POINTonE1_mult_w5(P, P, scalar, nbits); + POINTonE1_dadd(S, S, P, NULL); + } else { + POINTonE1_dadd_affine(S, S, sig); + } + } else { + ctx->ctrl |= AGGR_SIGN_SET; + if (nbits != 0 && scalar != NULL) + POINTonE1_mult_w5(S, P, scalar, nbits); + else + vec_copy(S, P, sizeof(P)); + } + } + + if (PK != NULL) { + unsigned int n; + POINTonE1 H[1]; + const void *DST = pairing_get_dst(ctx); + + /* + * Reject infinite public keys. + */ + if (vec_is_zero(PK, sizeof(*PK))) + return BLST_PK_IS_INFINITY; + + if (pk_groupcheck) { + POINTonE2 P[1]; + + FROM_AFFINE(P, PK); + if (!POINTonE2_in_G2(P)) + return BLST_POINT_NOT_IN_GROUP; + } + + if (ctx->ctrl & AGGR_HASH_OR_ENCODE) + Hash_to_G1(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len); + else + Encode_to_G1(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len); + + if (nbits != 0 && scalar != NULL) + POINTonE1_mult_w5(H, H, scalar, nbits); + + POINTonE1_from_Jacobian(H, H); + + n = ctx->nelems; + vec_copy(ctx->Q + n, PK, sizeof(POINTonE2_affine)); + vec_copy(ctx->P + n, H, sizeof(POINTonE1_affine)); + if (++n == N_MAX) { + if (ctx->ctrl & AGGR_GT_SET) { + vec384fp12 GT; + miller_loop_n(GT, ctx->Q, ctx->P, n); + mul_fp12(ctx->GT, ctx->GT, GT); + } else { + miller_loop_n(ctx->GT, ctx->Q, ctx->P, n); + ctx->ctrl |= AGGR_GT_SET; + } + n = 0; + } + ctx->nelems = n; + } + + return BLST_SUCCESS; +} + +BLST_ERROR blst_pairing_aggregate_pk_in_g2(PAIRING *ctx, + const POINTonE2_affine *PK, + const POINTonE1_affine *signature, + const void *msg, size_t msg_len, + const void *aug, size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G2(ctx, PK, 0, signature, 1, NULL, 0, + msg, msg_len, aug, aug_len); +} + +BLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g2(PAIRING *ctx, + const POINTonE2_affine *PK, + const POINTonE1_affine *sig, + const byte *scalar, + size_t nbits, + const void *msg, + size_t msg_len, + const void *aug, + size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G2(ctx, PK, 0, sig, 1, scalar, nbits, + msg, msg_len, aug, aug_len); +} + +BLST_ERROR blst_pairing_chk_n_aggr_pk_in_g2(PAIRING *ctx, + const POINTonE2_affine *PK, + size_t pk_grpchk, + const POINTonE1_affine *signature, + size_t sig_grpchk, + const void *msg, size_t msg_len, + const void *aug, size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G2(ctx, PK, pk_grpchk, signature, sig_grpchk, + NULL, 0, msg, msg_len, aug, aug_len); +} + +BLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g2(PAIRING *ctx, + const POINTonE2_affine *PK, + size_t pk_grpchk, + const POINTonE1_affine *sig, + size_t sig_grpchk, + const byte *scalar, + size_t nbits, + const void *msg, + size_t msg_len, + const void *aug, + size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G2(ctx, PK, pk_grpchk, sig, sig_grpchk, + scalar, nbits, + msg, msg_len, aug, aug_len); +} + +static BLST_ERROR PAIRING_Aggregate_PK_in_G1(PAIRING *ctx, + const POINTonE1_affine *PK, + size_t pk_groupcheck, + const POINTonE2_affine *sig, + size_t sig_groupcheck, + const byte *scalar, size_t nbits, + const void *msg, size_t msg_len, + const void *aug, size_t aug_len) +{ + if (ctx->ctrl & AGGR_MIN_SIG) + return BLST_AGGR_TYPE_MISMATCH; + + ctx->ctrl |= AGGR_MIN_PK; + + /* + * Since we don't know if the signature is individual or aggregated, + * the only sensible thing to do is to skip over infinite one and + * count on the corresponding infinite public key to be rejected, + * in case the signature is non-aggregated that is. + */ + if (sig != NULL && !vec_is_zero(sig, sizeof(*sig))) { + POINTonE2 *S = &ctx->AggrSign.e2; + POINTonE2 P[1]; + + FROM_AFFINE(P, sig); + + if (sig_groupcheck && !POINTonE2_in_G2(P)) + return BLST_POINT_NOT_IN_GROUP; + + if (ctx->ctrl & AGGR_SIGN_SET) { + if (nbits != 0 && scalar != NULL) { + + POINTonE2_mult_w5(P, P, scalar, nbits); + POINTonE2_dadd(S, S, P, NULL); + } else { + POINTonE2_dadd_affine(S, S, sig); + } + } else { + ctx->ctrl |= AGGR_SIGN_SET; + if (nbits != 0 && scalar != NULL) + POINTonE2_mult_w5(S, P, scalar, nbits); + else + vec_copy(S, P, sizeof(P)); + } + } + + if (PK != NULL) { + unsigned int n; + POINTonE2 H[1]; + POINTonE1 pk[1]; + const void *DST = pairing_get_dst(ctx); + + /* + * Reject infinite public keys. + */ + if (vec_is_zero(PK, sizeof(*PK))) + return BLST_PK_IS_INFINITY; + + if (pk_groupcheck) { + POINTonE1 P[1]; + + FROM_AFFINE(P, PK); + if (!POINTonE1_in_G1(P)) + return BLST_POINT_NOT_IN_GROUP; + } + + if (ctx->ctrl & AGGR_HASH_OR_ENCODE) + Hash_to_G2(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len); + else + Encode_to_G2(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len); + + POINTonE2_from_Jacobian(H, H); + + if (nbits != 0 && scalar != NULL) { + FROM_AFFINE(pk, PK); + POINTonE1_mult_w5(pk, pk, scalar, nbits); + POINTonE1_from_Jacobian(pk, pk); + PK = (const POINTonE1_affine *)pk; + } + + n = ctx->nelems; + vec_copy(ctx->Q + n, H, sizeof(POINTonE2_affine)); + vec_copy(ctx->P + n, PK, sizeof(POINTonE1_affine)); + if (++n == N_MAX) { + if (ctx->ctrl & AGGR_GT_SET) { + vec384fp12 GT; + miller_loop_n(GT, ctx->Q, ctx->P, n); + mul_fp12(ctx->GT, ctx->GT, GT); + } else { + miller_loop_n(ctx->GT, ctx->Q, ctx->P, n); + ctx->ctrl |= AGGR_GT_SET; + } + n = 0; + } + ctx->nelems = n; + } + + return BLST_SUCCESS; +} + +BLST_ERROR blst_pairing_aggregate_pk_in_g1(PAIRING *ctx, + const POINTonE1_affine *PK, + const POINTonE2_affine *signature, + const void *msg, size_t msg_len, + const void *aug, size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G1(ctx, PK, 0, signature, 1, NULL, 0, + msg, msg_len, aug, aug_len); +} + +BLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g1(PAIRING *ctx, + const POINTonE1_affine *PK, + const POINTonE2_affine *sig, + const byte *scalar, + size_t nbits, + const void *msg, + size_t msg_len, + const void *aug, + size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G1(ctx, PK, 0, sig, 1, scalar, nbits, + msg, msg_len, aug, aug_len); +} + +BLST_ERROR blst_pairing_chk_n_aggr_pk_in_g1(PAIRING *ctx, + const POINTonE1_affine *PK, + size_t pk_grpchk, + const POINTonE2_affine *signature, + size_t sig_grpchk, + const void *msg, size_t msg_len, + const void *aug, size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G1(ctx, PK, pk_grpchk, signature, sig_grpchk, + NULL, 0, msg, msg_len, aug, aug_len); +} + +BLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g1(PAIRING *ctx, + const POINTonE1_affine *PK, + size_t pk_grpchk, + const POINTonE2_affine *sig, + size_t sig_grpchk, + const byte *scalar, + size_t nbits, + const void *msg, + size_t msg_len, + const void *aug, + size_t aug_len) +{ return PAIRING_Aggregate_PK_in_G1(ctx, PK, pk_grpchk, sig, sig_grpchk, + scalar, nbits, + msg, msg_len, aug, aug_len); +} + +static void PAIRING_Commit(PAIRING *ctx) +{ + unsigned int n; + + if ((n = ctx->nelems) != 0) { + if (ctx->ctrl & AGGR_GT_SET) { + vec384fp12 GT; + miller_loop_n(GT, ctx->Q, ctx->P, n); + mul_fp12(ctx->GT, ctx->GT, GT); + } else { + miller_loop_n(ctx->GT, ctx->Q, ctx->P, n); + ctx->ctrl |= AGGR_GT_SET; + } + ctx->nelems = 0; + } +} + +void blst_pairing_commit(PAIRING *ctx) +{ PAIRING_Commit(ctx); } + +BLST_ERROR blst_pairing_merge(PAIRING *ctx, const PAIRING *ctx1) +{ + if ((ctx->ctrl & MIN_SIG_OR_PK) != AGGR_UNDEFINED + && (ctx1->ctrl & MIN_SIG_OR_PK) != AGGR_UNDEFINED + && (ctx->ctrl & ctx1->ctrl & MIN_SIG_OR_PK) == 0) + return BLST_AGGR_TYPE_MISMATCH; + + /* context producers are expected to have called blst_pairing_commit */ + if (ctx->nelems || ctx1->nelems) + return BLST_AGGR_TYPE_MISMATCH; + + ctx->ctrl |= ctx1->ctrl & MIN_SIG_OR_PK; + + switch (ctx->ctrl & MIN_SIG_OR_PK) { + case AGGR_MIN_SIG: + if (ctx->ctrl & ctx1->ctrl & AGGR_SIGN_SET) { + POINTonE1_dadd(&ctx->AggrSign.e1, &ctx->AggrSign.e1, + &ctx1->AggrSign.e1, NULL); + } else if (ctx1->ctrl & AGGR_SIGN_SET) { + ctx->ctrl |= AGGR_SIGN_SET; + vec_copy(&ctx->AggrSign.e1, &ctx1->AggrSign.e1, + sizeof(ctx->AggrSign.e1)); + } + break; + case AGGR_MIN_PK: + if (ctx->ctrl & ctx1->ctrl & AGGR_SIGN_SET) { + POINTonE2_dadd(&ctx->AggrSign.e2, &ctx->AggrSign.e2, + &ctx1->AggrSign.e2, NULL); + } else if (ctx1->ctrl & AGGR_SIGN_SET) { + ctx->ctrl |= AGGR_SIGN_SET; + vec_copy(&ctx->AggrSign.e2, &ctx1->AggrSign.e2, + sizeof(ctx->AggrSign.e2)); + } + break; + case AGGR_UNDEFINED: + break; + default: + return BLST_AGGR_TYPE_MISMATCH; + } + + if (ctx->ctrl & ctx1->ctrl & AGGR_GT_SET) { + mul_fp12(ctx->GT, ctx->GT, ctx1->GT); + } else if (ctx1->ctrl & AGGR_GT_SET) { + ctx->ctrl |= AGGR_GT_SET; + vec_copy(ctx->GT, ctx1->GT, sizeof(ctx->GT)); + } + + return BLST_SUCCESS; +} + +static bool_t PAIRING_FinalVerify(const PAIRING *ctx, const vec384fp12 GTsig) +{ + vec384fp12 GT; + + if (!(ctx->ctrl & AGGR_GT_SET)) + return 0; + + if (GTsig != NULL) { + vec_copy(GT, GTsig, sizeof(GT)); + } else if (ctx->ctrl & AGGR_SIGN_SET) { + AggregatedSignature AggrSign; + + switch (ctx->ctrl & MIN_SIG_OR_PK) { + case AGGR_MIN_SIG: + POINTonE1_from_Jacobian(&AggrSign.e1, &ctx->AggrSign.e1); + miller_loop_n(GT, (const POINTonE2_affine *)&BLS12_381_G2, + (const POINTonE1_affine *)&AggrSign.e1, 1); + break; + case AGGR_MIN_PK: + POINTonE2_from_Jacobian(&AggrSign.e2, &ctx->AggrSign.e2); + miller_loop_n(GT, (const POINTonE2_affine *)&AggrSign.e2, + (const POINTonE1_affine *)&BLS12_381_G1, 1); + break; + default: + return 0; + } + } else { + /* + * The aggregated signature was infinite, relation between the + * hashes and the public keys has to be VERY special... + */ + vec_copy(GT, BLS12_381_Rx.p12, sizeof(GT)); + } + + conjugate_fp12(GT); + mul_fp12(GT, GT, ctx->GT); + final_exp(GT, GT); + + /* return GT==1 */ + return vec_is_equal(GT[0][0], BLS12_381_Rx.p2, sizeof(GT[0][0])) & + vec_is_zero(GT[0][1], sizeof(GT) - sizeof(GT[0][0])); +} + +int blst_pairing_finalverify(const PAIRING *ctx, const vec384fp12 GTsig) +{ return (int)PAIRING_FinalVerify(ctx, GTsig); } + +int blst_fp12_finalverify(const vec384fp12 GT1, const vec384fp12 GT2) +{ + vec384fp12 GT; + + vec_copy(GT, GT1, sizeof(GT)); + conjugate_fp12(GT); + mul_fp12(GT, GT, GT2); + final_exp(GT, GT); + + /* return GT==1 */ + return (int)(vec_is_equal(GT[0][0], BLS12_381_Rx.p2, sizeof(GT[0][0])) & + vec_is_zero(GT[0][1], sizeof(GT) - sizeof(GT[0][0]))); +} + +void blst_pairing_raw_aggregate(PAIRING *ctx, const POINTonE2_affine *q, + const POINTonE1_affine *p) +{ + unsigned int n; + + if (vec_is_zero(q, sizeof(*q)) & vec_is_zero(p, sizeof(*p))) + return; + + n = ctx->nelems; + vec_copy(ctx->Q + n, q, sizeof(*q)); + vec_copy(ctx->P + n, p, sizeof(*p)); + if (++n == N_MAX) { + if (ctx->ctrl & AGGR_GT_SET) { + vec384fp12 GT; + miller_loop_n(GT, ctx->Q, ctx->P, n); + mul_fp12(ctx->GT, ctx->GT, GT); + } else { + miller_loop_n(ctx->GT, ctx->Q, ctx->P, n); + ctx->ctrl |= AGGR_GT_SET; + } + n = 0; + } + ctx->nelems = n; +} + +vec384fp12 *blst_pairing_as_fp12(PAIRING *ctx) +{ + PAIRING_Commit(ctx); + return (vec384fp12 *)ctx->GT; +} + +/* + * PAIRING context-free entry points. + * + * To perform FastAggregateVerify, aggregate all public keys and + * signatures with corresponding blst_aggregate_in_g{12}, convert + * result to affine and call suitable blst_core_verify_pk_in_g{12} + * or blst_aggregated_in_g{12}... + */ +BLST_ERROR blst_aggregate_in_g1(POINTonE1 *out, const POINTonE1 *in, + const unsigned char *zwire) +{ + POINTonE1 P[1]; + BLST_ERROR ret; + + ret = POINTonE1_Deserialize_Z((POINTonE1_affine *)P, zwire); + + if (ret != BLST_SUCCESS) + return ret; + + if (vec_is_zero(P, sizeof(POINTonE1_affine))) { + if (in == NULL) + vec_zero(out, sizeof(*out)); + return BLST_SUCCESS; + } + + vec_copy(P->Z, BLS12_381_Rx.p, sizeof(P->Z)); + + if (!POINTonE1_in_G1(P)) + return BLST_POINT_NOT_IN_GROUP; + + if (in == NULL) + vec_copy(out, P, sizeof(P)); + else + POINTonE1_dadd_affine(out, in, (POINTonE1_affine *)P); + + return BLST_SUCCESS; +} + +BLST_ERROR blst_aggregate_in_g2(POINTonE2 *out, const POINTonE2 *in, + const unsigned char *zwire) +{ + POINTonE2 P[1]; + BLST_ERROR ret; + + ret = POINTonE2_Deserialize_Z((POINTonE2_affine *)P, zwire); + + if (ret != BLST_SUCCESS) + return ret; + + if (vec_is_zero(P, sizeof(POINTonE2_affine))) { + if (in == NULL) + vec_zero(out, sizeof(*out)); + return BLST_SUCCESS; + } + + vec_copy(P->Z, BLS12_381_Rx.p, sizeof(P->Z)); + + if (!POINTonE2_in_G2(P)) + return BLST_POINT_NOT_IN_GROUP; + + if (in == NULL) { + vec_copy(out, P, sizeof(P)); + } else { + POINTonE2_dadd_affine(out, in, (POINTonE2_affine *)P); + } + return BLST_SUCCESS; +} + +void blst_aggregated_in_g1(vec384fp12 ret, const POINTonE1_affine *sig) +{ miller_loop_n(ret, (const POINTonE2_affine *)&BLS12_381_G2, sig, 1); } + +void blst_aggregated_in_g2(vec384fp12 ret, const POINTonE2_affine *sig) +{ miller_loop_n(ret, sig, (const POINTonE1_affine *)&BLS12_381_G1, 1); } + +BLST_ERROR blst_core_verify_pk_in_g1(const POINTonE1_affine *pk, + const POINTonE2_affine *signature, + int hash_or_encode, + const void *msg, size_t msg_len, + const void *DST, size_t DST_len, + const void *aug, size_t aug_len) +{ + PAIRING ctx; + BLST_ERROR ret; + + ctx.ctrl = AGGR_UNDEFINED | (hash_or_encode ? AGGR_HASH_OR_ENCODE : 0); + ctx.nelems = 0; + ctx.DST = DST; + ctx.DST_len = DST_len; + + ret = PAIRING_Aggregate_PK_in_G1(&ctx, pk, 1, signature, 1, NULL, 0, + msg, msg_len, aug, aug_len); + if (ret != BLST_SUCCESS) + return ret; + + PAIRING_Commit(&ctx); + + return PAIRING_FinalVerify(&ctx, NULL) ? BLST_SUCCESS : BLST_VERIFY_FAIL; +} + +BLST_ERROR blst_core_verify_pk_in_g2(const POINTonE2_affine *pk, + const POINTonE1_affine *signature, + int hash_or_encode, + const void *msg, size_t msg_len, + const void *DST, size_t DST_len, + const void *aug, size_t aug_len) +{ + PAIRING ctx; + BLST_ERROR ret; + + ctx.ctrl = AGGR_UNDEFINED | (hash_or_encode ? AGGR_HASH_OR_ENCODE : 0); + ctx.nelems = 0; + ctx.DST = DST; + ctx.DST_len = DST_len; + + ret = PAIRING_Aggregate_PK_in_G2(&ctx, pk, 1, signature, 1, NULL, 0, + msg, msg_len, aug, aug_len); + if (ret != BLST_SUCCESS) + return ret; + + PAIRING_Commit(&ctx); + + return PAIRING_FinalVerify(&ctx, NULL) ? BLST_SUCCESS : BLST_VERIFY_FAIL; +} diff --git a/crypto/blst_src/blst_src.c b/crypto/blst_src/blst_src.c new file mode 100644 index 00000000000..9e064657e72 --- /dev/null +++ b/crypto/blst_src/blst_src.c @@ -0,0 +1,24 @@ +// This file contains all BLST lib C files needed for +// Flow crypto. +// +// The list may need to be updated in a new version of BLST is used. + +#include "keygen.c" +#include "hash_to_field.c" +#include "e1.c" +#include "map_to_g1.c" +#include "e2.c" +#include "map_to_g2.c" +#include "fp12_tower.c" +#include "pairing.c" +#include "exp.c" +#include "sqrt.c" +#include "recip.c" +#include "aggregate.c" +#include "bulk_addition.c" +#include "multi_scalar.c" +#include "consts.c" +#include "vect.c" +#include "exports.c" + + diff --git a/crypto/blst_src/build/assembly.S b/crypto/blst_src/build/assembly.S new file mode 100644 index 00000000000..c0c5db30850 --- /dev/null +++ b/crypto/blst_src/build/assembly.S @@ -0,0 +1,116 @@ +#if defined(__x86_64) || defined(__x86_64__) +# if defined(__ELF__) +# if defined(__BLST_PORTABLE__) +# include "elf/sha256-portable-x86_64.s" +# define blst_sha256_block_data_order blst_sha256_block_ssse3 +# endif +# include "elf/sha256-x86_64.s" +# if defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "elf/ctx_inverse_mod_384-x86_64.s" +# endif +# if !defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "elf/ctq_inverse_mod_384-x86_64.s" +# endif +# include "elf/add_mod_384-x86_64.s" +# include "elf/add_mod_384x384-x86_64.s" +# if defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "elf/mulx_mont_384-x86_64.s" +# include "elf/mulx_mont_256-x86_64.s" +# endif +# if !defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "elf/mulq_mont_384-x86_64.s" +# include "elf/mulq_mont_256-x86_64.s" +# endif +# include "elf/add_mod_256-x86_64.s" +# include "elf/ct_inverse_mod_256-x86_64.s" +# include "elf/div3w-x86_64.s" +# include "elf/ct_is_square_mod_384-x86_64.s" +# elif defined(_WIN64) || defined(__CYGWIN__) +# include "coff/sha256-x86_64.s" +# if defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "coff/ctx_inverse_mod_384-x86_64.s" +# endif +# if !defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "coff/ctq_inverse_mod_384-x86_64.s" +# endif +# include "coff/add_mod_384-x86_64.s" +# include "coff/add_mod_384x384-x86_64.s" +# if defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "coff/mulx_mont_384-x86_64.s" +# include "coff/mulx_mont_256-x86_64.s" +# endif +# if !defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "coff/mulq_mont_384-x86_64.s" +# include "coff/mulq_mont_256-x86_64.s" +# endif +# include "coff/add_mod_256-x86_64.s" +# include "coff/ct_inverse_mod_256-x86_64.s" +# include "coff/div3w-x86_64.s" +# include "coff/ct_is_square_mod_384-x86_64.s" +# elif defined(__APPLE__) +# include "mach-o/sha256-x86_64.s" +# if defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "mach-o/ctx_inverse_mod_384-x86_64.s" +# endif +# if !defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "mach-o/ctq_inverse_mod_384-x86_64.s" +# endif +# include "mach-o/add_mod_384-x86_64.s" +# include "mach-o/add_mod_384x384-x86_64.s" +# if defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "mach-o/mulx_mont_384-x86_64.s" +# include "mach-o/mulx_mont_256-x86_64.s" +# endif +# if !defined(__ADX__) || defined(__BLST_PORTABLE__) +# include "mach-o/mulq_mont_384-x86_64.s" +# include "mach-o/mulq_mont_256-x86_64.s" +# endif +# include "mach-o/add_mod_256-x86_64.s" +# include "mach-o/ct_inverse_mod_256-x86_64.s" +# include "mach-o/div3w-x86_64.s" +# include "mach-o/ct_is_square_mod_384-x86_64.s" +# endif +#elif defined(__aarch64__) +# if defined(__ELF__) +# include "elf/sha256-armv8.S" +# include "elf/ct_inverse_mod_384-armv8.S" +# include "elf/add_mod_384-armv8.S" +# define __add_mod_384 __add_mont_384 +# define __sub_mod_384 __sub_mont_384 +# include "elf/mul_mont_384-armv8.S" +# include "elf/mul_mont_256-armv8.S" +# include "elf/add_mod_256-armv8.S" +# include "elf/ct_inverse_mod_256-armv8.S" +# include "elf/div3w-armv8.S" +# include "elf/ct_is_square_mod_384-armv8.S" +# elif defined(_WIN64) +# include "coff/sha256-armv8.S" +# include "coff/ct_inverse_mod_384-armv8.S" +# include "coff/add_mod_384-armv8.S" +# define __add_mod_384 __add_mont_384 +# define __sub_mod_384 __sub_mont_384 +# include "coff/mul_mont_384-armv8.S" +# include "coff/mul_mont_256-armv8.S" +# include "coff/add_mod_256-armv8.S" +# include "coff/ct_inverse_mod_256-armv8.S" +# include "coff/div3w-armv8.S" +# include "coff/ct_is_square_mod_384-armv8.S" +# elif defined(__APPLE__) +# include "mach-o/sha256-armv8.S" +# include "mach-o/ct_inverse_mod_384-armv8.S" +# include "mach-o/add_mod_384-armv8.S" +# define __add_mod_384 __add_mont_384 +# define __sub_mod_384 __sub_mont_384 +# include "mach-o/mul_mont_384-armv8.S" +# include "mach-o/mul_mont_256-armv8.S" +# include "mach-o/add_mod_256-armv8.S" +# include "mach-o/ct_inverse_mod_256-armv8.S" +# include "mach-o/div3w-armv8.S" +# include "mach-o/ct_is_square_mod_384-armv8.S" +# endif +#elif defined(__BLST_NO_ASM__) || \ + (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__==4) +/* inaccurate way to detect a 32-bit processor, but it's close enough */ +#else +# error "unsupported platform" +#endif diff --git a/crypto/blst_src/build/bindings_trim.pl b/crypto/blst_src/build/bindings_trim.pl new file mode 100755 index 00000000000..0880352d79e --- /dev/null +++ b/crypto/blst_src/build/bindings_trim.pl @@ -0,0 +1,40 @@ +#!/usr/bin/env perl + +# read whole file +while(<>) { push @file, $_; } + +# traverse and remove auto-generated PartialEq for chosen types +for (my $i = 0; $i <= $#file; $i++) { + if (@file[$i] =~ m/pub\s+(?:struct|enum)\s+(\w+)/) { + push @structs, $1; + } + + if (@file[$i] =~ m/struct\s+blst_p[12]/) { + @file[$i-1] =~ s/,\s*PartialEq//; + } elsif (@file[$i] =~ m/struct\s+blst_fp12/) { + @file[$i-1] =~ s/,\s*(?:Default|PartialEq)//g; + } elsif (@file[$i] =~ m/struct\s+(blst_pairing|blst_uniq)/) { + @file[$i-1] =~ s/,\s*(?:Copy|Clone|Eq|PartialEq)//g; + } elsif (@file[$i] =~ m/struct\s+blst_scalar/) { + @file[$i-1] =~ s/,\s*Copy//; + @file[$i-1] =~ s/\)/, Zeroize\)/; + splice @file, $i, 0, "#[zeroize(drop)]\n"; $i++; + } else { + @file[$i] =~ s/::std::/::core::/g; + } +} + +print @file; + +print << '___'; +#[test] +fn bindgen_test_normal_types() { + // from "Rust for Rustaceans" by Jon Gjengset + fn is_normal() {} +___ +for (@structs) { + print " is_normal::<$_>();\n"; +} +print "}\n"; + +close STDOUT; diff --git a/crypto/blst_src/build/coff/add_mod_256-armv8.S b/crypto/blst_src/build/coff/add_mod_256-armv8.S new file mode 100644 index 00000000000..27b64ef4ca4 --- /dev/null +++ b/crypto/blst_src/build/coff/add_mod_256-armv8.S @@ -0,0 +1,397 @@ +.text + +.globl add_mod_256 + +.def add_mod_256; +.type 32; +.endef +.p2align 5 +add_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + adds x8,x8,x12 + ldp x14,x15,[x2,#16] + adcs x9,x9,x13 + ldp x4,x5,[x3] + adcs x10,x10,x14 + ldp x6,x7,[x3,#16] + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + stp x8,x9,[x0] + csel x11,x11,x2,lo + stp x10,x11,[x0,#16] + + ret + + +.globl mul_by_3_mod_256 + +.def mul_by_3_mod_256; +.type 32; +.endef +.p2align 5 +mul_by_3_mod_256: + ldp x12,x13,[x1] + ldp x14,x15,[x1,#16] + + adds x8,x12,x12 + ldp x4,x5,[x2] + adcs x9,x13,x13 + ldp x6,x7,[x2,#16] + adcs x10,x14,x14 + adcs x11,x15,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + csel x11,x11,x2,lo + + adds x8,x8,x12 + adcs x9,x9,x13 + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + stp x8,x9,[x0] + csel x11,x11,x2,lo + stp x10,x11,[x0,#16] + + ret + + +.globl lshift_mod_256 + +.def lshift_mod_256; +.type 32; +.endef +.p2align 5 +lshift_mod_256: + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +.Loop_lshift_mod_256: + adds x8,x8,x8 + sub x2,x2,#1 + adcs x9,x9,x9 + adcs x10,x10,x10 + adcs x11,x11,x11 + adc x3,xzr,xzr + + subs x12,x8,x4 + sbcs x13,x9,x5 + sbcs x14,x10,x6 + sbcs x15,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x12,lo + csel x9,x9,x13,lo + csel x10,x10,x14,lo + csel x11,x11,x15,lo + + cbnz x2,.Loop_lshift_mod_256 + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret + + +.globl rshift_mod_256 + +.def rshift_mod_256; +.type 32; +.endef +.p2align 5 +rshift_mod_256: + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +.Loop_rshift: + adds x12,x8,x4 + sub x2,x2,#1 + adcs x13,x9,x5 + adcs x14,x10,x6 + adcs x15,x11,x7 + adc x3,xzr,xzr + tst x8,#1 + + csel x12,x12,x8,ne + csel x13,x13,x9,ne + csel x14,x14,x10,ne + csel x15,x15,x11,ne + csel x3,x3,xzr,ne + + extr x8,x13,x12,#1 + extr x9,x14,x13,#1 + extr x10,x15,x14,#1 + extr x11,x3,x15,#1 + + cbnz x2,.Loop_rshift + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret + + +.globl cneg_mod_256 + +.def cneg_mod_256; +.type 32; +.endef +.p2align 5 +cneg_mod_256: + ldp x8,x9,[x1] + ldp x4,x5,[x3] + + ldp x10,x11,[x1,#16] + subs x12,x4,x8 + ldp x6,x7,[x3,#16] + orr x4,x8,x9 + sbcs x13,x5,x9 + orr x5,x10,x11 + sbcs x14,x6,x10 + orr x3,x4,x5 + sbc x15,x7,x11 + + cmp x3,#0 + csetm x3,ne + ands x2,x2,x3 + + csel x8,x8,x12,eq + csel x9,x9,x13,eq + csel x10,x10,x14,eq + stp x8,x9,[x0] + csel x11,x11,x15,eq + stp x10,x11,[x0,#16] + + ret + + +.globl sub_mod_256 + +.def sub_mod_256; +.type 32; +.endef +.p2align 5 +sub_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + subs x8,x8,x12 + ldp x14,x15,[x2,#16] + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + stp x8,x9,[x0] + adc x11,x11,x7 + stp x10,x11,[x0,#16] + + ret + + +.globl check_mod_256 + +.def check_mod_256; +.type 32; +.endef +.p2align 5 +check_mod_256: + ldp x8,x9,[x0] + ldp x10,x11,[x0,#16] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + subs xzr,x8,x4 + sbcs xzr,x9,x5 + orr x8,x8,x9 + sbcs xzr,x10,x6 + orr x8,x8,x10 + sbcs xzr,x11,x7 + orr x8,x8,x11 + sbc x1,xzr,xzr + + cmp x8,#0 + mov x0,#1 + csel x0,x0,xzr,ne + and x0,x0,x1 + + ret + + +.globl add_n_check_mod_256 + +.def add_n_check_mod_256; +.type 32; +.endef +.p2align 5 +add_n_check_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + adds x8,x8,x12 + ldp x4,x5,[x3] + adcs x9,x9,x13 + ldp x6,x7,[x3,#16] + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + csel x11,x11,x2,lo + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + csel x0, x17, xzr, ne + + ret + + +.globl sub_n_check_mod_256 + +.def sub_n_check_mod_256; +.type 32; +.endef +.p2align 5 +sub_n_check_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + subs x8,x8,x12 + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + adc x11,x11,x7 + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + csel x0, x17, xzr, ne + + ret + diff --git a/crypto/blst_src/build/coff/add_mod_256-x86_64.s b/crypto/blst_src/build/coff/add_mod_256-x86_64.s new file mode 100644 index 00000000000..c2c83502a18 --- /dev/null +++ b/crypto/blst_src/build/coff/add_mod_256-x86_64.s @@ -0,0 +1,924 @@ +.text + +.globl add_mod_256 + +.def add_mod_256; .scl 2; .type 32; .endef +.p2align 5 +add_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_add_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + subq $8,%rsp + +.LSEH_body_add_mod_256: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +.Loaded_a_add_mod_256: + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + movq %r9,%rsi + adcq 24(%rdx),%r11 + sbbq %rdx,%rdx + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + movq %r8,0(%rdi) + cmovcq %rbx,%r10 + movq %r9,8(%rdi) + cmovcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_add_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_add_mod_256: + + +.globl mul_by_3_mod_256 + +.def mul_by_3_mod_256; .scl 2; .type 32; .endef +.p2align 5 +mul_by_3_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_by_3_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + pushq %rbx + + pushq %r12 + +.LSEH_body_mul_by_3_mod_256: + + + movq %rdx,%rcx + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %rsi,%rdx + movq 24(%rsi),%r11 + + call __lshift_mod_256 + movq 0(%rsp),%r12 + + jmp .Loaded_a_add_mod_256 + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_mul_by_3_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_by_3_mod_256: + +.def __lshift_mod_256; .scl 3; .type 32; .endef +.p2align 5 +__lshift_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + + addq %r8,%r8 + adcq %r9,%r9 + movq %r8,%rax + adcq %r10,%r10 + movq %r9,%rsi + adcq %r11,%r11 + sbbq %r12,%r12 + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + cmovcq %rbx,%r10 + cmovcq %rbp,%r11 + + .byte 0xf3,0xc3 + + + +.globl lshift_mod_256 + +.def lshift_mod_256; .scl 2; .type 32; .endef +.p2align 5 +lshift_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_lshift_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + +.LSEH_body_lshift_mod_256: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +.Loop_lshift_mod_256: + call __lshift_mod_256 + decl %edx + jnz .Loop_lshift_mod_256 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r12 + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_lshift_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_lshift_mod_256: + + +.globl rshift_mod_256 + +.def rshift_mod_256; .scl 2; .type 32; .endef +.p2align 5 +rshift_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_rshift_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + subq $8,%rsp + +.LSEH_body_rshift_mod_256: + + + movq 0(%rsi),%rbp + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +.Loop_rshift_mod_256: + movq %rbp,%r8 + andq $1,%rbp + movq 0(%rcx),%rax + negq %rbp + movq 8(%rcx),%rsi + movq 16(%rcx),%rbx + + andq %rbp,%rax + andq %rbp,%rsi + andq %rbp,%rbx + andq 24(%rcx),%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + adcq %rbx,%r10 + adcq %rbp,%r11 + sbbq %rax,%rax + + shrq $1,%r8 + movq %r9,%rbp + shrq $1,%r9 + movq %r10,%rbx + shrq $1,%r10 + movq %r11,%rsi + shrq $1,%r11 + + shlq $63,%rbp + shlq $63,%rbx + orq %r8,%rbp + shlq $63,%rsi + orq %rbx,%r9 + shlq $63,%rax + orq %rsi,%r10 + orq %rax,%r11 + + decl %edx + jnz .Loop_rshift_mod_256 + + movq %rbp,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_rshift_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_rshift_mod_256: + + +.globl cneg_mod_256 + +.def cneg_mod_256; .scl 2; .type 32; .endef +.p2align 5 +cneg_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_cneg_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + +.LSEH_body_cneg_mod_256: + + + movq 0(%rsi),%r12 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r12,%r8 + movq 24(%rsi),%r11 + orq %r9,%r12 + orq %r10,%r12 + orq %r11,%r12 + movq $-1,%rbp + + movq 0(%rcx),%rax + cmovnzq %rbp,%r12 + movq 8(%rcx),%rsi + movq 16(%rcx),%rbx + andq %r12,%rax + movq 24(%rcx),%rbp + andq %r12,%rsi + andq %r12,%rbx + andq %r12,%rbp + + subq %r8,%rax + sbbq %r9,%rsi + sbbq %r10,%rbx + sbbq %r11,%rbp + + orq %rdx,%rdx + + cmovzq %r8,%rax + cmovzq %r9,%rsi + movq %rax,0(%rdi) + cmovzq %r10,%rbx + movq %rsi,8(%rdi) + cmovzq %r11,%rbp + movq %rbx,16(%rdi) + movq %rbp,24(%rdi) + + movq 0(%rsp),%r12 + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_cneg_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_cneg_mod_256: + + +.globl sub_mod_256 + +.def sub_mod_256; .scl 2; .type 32; .endef +.p2align 5 +sub_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sub_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + subq $8,%rsp + +.LSEH_body_sub_mod_256: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + movq 0(%rcx),%rax + sbbq 8(%rdx),%r9 + movq 8(%rcx),%rsi + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rbx + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbp + sbbq %rdx,%rdx + + andq %rdx,%rax + andq %rdx,%rsi + andq %rdx,%rbx + andq %rdx,%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + movq %r8,0(%rdi) + adcq %rbx,%r10 + movq %r9,8(%rdi) + adcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_sub_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sub_mod_256: + + +.globl check_mod_256 + +.def check_mod_256; .scl 2; .type 32; .endef +.p2align 5 +check_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_check_mod_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq 0(%rdi),%rax + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + + movq %rax,%r8 + orq %r9,%rax + orq %r10,%rax + orq %r11,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq %rsi,%rsi + + movq $1,%rdx + cmpq $0,%rax + cmovneq %rdx,%rax + andq %rsi,%rax +.LSEH_epilogue_check_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_check_mod_256: + + +.globl add_n_check_mod_256 + +.def add_n_check_mod_256; .scl 2; .type 32; .endef +.p2align 5 +add_n_check_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_add_n_check_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + subq $8,%rsp + +.LSEH_body_add_n_check_mod_256: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + movq %r9,%rsi + adcq 24(%rdx),%r11 + sbbq %rdx,%rdx + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + movq %r8,0(%rdi) + cmovcq %rbx,%r10 + movq %r9,8(%rdi) + cmovcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + orq %r9,%r8 + orq %r11,%r10 + orq %r10,%r8 + movq $1,%rax + cmovzq %r8,%rax + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_add_n_check_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_add_n_check_mod_256: + + +.globl sub_n_check_mod_256 + +.def sub_n_check_mod_256; .scl 2; .type 32; .endef +.p2align 5 +sub_n_check_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sub_n_check_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + subq $8,%rsp + +.LSEH_body_sub_n_check_mod_256: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + movq 0(%rcx),%rax + sbbq 8(%rdx),%r9 + movq 8(%rcx),%rsi + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rbx + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbp + sbbq %rdx,%rdx + + andq %rdx,%rax + andq %rdx,%rsi + andq %rdx,%rbx + andq %rdx,%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + movq %r8,0(%rdi) + adcq %rbx,%r10 + movq %r9,8(%rdi) + adcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + orq %r9,%r8 + orq %r11,%r10 + orq %r10,%r8 + movq $1,%rax + cmovzq %r8,%rax + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_sub_n_check_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sub_n_check_mod_256: +.section .pdata +.p2align 2 +.rva .LSEH_begin_add_mod_256 +.rva .LSEH_body_add_mod_256 +.rva .LSEH_info_add_mod_256_prologue + +.rva .LSEH_body_add_mod_256 +.rva .LSEH_epilogue_add_mod_256 +.rva .LSEH_info_add_mod_256_body + +.rva .LSEH_epilogue_add_mod_256 +.rva .LSEH_end_add_mod_256 +.rva .LSEH_info_add_mod_256_epilogue + +.rva .LSEH_begin_mul_by_3_mod_256 +.rva .LSEH_body_mul_by_3_mod_256 +.rva .LSEH_info_mul_by_3_mod_256_prologue + +.rva .LSEH_body_mul_by_3_mod_256 +.rva .LSEH_epilogue_mul_by_3_mod_256 +.rva .LSEH_info_mul_by_3_mod_256_body + +.rva .LSEH_epilogue_mul_by_3_mod_256 +.rva .LSEH_end_mul_by_3_mod_256 +.rva .LSEH_info_mul_by_3_mod_256_epilogue + +.rva .LSEH_begin_lshift_mod_256 +.rva .LSEH_body_lshift_mod_256 +.rva .LSEH_info_lshift_mod_256_prologue + +.rva .LSEH_body_lshift_mod_256 +.rva .LSEH_epilogue_lshift_mod_256 +.rva .LSEH_info_lshift_mod_256_body + +.rva .LSEH_epilogue_lshift_mod_256 +.rva .LSEH_end_lshift_mod_256 +.rva .LSEH_info_lshift_mod_256_epilogue + +.rva .LSEH_begin_rshift_mod_256 +.rva .LSEH_body_rshift_mod_256 +.rva .LSEH_info_rshift_mod_256_prologue + +.rva .LSEH_body_rshift_mod_256 +.rva .LSEH_epilogue_rshift_mod_256 +.rva .LSEH_info_rshift_mod_256_body + +.rva .LSEH_epilogue_rshift_mod_256 +.rva .LSEH_end_rshift_mod_256 +.rva .LSEH_info_rshift_mod_256_epilogue + +.rva .LSEH_begin_cneg_mod_256 +.rva .LSEH_body_cneg_mod_256 +.rva .LSEH_info_cneg_mod_256_prologue + +.rva .LSEH_body_cneg_mod_256 +.rva .LSEH_epilogue_cneg_mod_256 +.rva .LSEH_info_cneg_mod_256_body + +.rva .LSEH_epilogue_cneg_mod_256 +.rva .LSEH_end_cneg_mod_256 +.rva .LSEH_info_cneg_mod_256_epilogue + +.rva .LSEH_begin_sub_mod_256 +.rva .LSEH_body_sub_mod_256 +.rva .LSEH_info_sub_mod_256_prologue + +.rva .LSEH_body_sub_mod_256 +.rva .LSEH_epilogue_sub_mod_256 +.rva .LSEH_info_sub_mod_256_body + +.rva .LSEH_epilogue_sub_mod_256 +.rva .LSEH_end_sub_mod_256 +.rva .LSEH_info_sub_mod_256_epilogue + +.rva .LSEH_epilogue_check_mod_256 +.rva .LSEH_end_check_mod_256 +.rva .LSEH_info_check_mod_256_epilogue + +.rva .LSEH_begin_add_n_check_mod_256 +.rva .LSEH_body_add_n_check_mod_256 +.rva .LSEH_info_add_n_check_mod_256_prologue + +.rva .LSEH_body_add_n_check_mod_256 +.rva .LSEH_epilogue_add_n_check_mod_256 +.rva .LSEH_info_add_n_check_mod_256_body + +.rva .LSEH_epilogue_add_n_check_mod_256 +.rva .LSEH_end_add_n_check_mod_256 +.rva .LSEH_info_add_n_check_mod_256_epilogue + +.rva .LSEH_begin_sub_n_check_mod_256 +.rva .LSEH_body_sub_n_check_mod_256 +.rva .LSEH_info_sub_n_check_mod_256_prologue + +.rva .LSEH_body_sub_n_check_mod_256 +.rva .LSEH_epilogue_sub_n_check_mod_256 +.rva .LSEH_info_sub_n_check_mod_256_body + +.rva .LSEH_epilogue_sub_n_check_mod_256 +.rva .LSEH_end_sub_n_check_mod_256 +.rva .LSEH_info_sub_n_check_mod_256_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_add_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_add_mod_256_body: +.byte 1,0,9,0 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_add_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_by_3_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_by_3_mod_256_body: +.byte 1,0,11,0 +.byte 0x00,0xc4,0x00,0x00 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.LSEH_info_mul_by_3_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_lshift_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_lshift_mod_256_body: +.byte 1,0,11,0 +.byte 0x00,0xc4,0x00,0x00 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.LSEH_info_lshift_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_rshift_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_rshift_mod_256_body: +.byte 1,0,9,0 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_rshift_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_cneg_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_cneg_mod_256_body: +.byte 1,0,11,0 +.byte 0x00,0xc4,0x00,0x00 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.LSEH_info_cneg_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sub_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sub_mod_256_body: +.byte 1,0,9,0 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sub_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_check_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_add_n_check_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_add_n_check_mod_256_body: +.byte 1,0,9,0 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_add_n_check_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sub_n_check_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sub_n_check_mod_256_body: +.byte 1,0,9,0 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sub_n_check_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/add_mod_384-armv8.S b/crypto/blst_src/build/coff/add_mod_384-armv8.S new file mode 100644 index 00000000000..2eff0677f54 --- /dev/null +++ b/crypto/blst_src/build/coff/add_mod_384-armv8.S @@ -0,0 +1,1056 @@ +.text + +.globl add_mod_384 + +.def add_mod_384; +.type 32; +.endef +.p2align 5 +add_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.def __add_mod_384; +.type 32; +.endef +.p2align 5 +__add_mod_384: + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + +__add_mod_384_ab_are_loaded: + adds x10,x10,x16 + adcs x11,x11,x17 + adcs x12,x12,x19 + adcs x13,x13,x20 + adcs x14,x14,x21 + adcs x15,x15,x22 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csel x10,x10,x16,lo + csel x11,x11,x17,lo + csel x12,x12,x19,lo + csel x13,x13,x20,lo + csel x14,x14,x21,lo + csel x15,x15,x22,lo + + ret + + +.globl add_mod_384x + +.def add_mod_384x; +.type 32; +.endef +.p2align 5 +add_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl rshift_mod_384 + +.def rshift_mod_384; +.type 32; +.endef +.p2align 5 +rshift_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +.Loop_rshift_mod_384: + sub x2,x2,#1 + bl __rshift_mod_384 + cbnz x2,.Loop_rshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.def __rshift_mod_384; +.type 32; +.endef +.p2align 5 +__rshift_mod_384: + sbfx x22,x10,#0,#1 + and x16,x22,x4 + and x17,x22,x5 + adds x10,x10,x16 + and x19,x22,x6 + adcs x11,x11,x17 + and x20,x22,x7 + adcs x12,x12,x19 + and x21,x22,x8 + adcs x13,x13,x20 + and x22,x22,x9 + adcs x14,x14,x21 + extr x10,x11,x10,#1 // a[0:5] >>= 1 + adcs x15,x15,x22 + extr x11,x12,x11,#1 + adc x22,xzr,xzr + extr x12,x13,x12,#1 + extr x13,x14,x13,#1 + extr x14,x15,x14,#1 + extr x15,x22,x15,#1 + ret + + +.globl div_by_2_mod_384 + +.def div_by_2_mod_384; +.type 32; +.endef +.p2align 5 +div_by_2_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __rshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl lshift_mod_384 + +.def lshift_mod_384; +.type 32; +.endef +.p2align 5 +lshift_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +.Loop_lshift_mod_384: + sub x2,x2,#1 + bl __lshift_mod_384 + cbnz x2,.Loop_lshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.def __lshift_mod_384; +.type 32; +.endef +.p2align 5 +__lshift_mod_384: + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csel x10,x10,x16,lo + csel x11,x11,x17,lo + csel x12,x12,x19,lo + csel x13,x13,x20,lo + csel x14,x14,x21,lo + csel x15,x15,x22,lo + + ret + + +.globl mul_by_3_mod_384 + +.def mul_by_3_mod_384; +.type 32; +.endef +.p2align 5 +mul_by_3_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl mul_by_8_mod_384 + +.def mul_by_8_mod_384; +.type 32; +.endef +.p2align 5 +mul_by_8_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl mul_by_3_mod_384x + +.def mul_by_3_mod_384x; +.type 32; +.endef +.p2align 5 +mul_by_3_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + + ldp x16,x17,[x1,#48] + ldp x19,x20,[x1,#64] + ldp x21,x22,[x1,#80] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl mul_by_8_mod_384x + +.def mul_by_8_mod_384x; +.type 32; +.endef +.p2align 5 +mul_by_8_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl cneg_mod_384 + +.def cneg_mod_384; +.type 32; +.endef +.p2align 5 +cneg_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x4,x5,[x3] + ldp x12,x13,[x1,#16] + ldp x6,x7,[x3,#16] + + subs x16,x4,x10 + ldp x14,x15,[x1,#32] + ldp x8,x9,[x3,#32] + orr x3,x10,x11 + sbcs x17,x5,x11 + orr x3,x3,x12 + sbcs x19,x6,x12 + orr x3,x3,x13 + sbcs x20,x7,x13 + orr x3,x3,x14 + sbcs x21,x8,x14 + orr x3,x3,x15 + sbc x22,x9,x15 + + cmp x3,#0 + csetm x3,ne + ands x2,x2,x3 + + csel x10,x10,x16,eq + csel x11,x11,x17,eq + csel x12,x12,x19,eq + csel x13,x13,x20,eq + stp x10,x11,[x0] + csel x14,x14,x21,eq + stp x12,x13,[x0,#16] + csel x15,x15,x22,eq + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl sub_mod_384 + +.def sub_mod_384; +.type 32; +.endef +.p2align 5 +sub_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.def __sub_mod_384; +.type 32; +.endef +.p2align 5 +__sub_mod_384: + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + + subs x10,x10,x16 + sbcs x11,x11,x17 + sbcs x12,x12,x19 + sbcs x13,x13,x20 + sbcs x14,x14,x21 + sbcs x15,x15,x22 + sbc x3,xzr,xzr + + and x16,x4,x3 + and x17,x5,x3 + adds x10,x10,x16 + and x19,x6,x3 + adcs x11,x11,x17 + and x20,x7,x3 + adcs x12,x12,x19 + and x21,x8,x3 + adcs x13,x13,x20 + and x22,x9,x3 + adcs x14,x14,x21 + adc x15,x15,x22 + + ret + + +.globl sub_mod_384x + +.def sub_mod_384x; +.type 32; +.endef +.p2align 5 +sub_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl mul_by_1_plus_i_mod_384x + +.def mul_by_1_plus_i_mod_384x; +.type 32; +.endef +.p2align 5 +mul_by_1_plus_i_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + add x2,x1,#48 + + bl __sub_mod_384 // a->re - a->im + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __add_mod_384_ab_are_loaded // a->re + a->im + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl sgn0_pty_mod_384 + +.def sgn0_pty_mod_384; +.type 32; +.endef +.p2align 5 +sgn0_pty_mod_384: + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x0,x10,#1 + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x3,x3,xzr + + mvn x3,x3 + and x3,x3,#2 + orr x0,x0,x3 + + ret + + +.globl sgn0_pty_mod_384x + +.def sgn0_pty_mod_384x; +.type 32; +.endef +.p2align 5 +sgn0_pty_mod_384x: + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x2,x10,#1 + orr x3,x10,x11 + adds x10,x10,x10 + orr x3,x3,x12 + adcs x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + ldp x10,x11,[x0,#48] + ldp x12,x13,[x0,#64] + ldp x14,x15,[x0,#80] + + mvn x16,x16 + and x16,x16,#2 + orr x2,x2,x16 + + and x0,x10,#1 + orr x1,x10,x11 + adds x10,x10,x10 + orr x1,x1,x12 + adcs x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + mvn x16,x16 + and x16,x16,#2 + orr x0,x0,x16 + + cmp x3,#0 + csel x3,x0,x2,eq // a->re==0? prty(a->im) : prty(a->re) + + cmp x1,#0 + csel x1,x0,x2,ne // a->im!=0? sgn0(a->im) : sgn0(a->re) + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ret + +.globl vec_select_32 + +.def vec_select_32; +.type 32; +.endef +.p2align 5 +vec_select_32: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + +.globl vec_select_48 + +.def vec_select_48; +.type 32; +.endef +.p2align 5 +vec_select_48: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + +.globl vec_select_96 + +.def vec_select_96; +.type 32; +.endef +.p2align 5 +vec_select_96: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + +.globl vec_select_192 + +.def vec_select_192; +.type 32; +.endef +.p2align 5 +vec_select_192: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + +.globl vec_select_144 + +.def vec_select_144; +.type 32; +.endef +.p2align 5 +vec_select_144: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + +.globl vec_select_288 + +.def vec_select_288; +.type 32; +.endef +.p2align 5 +vec_select_288: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + +.globl vec_prefetch + +.def vec_prefetch; +.type 32; +.endef +.p2align 5 +vec_prefetch: + add x1, x1, x0 + sub x1, x1, #1 + mov x2, #64 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + prfm pldl1keep, [x0] + ret + +.globl vec_is_zero_16x + +.def vec_is_zero_16x; +.type 32; +.endef +.p2align 5 +vec_is_zero_16x: + ld1 {v0.2d}, [x0], #16 + lsr x1, x1, #4 + sub x1, x1, #1 + cbz x1, .Loop_is_zero_done + +.Loop_is_zero: + ld1 {v1.2d}, [x0], #16 + orr v0.16b, v0.16b, v1.16b + sub x1, x1, #1 + cbnz x1, .Loop_is_zero + +.Loop_is_zero_done: + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + csel x0, x0, xzr, eq + ret + +.globl vec_is_equal_16x + +.def vec_is_equal_16x; +.type 32; +.endef +.p2align 5 +vec_is_equal_16x: + ld1 {v0.2d}, [x0], #16 + ld1 {v1.2d}, [x1], #16 + lsr x2, x2, #4 + eor v0.16b, v0.16b, v1.16b + +.Loop_is_equal: + sub x2, x2, #1 + cbz x2, .Loop_is_equal_done + ld1 {v1.2d}, [x0], #16 + ld1 {v2.2d}, [x1], #16 + eor v1.16b, v1.16b, v2.16b + orr v0.16b, v0.16b, v1.16b + b .Loop_is_equal + nop + +.Loop_is_equal_done: + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + csel x0, x0, xzr, eq + ret + diff --git a/crypto/blst_src/build/coff/add_mod_384-x86_64.s b/crypto/blst_src/build/coff/add_mod_384-x86_64.s new file mode 100644 index 00000000000..3ef562a3bf2 --- /dev/null +++ b/crypto/blst_src/build/coff/add_mod_384-x86_64.s @@ -0,0 +1,2510 @@ +.text + +.globl add_mod_384 + +.def add_mod_384; .scl 2; .type 32; .endef +.p2align 5 +add_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_add_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_add_mod_384: + + + call __add_mod_384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_add_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_add_mod_384: + +.def __add_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__add_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__add_mod_384_a_is_loaded: + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + + +.globl add_mod_384x + +.def add_mod_384x; .scl 2; .type 32; .endef +.p2align 5 +add_mod_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_add_mod_384x: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $24,%rsp + +.LSEH_body_add_mod_384x: + + + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + leaq 48(%rsi),%rsi + leaq 48(%rdx),%rdx + leaq 48(%rdi),%rdi + call __add_mod_384 + + movq 0(%rsp),%rsi + movq 8(%rsp),%rdx + leaq -48(%rdi),%rdi + call __add_mod_384 + + movq 24+0(%rsp),%r15 + + movq 24+8(%rsp),%r14 + + movq 24+16(%rsp),%r13 + + movq 24+24(%rsp),%r12 + + movq 24+32(%rsp),%rbx + + movq 24+40(%rsp),%rbp + + leaq 24+48(%rsp),%rsp + +.LSEH_epilogue_add_mod_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_add_mod_384x: + + +.globl rshift_mod_384 + +.def rshift_mod_384; .scl 2; .type 32; .endef +.p2align 5 +rshift_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_rshift_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + +.LSEH_body_rshift_mod_384: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +.Loop_rshift_mod_384: + call __rshift_mod_384 + decl %edx + jnz .Loop_rshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_rshift_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_rshift_mod_384: + +.def __rshift_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__rshift_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rsi + movq 0(%rcx),%r14 + andq %r8,%rsi + movq 8(%rcx),%r15 + negq %rsi + movq 16(%rcx),%rax + andq %rsi,%r14 + movq 24(%rcx),%rbx + andq %rsi,%r15 + movq 32(%rcx),%rbp + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rbp + andq 40(%rcx),%rsi + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rax + adcq %r11,%rbx + adcq %r12,%rbp + adcq %r13,%rsi + sbbq %r13,%r13 + + shrq $1,%r14 + movq %r15,%r8 + shrq $1,%r15 + movq %rax,%r9 + shrq $1,%rax + movq %rbx,%r10 + shrq $1,%rbx + movq %rbp,%r11 + shrq $1,%rbp + movq %rsi,%r12 + shrq $1,%rsi + shlq $63,%r8 + shlq $63,%r9 + orq %r14,%r8 + shlq $63,%r10 + orq %r15,%r9 + shlq $63,%r11 + orq %rax,%r10 + shlq $63,%r12 + orq %rbx,%r11 + shlq $63,%r13 + orq %rbp,%r12 + orq %rsi,%r13 + + .byte 0xf3,0xc3 + + +.globl div_by_2_mod_384 + +.def div_by_2_mod_384; .scl 2; .type 32; .endef +.p2align 5 +div_by_2_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_div_by_2_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + +.LSEH_body_div_by_2_mod_384: + + + movq 0(%rsi),%r8 + movq %rdx,%rcx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + call __rshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_div_by_2_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_div_by_2_mod_384: + + +.globl lshift_mod_384 + +.def lshift_mod_384; .scl 2; .type 32; .endef +.p2align 5 +lshift_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_lshift_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + +.LSEH_body_lshift_mod_384: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +.Loop_lshift_mod_384: + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + movq %r8,%r14 + adcq %r11,%r11 + movq %r9,%r15 + adcq %r12,%r12 + movq %r10,%rax + adcq %r13,%r13 + movq %r11,%rbx + sbbq %rdi,%rdi + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdi + + movq (%rsp),%rdi + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + cmovcq %rbx,%r11 + cmovcq %rbp,%r12 + cmovcq %rsi,%r13 + + decl %edx + jnz .Loop_lshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_lshift_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_lshift_mod_384: + +.def __lshift_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__lshift_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + movq %r8,%r14 + adcq %r11,%r11 + movq %r9,%r15 + adcq %r12,%r12 + movq %r10,%rax + adcq %r13,%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + cmovcq %rbx,%r11 + cmovcq %rbp,%r12 + cmovcq %rsi,%r13 + + .byte 0xf3,0xc3 + + + +.globl mul_by_3_mod_384 + +.def mul_by_3_mod_384; .scl 2; .type 32; .endef +.p2align 5 +mul_by_3_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_by_3_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rsi + +.LSEH_body_mul_by_3_mod_384: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + + movq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_mul_by_3_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_by_3_mod_384: + +.globl mul_by_8_mod_384 + +.def mul_by_8_mod_384; .scl 2; .type 32; .endef +.p2align 5 +mul_by_8_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_by_8_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_mul_by_8_mod_384: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_mul_by_8_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_by_8_mod_384: + + +.globl mul_by_3_mod_384x + +.def mul_by_3_mod_384x; .scl 2; .type 32; .endef +.p2align 5 +mul_by_3_mod_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_by_3_mod_384x: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rsi + +.LSEH_body_mul_by_3_mod_384x: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + + movq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq (%rsp),%rsi + leaq 48(%rdi),%rdi + + movq 48(%rsi),%r8 + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + movq 72(%rsi),%r11 + movq 80(%rsi),%r12 + movq 88(%rsi),%r13 + + call __lshift_mod_384 + + movq $48,%rdx + addq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_mul_by_3_mod_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_by_3_mod_384x: + +.globl mul_by_8_mod_384x + +.def mul_by_8_mod_384x; .scl 2; .type 32; .endef +.p2align 5 +mul_by_8_mod_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_by_8_mod_384x: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rsi + +.LSEH_body_mul_by_8_mod_384x: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq (%rsp),%rsi + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq %r8,48+0(%rdi) + movq %r9,48+8(%rdi) + movq %r10,48+16(%rdi) + movq %r11,48+24(%rdi) + movq %r12,48+32(%rdi) + movq %r13,48+40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_mul_by_8_mod_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_by_8_mod_384x: + + +.globl cneg_mod_384 + +.def cneg_mod_384; .scl 2; .type 32; .endef +.p2align 5 +cneg_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_cneg_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdx + +.LSEH_body_cneg_mod_384: + + + movq 0(%rsi),%rdx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %rdx,%r8 + movq 24(%rsi),%r11 + orq %r9,%rdx + movq 32(%rsi),%r12 + orq %r10,%rdx + movq 40(%rsi),%r13 + orq %r11,%rdx + movq $-1,%rsi + orq %r12,%rdx + orq %r13,%rdx + + movq 0(%rcx),%r14 + cmovnzq %rsi,%rdx + movq 8(%rcx),%r15 + movq 16(%rcx),%rax + andq %rdx,%r14 + movq 24(%rcx),%rbx + andq %rdx,%r15 + movq 32(%rcx),%rbp + andq %rdx,%rax + movq 40(%rcx),%rsi + andq %rdx,%rbx + movq 0(%rsp),%rcx + andq %rdx,%rbp + andq %rdx,%rsi + + subq %r8,%r14 + sbbq %r9,%r15 + sbbq %r10,%rax + sbbq %r11,%rbx + sbbq %r12,%rbp + sbbq %r13,%rsi + + orq %rcx,%rcx + + cmovzq %r8,%r14 + cmovzq %r9,%r15 + cmovzq %r10,%rax + movq %r14,0(%rdi) + cmovzq %r11,%rbx + movq %r15,8(%rdi) + cmovzq %r12,%rbp + movq %rax,16(%rdi) + cmovzq %r13,%rsi + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rsi,40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_cneg_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_cneg_mod_384: + + +.globl sub_mod_384 + +.def sub_mod_384; .scl 2; .type 32; .endef +.p2align 5 +sub_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sub_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sub_mod_384: + + + call __sub_mod_384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sub_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sub_mod_384: + +.def __sub_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__sub_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + + +.globl sub_mod_384x + +.def sub_mod_384x; .scl 2; .type 32; .endef +.p2align 5 +sub_mod_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sub_mod_384x: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $24,%rsp + +.LSEH_body_sub_mod_384x: + + + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + leaq 48(%rsi),%rsi + leaq 48(%rdx),%rdx + leaq 48(%rdi),%rdi + call __sub_mod_384 + + movq 0(%rsp),%rsi + movq 8(%rsp),%rdx + leaq -48(%rdi),%rdi + call __sub_mod_384 + + movq 24+0(%rsp),%r15 + + movq 24+8(%rsp),%r14 + + movq 24+16(%rsp),%r13 + + movq 24+24(%rsp),%r12 + + movq 24+32(%rsp),%rbx + + movq 24+40(%rsp),%rbp + + leaq 24+48(%rsp),%rsp + +.LSEH_epilogue_sub_mod_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sub_mod_384x: +.globl mul_by_1_plus_i_mod_384x + +.def mul_by_1_plus_i_mod_384x; .scl 2; .type 32; .endef +.p2align 5 +mul_by_1_plus_i_mod_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_by_1_plus_i_mod_384x: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $56,%rsp + +.LSEH_body_mul_by_1_plus_i_mod_384x: + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rbx + adcq 72(%rsi),%r11 + movq %r12,%rcx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + movq %rdi,48(%rsp) + sbbq %rdi,%rdi + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rbx + sbbq 80(%rsi),%rcx + sbbq 88(%rsi),%rbp + sbbq %rsi,%rsi + + movq %r8,0(%rsp) + movq 0(%rdx),%r8 + movq %r9,8(%rsp) + movq 8(%rdx),%r9 + movq %r10,16(%rsp) + movq 16(%rdx),%r10 + movq %r11,24(%rsp) + movq 24(%rdx),%r11 + movq %r12,32(%rsp) + andq %rsi,%r8 + movq 32(%rdx),%r12 + movq %r13,40(%rsp) + andq %rsi,%r9 + movq 40(%rdx),%r13 + andq %rsi,%r10 + andq %rsi,%r11 + andq %rsi,%r12 + andq %rsi,%r13 + movq 48(%rsp),%rsi + + addq %r8,%r14 + movq 0(%rsp),%r8 + adcq %r9,%r15 + movq 8(%rsp),%r9 + adcq %r10,%rax + movq 16(%rsp),%r10 + adcq %r11,%rbx + movq 24(%rsp),%r11 + adcq %r12,%rcx + movq 32(%rsp),%r12 + adcq %r13,%rbp + movq 40(%rsp),%r13 + + movq %r14,0(%rsi) + movq %r8,%r14 + movq %r15,8(%rsi) + movq %rax,16(%rsi) + movq %r9,%r15 + movq %rbx,24(%rsi) + movq %rcx,32(%rsi) + movq %r10,%rax + movq %rbp,40(%rsi) + + subq 0(%rdx),%r8 + movq %r11,%rbx + sbbq 8(%rdx),%r9 + sbbq 16(%rdx),%r10 + movq %r12,%rcx + sbbq 24(%rdx),%r11 + sbbq 32(%rdx),%r12 + movq %r13,%rbp + sbbq 40(%rdx),%r13 + sbbq $0,%rdi + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,48(%rsi) + cmovcq %rbx,%r11 + movq %r9,56(%rsi) + cmovcq %rcx,%r12 + movq %r10,64(%rsi) + cmovcq %rbp,%r13 + movq %r11,72(%rsi) + movq %r12,80(%rsi) + movq %r13,88(%rsi) + + movq 56+0(%rsp),%r15 + + movq 56+8(%rsp),%r14 + + movq 56+16(%rsp),%r13 + + movq 56+24(%rsp),%r12 + + movq 56+32(%rsp),%rbx + + movq 56+40(%rsp),%rbp + + leaq 56+48(%rsp),%rsp + +.LSEH_epilogue_mul_by_1_plus_i_mod_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_by_1_plus_i_mod_384x: +.globl sgn0_pty_mod_384 + +.def sgn0_pty_mod_384; .scl 2; .type 32; .endef +.p2align 5 +sgn0_pty_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sgn0_pty_mod_384: + + + movq %rcx,%rdi + movq %rdx,%rsi +.LSEH_body_sgn0_pty_mod_384: + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%rcx + movq 40(%rdi),%rdx + + xorq %rax,%rax + movq %r8,%rdi + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rax + + notq %rax + andq $1,%rdi + andq $2,%rax + orq %rdi,%rax + +.LSEH_epilogue_sgn0_pty_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sgn0_pty_mod_384: + +.globl sgn0_pty_mod_384x + +.def sgn0_pty_mod_384x; .scl 2; .type 32; .endef +.p2align 5 +sgn0_pty_mod_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sgn0_pty_mod_384x: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + pushq %rbx + + subq $8,%rsp + +.LSEH_body_sgn0_pty_mod_384x: + + + movq 48(%rdi),%r8 + movq 56(%rdi),%r9 + movq 64(%rdi),%r10 + movq 72(%rdi),%r11 + movq 80(%rdi),%rcx + movq 88(%rdi),%rdx + + movq %r8,%rbx + orq %r9,%r8 + orq %r10,%r8 + orq %r11,%r8 + orq %rcx,%r8 + orq %rdx,%r8 + + leaq 0(%rdi),%rax + xorq %rdi,%rdi + movq %rbx,%rbp + addq %rbx,%rbx + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rdi + + subq 0(%rsi),%rbx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rdi + + movq %r8,0(%rsp) + notq %rdi + andq $1,%rbp + andq $2,%rdi + orq %rbp,%rdi + + movq 0(%rax),%r8 + movq 8(%rax),%r9 + movq 16(%rax),%r10 + movq 24(%rax),%r11 + movq 32(%rax),%rcx + movq 40(%rax),%rdx + + movq %r8,%rbx + orq %r9,%r8 + orq %r10,%r8 + orq %r11,%r8 + orq %rcx,%r8 + orq %rdx,%r8 + + xorq %rax,%rax + movq %rbx,%rbp + addq %rbx,%rbx + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rax + + subq 0(%rsi),%rbx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rax + + movq 0(%rsp),%rbx + + notq %rax + + testq %r8,%r8 + cmovzq %rdi,%rbp + + testq %rbx,%rbx + cmovnzq %rdi,%rax + + andq $1,%rbp + andq $2,%rax + orq %rbp,%rax + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_sgn0_pty_mod_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sgn0_pty_mod_384x: +.globl vec_select_32 + +.def vec_select_32; .scl 2; .type 32; .endef +.p2align 5 +vec_select_32: + .byte 0xf3,0x0f,0x1e,0xfa + + movd %r9d,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rdx),%xmm0 + leaq 16(%rdx),%rdx + pcmpeqd %xmm4,%xmm5 + movdqu (%r8),%xmm1 + leaq 16(%r8),%r8 + pcmpeqd %xmm5,%xmm4 + leaq 16(%rcx),%rcx + pand %xmm4,%xmm0 + movdqu 0+16-16(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-16(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-16(%rcx) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,16-16(%rcx) + .byte 0xf3,0xc3 + +.globl vec_select_48 + +.def vec_select_48; .scl 2; .type 32; .endef +.p2align 5 +vec_select_48: + .byte 0xf3,0x0f,0x1e,0xfa + + movd %r9d,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rdx),%xmm0 + leaq 24(%rdx),%rdx + pcmpeqd %xmm4,%xmm5 + movdqu (%r8),%xmm1 + leaq 24(%r8),%r8 + pcmpeqd %xmm5,%xmm4 + leaq 24(%rcx),%rcx + pand %xmm4,%xmm0 + movdqu 0+16-24(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-24(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-24(%rcx) + pand %xmm4,%xmm2 + movdqu 16+16-24(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-24(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-24(%rcx) + pand %xmm4,%xmm0 + pand %xmm5,%xmm1 + por %xmm1,%xmm0 + movdqu %xmm0,32-24(%rcx) + .byte 0xf3,0xc3 + +.globl vec_select_96 + +.def vec_select_96; .scl 2; .type 32; .endef +.p2align 5 +vec_select_96: + .byte 0xf3,0x0f,0x1e,0xfa + + movd %r9d,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rdx),%xmm0 + leaq 48(%rdx),%rdx + pcmpeqd %xmm4,%xmm5 + movdqu (%r8),%xmm1 + leaq 48(%r8),%r8 + pcmpeqd %xmm5,%xmm4 + leaq 48(%rcx),%rcx + pand %xmm4,%xmm0 + movdqu 0+16-48(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-48(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-48(%rcx) + pand %xmm4,%xmm2 + movdqu 16+16-48(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-48(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-48(%rcx) + pand %xmm4,%xmm0 + movdqu 32+16-48(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-48(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-48(%rcx) + pand %xmm4,%xmm2 + movdqu 48+16-48(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-48(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-48(%rcx) + pand %xmm4,%xmm0 + movdqu 64+16-48(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-48(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-48(%rcx) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,80-48(%rcx) + .byte 0xf3,0xc3 + +.globl vec_select_192 + +.def vec_select_192; .scl 2; .type 32; .endef +.p2align 5 +vec_select_192: + .byte 0xf3,0x0f,0x1e,0xfa + + movd %r9d,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rdx),%xmm0 + leaq 96(%rdx),%rdx + pcmpeqd %xmm4,%xmm5 + movdqu (%r8),%xmm1 + leaq 96(%r8),%r8 + pcmpeqd %xmm5,%xmm4 + leaq 96(%rcx),%rcx + pand %xmm4,%xmm0 + movdqu 0+16-96(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-96(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-96(%rcx) + pand %xmm4,%xmm2 + movdqu 16+16-96(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-96(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-96(%rcx) + pand %xmm4,%xmm0 + movdqu 32+16-96(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-96(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-96(%rcx) + pand %xmm4,%xmm2 + movdqu 48+16-96(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-96(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-96(%rcx) + pand %xmm4,%xmm0 + movdqu 64+16-96(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-96(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-96(%rcx) + pand %xmm4,%xmm2 + movdqu 80+16-96(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-96(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-96(%rcx) + pand %xmm4,%xmm0 + movdqu 96+16-96(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-96(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-96(%rcx) + pand %xmm4,%xmm2 + movdqu 112+16-96(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-96(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-96(%rcx) + pand %xmm4,%xmm0 + movdqu 128+16-96(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 128+16-96(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,128-96(%rcx) + pand %xmm4,%xmm2 + movdqu 144+16-96(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 144+16-96(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,144-96(%rcx) + pand %xmm4,%xmm0 + movdqu 160+16-96(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 160+16-96(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,160-96(%rcx) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,176-96(%rcx) + .byte 0xf3,0xc3 + +.globl vec_select_144 + +.def vec_select_144; .scl 2; .type 32; .endef +.p2align 5 +vec_select_144: + .byte 0xf3,0x0f,0x1e,0xfa + + movd %r9d,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rdx),%xmm0 + leaq 72(%rdx),%rdx + pcmpeqd %xmm4,%xmm5 + movdqu (%r8),%xmm1 + leaq 72(%r8),%r8 + pcmpeqd %xmm5,%xmm4 + leaq 72(%rcx),%rcx + pand %xmm4,%xmm0 + movdqu 0+16-72(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-72(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-72(%rcx) + pand %xmm4,%xmm2 + movdqu 16+16-72(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-72(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-72(%rcx) + pand %xmm4,%xmm0 + movdqu 32+16-72(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-72(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-72(%rcx) + pand %xmm4,%xmm2 + movdqu 48+16-72(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-72(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-72(%rcx) + pand %xmm4,%xmm0 + movdqu 64+16-72(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-72(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-72(%rcx) + pand %xmm4,%xmm2 + movdqu 80+16-72(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-72(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-72(%rcx) + pand %xmm4,%xmm0 + movdqu 96+16-72(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-72(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-72(%rcx) + pand %xmm4,%xmm2 + movdqu 112+16-72(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-72(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-72(%rcx) + pand %xmm4,%xmm0 + pand %xmm5,%xmm1 + por %xmm1,%xmm0 + movdqu %xmm0,128-72(%rcx) + .byte 0xf3,0xc3 + +.globl vec_select_288 + +.def vec_select_288; .scl 2; .type 32; .endef +.p2align 5 +vec_select_288: + .byte 0xf3,0x0f,0x1e,0xfa + + movd %r9d,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rdx),%xmm0 + leaq 144(%rdx),%rdx + pcmpeqd %xmm4,%xmm5 + movdqu (%r8),%xmm1 + leaq 144(%r8),%r8 + pcmpeqd %xmm5,%xmm4 + leaq 144(%rcx),%rcx + pand %xmm4,%xmm0 + movdqu 0+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-144(%rcx) + pand %xmm4,%xmm2 + movdqu 16+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-144(%rcx) + pand %xmm4,%xmm0 + movdqu 32+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-144(%rcx) + pand %xmm4,%xmm2 + movdqu 48+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-144(%rcx) + pand %xmm4,%xmm0 + movdqu 64+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-144(%rcx) + pand %xmm4,%xmm2 + movdqu 80+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-144(%rcx) + pand %xmm4,%xmm0 + movdqu 96+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-144(%rcx) + pand %xmm4,%xmm2 + movdqu 112+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-144(%rcx) + pand %xmm4,%xmm0 + movdqu 128+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 128+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,128-144(%rcx) + pand %xmm4,%xmm2 + movdqu 144+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 144+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,144-144(%rcx) + pand %xmm4,%xmm0 + movdqu 160+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 160+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,160-144(%rcx) + pand %xmm4,%xmm2 + movdqu 176+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 176+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,176-144(%rcx) + pand %xmm4,%xmm0 + movdqu 192+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 192+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,192-144(%rcx) + pand %xmm4,%xmm2 + movdqu 208+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 208+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,208-144(%rcx) + pand %xmm4,%xmm0 + movdqu 224+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 224+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,224-144(%rcx) + pand %xmm4,%xmm2 + movdqu 240+16-144(%rdx),%xmm0 + pand %xmm5,%xmm3 + movdqu 240+16-144(%r8),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,240-144(%rcx) + pand %xmm4,%xmm0 + movdqu 256+16-144(%rdx),%xmm2 + pand %xmm5,%xmm1 + movdqu 256+16-144(%r8),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,256-144(%rcx) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,272-144(%rcx) + .byte 0xf3,0xc3 + +.globl vec_prefetch + +.def vec_prefetch; .scl 2; .type 32; .endef +.p2align 5 +vec_prefetch: + .byte 0xf3,0x0f,0x1e,0xfa + + leaq -1(%rcx,%rdx,1),%rdx + movq $64,%rax + xorq %r8,%r8 + prefetchnta (%rcx) + leaq (%rcx,%rax,1),%rcx + cmpq %rdx,%rcx + cmovaq %rdx,%rcx + cmovaq %r8,%rax + prefetchnta (%rcx) + leaq (%rcx,%rax,1),%rcx + cmpq %rdx,%rcx + cmovaq %rdx,%rcx + cmovaq %r8,%rax + prefetchnta (%rcx) + leaq (%rcx,%rax,1),%rcx + cmpq %rdx,%rcx + cmovaq %rdx,%rcx + cmovaq %r8,%rax + prefetchnta (%rcx) + leaq (%rcx,%rax,1),%rcx + cmpq %rdx,%rcx + cmovaq %rdx,%rcx + cmovaq %r8,%rax + prefetchnta (%rcx) + leaq (%rcx,%rax,1),%rcx + cmpq %rdx,%rcx + cmovaq %rdx,%rcx + cmovaq %r8,%rax + prefetchnta (%rcx) + leaq (%rcx,%rax,1),%rcx + cmpq %rdx,%rcx + cmovaq %rdx,%rcx + prefetchnta (%rcx) + .byte 0xf3,0xc3 + +.globl vec_is_zero_16x + +.def vec_is_zero_16x; .scl 2; .type 32; .endef +.p2align 5 +vec_is_zero_16x: + .byte 0xf3,0x0f,0x1e,0xfa + + shrl $4,%edx + movdqu (%rcx),%xmm0 + leaq 16(%rcx),%rcx + +.Loop_is_zero: + decl %edx + jz .Loop_is_zero_done + movdqu (%rcx),%xmm1 + leaq 16(%rcx),%rcx + por %xmm1,%xmm0 + jmp .Loop_is_zero + +.Loop_is_zero_done: + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 +.byte 102,72,15,126,192 + incl %edx + testq %rax,%rax + cmovnzl %edx,%eax + xorl $1,%eax + .byte 0xf3,0xc3 + +.globl vec_is_equal_16x + +.def vec_is_equal_16x; .scl 2; .type 32; .endef +.p2align 5 +vec_is_equal_16x: + .byte 0xf3,0x0f,0x1e,0xfa + + shrl $4,%r8d + movdqu (%rcx),%xmm0 + movdqu (%rdx),%xmm1 + subq %rcx,%rdx + leaq 16(%rcx),%rcx + pxor %xmm1,%xmm0 + +.Loop_is_equal: + decl %r8d + jz .Loop_is_equal_done + movdqu (%rcx),%xmm1 + movdqu (%rcx,%rdx,1),%xmm2 + leaq 16(%rcx),%rcx + pxor %xmm2,%xmm1 + por %xmm1,%xmm0 + jmp .Loop_is_equal + +.Loop_is_equal_done: + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 +.byte 102,72,15,126,192 + incl %r8d + testq %rax,%rax + cmovnzl %r8d,%eax + xorl $1,%eax + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_add_mod_384 +.rva .LSEH_body_add_mod_384 +.rva .LSEH_info_add_mod_384_prologue + +.rva .LSEH_body_add_mod_384 +.rva .LSEH_epilogue_add_mod_384 +.rva .LSEH_info_add_mod_384_body + +.rva .LSEH_epilogue_add_mod_384 +.rva .LSEH_end_add_mod_384 +.rva .LSEH_info_add_mod_384_epilogue + +.rva .LSEH_begin_add_mod_384x +.rva .LSEH_body_add_mod_384x +.rva .LSEH_info_add_mod_384x_prologue + +.rva .LSEH_body_add_mod_384x +.rva .LSEH_epilogue_add_mod_384x +.rva .LSEH_info_add_mod_384x_body + +.rva .LSEH_epilogue_add_mod_384x +.rva .LSEH_end_add_mod_384x +.rva .LSEH_info_add_mod_384x_epilogue + +.rva .LSEH_begin_rshift_mod_384 +.rva .LSEH_body_rshift_mod_384 +.rva .LSEH_info_rshift_mod_384_prologue + +.rva .LSEH_body_rshift_mod_384 +.rva .LSEH_epilogue_rshift_mod_384 +.rva .LSEH_info_rshift_mod_384_body + +.rva .LSEH_epilogue_rshift_mod_384 +.rva .LSEH_end_rshift_mod_384 +.rva .LSEH_info_rshift_mod_384_epilogue + +.rva .LSEH_begin_div_by_2_mod_384 +.rva .LSEH_body_div_by_2_mod_384 +.rva .LSEH_info_div_by_2_mod_384_prologue + +.rva .LSEH_body_div_by_2_mod_384 +.rva .LSEH_epilogue_div_by_2_mod_384 +.rva .LSEH_info_div_by_2_mod_384_body + +.rva .LSEH_epilogue_div_by_2_mod_384 +.rva .LSEH_end_div_by_2_mod_384 +.rva .LSEH_info_div_by_2_mod_384_epilogue + +.rva .LSEH_begin_lshift_mod_384 +.rva .LSEH_body_lshift_mod_384 +.rva .LSEH_info_lshift_mod_384_prologue + +.rva .LSEH_body_lshift_mod_384 +.rva .LSEH_epilogue_lshift_mod_384 +.rva .LSEH_info_lshift_mod_384_body + +.rva .LSEH_epilogue_lshift_mod_384 +.rva .LSEH_end_lshift_mod_384 +.rva .LSEH_info_lshift_mod_384_epilogue + +.rva .LSEH_begin_mul_by_3_mod_384 +.rva .LSEH_body_mul_by_3_mod_384 +.rva .LSEH_info_mul_by_3_mod_384_prologue + +.rva .LSEH_body_mul_by_3_mod_384 +.rva .LSEH_epilogue_mul_by_3_mod_384 +.rva .LSEH_info_mul_by_3_mod_384_body + +.rva .LSEH_epilogue_mul_by_3_mod_384 +.rva .LSEH_end_mul_by_3_mod_384 +.rva .LSEH_info_mul_by_3_mod_384_epilogue + +.rva .LSEH_begin_mul_by_8_mod_384 +.rva .LSEH_body_mul_by_8_mod_384 +.rva .LSEH_info_mul_by_8_mod_384_prologue + +.rva .LSEH_body_mul_by_8_mod_384 +.rva .LSEH_epilogue_mul_by_8_mod_384 +.rva .LSEH_info_mul_by_8_mod_384_body + +.rva .LSEH_epilogue_mul_by_8_mod_384 +.rva .LSEH_end_mul_by_8_mod_384 +.rva .LSEH_info_mul_by_8_mod_384_epilogue + +.rva .LSEH_begin_mul_by_3_mod_384x +.rva .LSEH_body_mul_by_3_mod_384x +.rva .LSEH_info_mul_by_3_mod_384x_prologue + +.rva .LSEH_body_mul_by_3_mod_384x +.rva .LSEH_epilogue_mul_by_3_mod_384x +.rva .LSEH_info_mul_by_3_mod_384x_body + +.rva .LSEH_epilogue_mul_by_3_mod_384x +.rva .LSEH_end_mul_by_3_mod_384x +.rva .LSEH_info_mul_by_3_mod_384x_epilogue + +.rva .LSEH_begin_mul_by_8_mod_384x +.rva .LSEH_body_mul_by_8_mod_384x +.rva .LSEH_info_mul_by_8_mod_384x_prologue + +.rva .LSEH_body_mul_by_8_mod_384x +.rva .LSEH_epilogue_mul_by_8_mod_384x +.rva .LSEH_info_mul_by_8_mod_384x_body + +.rva .LSEH_epilogue_mul_by_8_mod_384x +.rva .LSEH_end_mul_by_8_mod_384x +.rva .LSEH_info_mul_by_8_mod_384x_epilogue + +.rva .LSEH_begin_cneg_mod_384 +.rva .LSEH_body_cneg_mod_384 +.rva .LSEH_info_cneg_mod_384_prologue + +.rva .LSEH_body_cneg_mod_384 +.rva .LSEH_epilogue_cneg_mod_384 +.rva .LSEH_info_cneg_mod_384_body + +.rva .LSEH_epilogue_cneg_mod_384 +.rva .LSEH_end_cneg_mod_384 +.rva .LSEH_info_cneg_mod_384_epilogue + +.rva .LSEH_begin_sub_mod_384 +.rva .LSEH_body_sub_mod_384 +.rva .LSEH_info_sub_mod_384_prologue + +.rva .LSEH_body_sub_mod_384 +.rva .LSEH_epilogue_sub_mod_384 +.rva .LSEH_info_sub_mod_384_body + +.rva .LSEH_epilogue_sub_mod_384 +.rva .LSEH_end_sub_mod_384 +.rva .LSEH_info_sub_mod_384_epilogue + +.rva .LSEH_begin_sub_mod_384x +.rva .LSEH_body_sub_mod_384x +.rva .LSEH_info_sub_mod_384x_prologue + +.rva .LSEH_body_sub_mod_384x +.rva .LSEH_epilogue_sub_mod_384x +.rva .LSEH_info_sub_mod_384x_body + +.rva .LSEH_epilogue_sub_mod_384x +.rva .LSEH_end_sub_mod_384x +.rva .LSEH_info_sub_mod_384x_epilogue + +.rva .LSEH_begin_mul_by_1_plus_i_mod_384x +.rva .LSEH_body_mul_by_1_plus_i_mod_384x +.rva .LSEH_info_mul_by_1_plus_i_mod_384x_prologue + +.rva .LSEH_body_mul_by_1_plus_i_mod_384x +.rva .LSEH_epilogue_mul_by_1_plus_i_mod_384x +.rva .LSEH_info_mul_by_1_plus_i_mod_384x_body + +.rva .LSEH_epilogue_mul_by_1_plus_i_mod_384x +.rva .LSEH_end_mul_by_1_plus_i_mod_384x +.rva .LSEH_info_mul_by_1_plus_i_mod_384x_epilogue + +.rva .LSEH_begin_sgn0_pty_mod_384 +.rva .LSEH_body_sgn0_pty_mod_384 +.rva .LSEH_info_sgn0_pty_mod_384_prologue + +.rva .LSEH_body_sgn0_pty_mod_384 +.rva .LSEH_epilogue_sgn0_pty_mod_384 +.rva .LSEH_info_sgn0_pty_mod_384_body + +.rva .LSEH_epilogue_sgn0_pty_mod_384 +.rva .LSEH_end_sgn0_pty_mod_384 +.rva .LSEH_info_sgn0_pty_mod_384_epilogue + +.rva .LSEH_begin_sgn0_pty_mod_384x +.rva .LSEH_body_sgn0_pty_mod_384x +.rva .LSEH_info_sgn0_pty_mod_384x_prologue + +.rva .LSEH_body_sgn0_pty_mod_384x +.rva .LSEH_epilogue_sgn0_pty_mod_384x +.rva .LSEH_info_sgn0_pty_mod_384x_body + +.rva .LSEH_epilogue_sgn0_pty_mod_384x +.rva .LSEH_end_sgn0_pty_mod_384x +.rva .LSEH_info_sgn0_pty_mod_384x_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_add_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_add_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_add_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_add_mod_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_add_mod_384x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x03,0x00 +.byte 0x00,0xe4,0x04,0x00 +.byte 0x00,0xd4,0x05,0x00 +.byte 0x00,0xc4,0x06,0x00 +.byte 0x00,0x34,0x07,0x00 +.byte 0x00,0x54,0x08,0x00 +.byte 0x00,0x74,0x0a,0x00 +.byte 0x00,0x64,0x0b,0x00 +.byte 0x00,0x82 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_add_mod_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_rshift_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_rshift_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_rshift_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_div_by_2_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_div_by_2_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_div_by_2_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_lshift_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_lshift_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_lshift_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_by_3_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_by_3_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_by_3_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_by_8_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_by_8_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_by_8_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_by_3_mod_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_by_3_mod_384x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_by_3_mod_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_by_8_mod_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_by_8_mod_384x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_by_8_mod_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_cneg_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_cneg_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_cneg_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sub_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sub_mod_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sub_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sub_mod_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sub_mod_384x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x03,0x00 +.byte 0x00,0xe4,0x04,0x00 +.byte 0x00,0xd4,0x05,0x00 +.byte 0x00,0xc4,0x06,0x00 +.byte 0x00,0x34,0x07,0x00 +.byte 0x00,0x54,0x08,0x00 +.byte 0x00,0x74,0x0a,0x00 +.byte 0x00,0x64,0x0b,0x00 +.byte 0x00,0x82 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sub_mod_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_by_1_plus_i_mod_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_by_1_plus_i_mod_384x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x07,0x00 +.byte 0x00,0xe4,0x08,0x00 +.byte 0x00,0xd4,0x09,0x00 +.byte 0x00,0xc4,0x0a,0x00 +.byte 0x00,0x34,0x0b,0x00 +.byte 0x00,0x54,0x0c,0x00 +.byte 0x00,0x74,0x0e,0x00 +.byte 0x00,0x64,0x0f,0x00 +.byte 0x00,0xc2 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_by_1_plus_i_mod_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sgn0_pty_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sgn0_pty_mod_384_body: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sgn0_pty_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sgn0_pty_mod_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sgn0_pty_mod_384x_body: +.byte 1,0,9,0 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sgn0_pty_mod_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/add_mod_384x384-x86_64.s b/crypto/blst_src/build/coff/add_mod_384x384-x86_64.s new file mode 100644 index 00000000000..53662b4a56a --- /dev/null +++ b/crypto/blst_src/build/coff/add_mod_384x384-x86_64.s @@ -0,0 +1,330 @@ +.text + +.def __add_mod_384x384; .scl 3; .type 32; .endef +.p2align 5 +__add_mod_384x384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + addq 0(%rdx),%r8 + movq 56(%rsi),%r15 + adcq 8(%rdx),%r9 + movq 64(%rsi),%rax + adcq 16(%rdx),%r10 + movq 72(%rsi),%rbx + adcq 24(%rdx),%r11 + movq 80(%rsi),%rbp + adcq 32(%rdx),%r12 + movq 88(%rsi),%rsi + adcq 40(%rdx),%r13 + movq %r8,0(%rdi) + adcq 48(%rdx),%r14 + movq %r9,8(%rdi) + adcq 56(%rdx),%r15 + movq %r10,16(%rdi) + adcq 64(%rdx),%rax + movq %r12,32(%rdi) + movq %r14,%r8 + adcq 72(%rdx),%rbx + movq %r11,24(%rdi) + movq %r15,%r9 + adcq 80(%rdx),%rbp + movq %r13,40(%rdi) + movq %rax,%r10 + adcq 88(%rdx),%rsi + movq %rbx,%r11 + sbbq %rdx,%rdx + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %rbp,%r12 + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%rbx + sbbq 32(%rcx),%rbp + movq %rsi,%r13 + sbbq 40(%rcx),%rsi + sbbq $0,%rdx + + cmovcq %r8,%r14 + cmovcq %r9,%r15 + cmovcq %r10,%rax + movq %r14,48(%rdi) + cmovcq %r11,%rbx + movq %r15,56(%rdi) + cmovcq %r12,%rbp + movq %rax,64(%rdi) + cmovcq %r13,%rsi + movq %rbx,72(%rdi) + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 + + +.def __sub_mod_384x384; .scl 3; .type 32; .endef +.p2align 5 +__sub_mod_384x384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 + + +.globl add_mod_384x384 + +.def add_mod_384x384; .scl 2; .type 32; .endef +.p2align 5 +add_mod_384x384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_add_mod_384x384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_add_mod_384x384: + + + call __add_mod_384x384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_add_mod_384x384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_add_mod_384x384: + +.globl sub_mod_384x384 + +.def sub_mod_384x384; .scl 2; .type 32; .endef +.p2align 5 +sub_mod_384x384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sub_mod_384x384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sub_mod_384x384: + + + call __sub_mod_384x384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sub_mod_384x384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sub_mod_384x384: +.section .pdata +.p2align 2 +.rva .LSEH_begin_add_mod_384x384 +.rva .LSEH_body_add_mod_384x384 +.rva .LSEH_info_add_mod_384x384_prologue + +.rva .LSEH_body_add_mod_384x384 +.rva .LSEH_epilogue_add_mod_384x384 +.rva .LSEH_info_add_mod_384x384_body + +.rva .LSEH_epilogue_add_mod_384x384 +.rva .LSEH_end_add_mod_384x384 +.rva .LSEH_info_add_mod_384x384_epilogue + +.rva .LSEH_begin_sub_mod_384x384 +.rva .LSEH_body_sub_mod_384x384 +.rva .LSEH_info_sub_mod_384x384_prologue + +.rva .LSEH_body_sub_mod_384x384 +.rva .LSEH_epilogue_sub_mod_384x384 +.rva .LSEH_info_sub_mod_384x384_body + +.rva .LSEH_epilogue_sub_mod_384x384 +.rva .LSEH_end_sub_mod_384x384 +.rva .LSEH_info_sub_mod_384x384_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_add_mod_384x384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_add_mod_384x384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_add_mod_384x384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sub_mod_384x384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sub_mod_384x384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sub_mod_384x384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/ct_inverse_mod_256-armv8.S b/crypto/blst_src/build/coff/ct_inverse_mod_256-armv8.S new file mode 100644 index 00000000000..d2fd83182b4 --- /dev/null +++ b/crypto/blst_src/build/coff/ct_inverse_mod_256-armv8.S @@ -0,0 +1,799 @@ +.text + +.globl ct_inverse_mod_256 + +.def ct_inverse_mod_256; +.type 32; +.endef +.p2align 5 +ct_inverse_mod_256: +.long 3573752639 + stp x29, x30, [sp,#-80]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + sub sp, sp, #1040 + + ldp x4, x5, [x1,#8*0] + ldp x6, x7, [x1,#8*2] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + str x0, [sp] + + ldp x8, x9, [x2,#8*0] + ldp x10, x11, [x2,#8*2] + + stp x4, x5, [x1,#8*0] // copy input to |a| + stp x6, x7, [x1,#8*2] + stp x8, x9, [x1,#8*4] // copy modulus to |b| + stp x10, x11, [x1,#8*6] + + ////////////////////////////////////////// first iteration + bl .Lab_approximation_31_256_loaded + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + str x12,[x0,#8*8] // initialize |u| with |f0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to dst |b| + bl __smul_256_n_shift_by_31 + str x12, [x0,#8*9] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + ldr x8, [x1,#8*8] // |u| + ldr x9, [x1,#8*13] // |v| + madd x4, x16, x8, xzr // |u|*|f0| + madd x4, x17, x9, x4 // |v|*|g0| + str x4, [x0,#8*4] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*5] + stp x5, x5, [x0,#8*7] + + madd x4, x12, x8, xzr // |u|*|f1| + madd x4, x13, x9, x4 // |v|*|g1| + str x4, [x0,#8*9] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*10] + stp x5, x5, [x0,#8*12] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + ////////////////////////////////////////// two[!] last iterations + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #47 // 31 + 512 % 31 + //bl __ab_approximation_62_256 // |a| and |b| are exact, + ldr x7, [x1,#8*0] // just load + ldr x11, [x1,#8*4] + bl __inner_loop_62_256 + + mov x16, x14 + mov x17, x15 + ldr x0, [sp] // original out_ptr + bl __smul_256x63 + bl __smul_512x63_tail + ldr x30, [x29,#8] + + smulh x20, x7, x17 // figure out top-most limb + ldp x8, x9, [x3,#8*0] + adc x23, x23, x25 + ldp x10, x11, [x3,#8*2] + + add x20, x20, x23 // x20 is 1, 0 or -1 + asr x19, x20, #63 // sign as mask + + and x23, x8, x19 // add mod<<256 conditionally + and x24, x9, x19 + adds x4, x4, x23 + and x25, x10, x19 + adcs x5, x5, x24 + and x26, x11, x19 + adcs x6, x6, x25 + adcs x7, x22, x26 + adc x20, x20, xzr // x20 is 1, 0 or -1 + + neg x19, x20 + orr x20, x20, x19 // excess bit or sign as mask + asr x19, x19, #63 // excess bit as mask + + and x8, x8, x20 // mask |mod| + and x9, x9, x20 + and x10, x10, x20 + and x11, x11, x20 + + eor x8, x8, x19 // conditionally negate |mod| + eor x9, x9, x19 + adds x8, x8, x19, lsr#63 + eor x10, x10, x19 + adcs x9, x9, xzr + eor x11, x11, x19 + adcs x10, x10, xzr + adc x11, x11, xzr + + adds x4, x4, x8 // final adjustment for |mod|<<256 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*4] + adc x7, x7, x11 + stp x6, x7, [x0,#8*6] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldr x29, [sp],#80 +.long 3573752767 + ret + + +//////////////////////////////////////////////////////////////////////// +.def __smul_256x63; +.type 32; +.endef +.p2align 5 +__smul_256x63: + ldp x4, x5, [x1,#8*0+64] // load |u| (or |v|) + asr x14, x16, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x6, x7, [x1,#8*2+64] + eor x16, x16, x14 // conditionally negate |f_| (or |g_|) + ldr x22, [x1,#8*4+64] + + eor x4, x4, x14 // conditionally negate |u| (or |v|) + sub x16, x16, x14 + eor x5, x5, x14 + adds x4, x4, x14, lsr#63 + eor x6, x6, x14 + adcs x5, x5, xzr + eor x7, x7, x14 + adcs x6, x6, xzr + eor x22, x22, x14 + umulh x19, x4, x16 + adcs x7, x7, xzr + umulh x20, x5, x16 + adcs x22, x22, xzr + umulh x21, x6, x16 + mul x4, x4, x16 + cmp x16, #0 + mul x5, x5, x16 + csel x22, x22, xzr, ne + mul x6, x6, x16 + adds x5, x5, x19 + mul x24, x7, x16 + adcs x6, x6, x20 + adcs x24, x24, x21 + adc x26, xzr, xzr + ldp x8, x9, [x1,#8*0+104] // load |u| (or |v|) + asr x14, x17, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x10, x11, [x1,#8*2+104] + eor x17, x17, x14 // conditionally negate |f_| (or |g_|) + ldr x23, [x1,#8*4+104] + + eor x8, x8, x14 // conditionally negate |u| (or |v|) + sub x17, x17, x14 + eor x9, x9, x14 + adds x8, x8, x14, lsr#63 + eor x10, x10, x14 + adcs x9, x9, xzr + eor x11, x11, x14 + adcs x10, x10, xzr + eor x23, x23, x14 + umulh x19, x8, x17 + adcs x11, x11, xzr + umulh x20, x9, x17 + adcs x23, x23, xzr + umulh x21, x10, x17 + adc x15, xzr, xzr // used in __smul_512x63_tail + mul x8, x8, x17 + cmp x17, #0 + mul x9, x9, x17 + csel x23, x23, xzr, ne + mul x10, x10, x17 + adds x9, x9, x19 + mul x25, x11, x17 + adcs x10, x10, x20 + adcs x25, x25, x21 + adc x26, x26, xzr + + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*0] + adcs x24, x24, x25 + stp x6, x24, [x0,#8*2] + + ret + + +.def __smul_512x63_tail; +.type 32; +.endef +.p2align 5 +__smul_512x63_tail: + umulh x24, x7, x16 + ldp x5, x6, [x1,#8*18] // load rest of |v| + adc x26, x26, xzr + ldr x7, [x1,#8*20] + and x22, x22, x16 + + umulh x11, x11, x17 // resume |v|*|g1| chain + + sub x24, x24, x22 // tie up |u|*|f1| chain + asr x25, x24, #63 + + eor x5, x5, x14 // conditionally negate rest of |v| + eor x6, x6, x14 + adds x5, x5, x15 + eor x7, x7, x14 + adcs x6, x6, xzr + umulh x19, x23, x17 + adc x7, x7, xzr + umulh x20, x5, x17 + add x11, x11, x26 + umulh x21, x6, x17 + + mul x4, x23, x17 + mul x5, x5, x17 + adds x4, x4, x11 + mul x6, x6, x17 + adcs x5, x5, x19 + mul x22, x7, x17 + adcs x6, x6, x20 + adcs x22, x22, x21 + adc x23, xzr, xzr // used in the final step + + adds x4, x4, x24 + adcs x5, x5, x25 + adcs x6, x6, x25 + stp x4, x5, [x0,#8*4] + adcs x22, x22, x25 // carry is used in the final step + stp x6, x22, [x0,#8*6] + + ret + + +.def __smul_256_n_shift_by_31; +.type 32; +.endef +.p2align 5 +__smul_256_n_shift_by_31: + ldp x4, x5, [x1,#8*0+0] // load |a| (or |b|) + asr x24, x12, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x6, x7, [x1,#8*2+0] + eor x25, x12, x24 // conditionally negate |f0| (or |g0|) + + eor x4, x4, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x5, x5, x24 + adds x4, x4, x24, lsr#63 + eor x6, x6, x24 + adcs x5, x5, xzr + eor x7, x7, x24 + umulh x19, x4, x25 + adcs x6, x6, xzr + umulh x20, x5, x25 + adc x7, x7, xzr + umulh x21, x6, x25 + and x24, x24, x25 + umulh x22, x7, x25 + neg x24, x24 + + mul x4, x4, x25 + mul x5, x5, x25 + mul x6, x6, x25 + adds x5, x5, x19 + mul x7, x7, x25 + adcs x6, x6, x20 + adcs x7, x7, x21 + adc x22, x22, x24 + ldp x8, x9, [x1,#8*0+32] // load |a| (or |b|) + asr x24, x13, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x10, x11, [x1,#8*2+32] + eor x25, x13, x24 // conditionally negate |f0| (or |g0|) + + eor x8, x8, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x9, x9, x24 + adds x8, x8, x24, lsr#63 + eor x10, x10, x24 + adcs x9, x9, xzr + eor x11, x11, x24 + umulh x19, x8, x25 + adcs x10, x10, xzr + umulh x20, x9, x25 + adc x11, x11, xzr + umulh x21, x10, x25 + and x24, x24, x25 + umulh x23, x11, x25 + neg x24, x24 + + mul x8, x8, x25 + mul x9, x9, x25 + mul x10, x10, x25 + adds x9, x9, x19 + mul x11, x11, x25 + adcs x10, x10, x20 + adcs x11, x11, x21 + adc x23, x23, x24 + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + adcs x7, x7, x11 + adc x8, x22, x23 + + extr x4, x5, x4, #31 + extr x5, x6, x5, #31 + extr x6, x7, x6, #31 + asr x23, x8, #63 // result's sign as mask + extr x7, x8, x7, #31 + + eor x4, x4, x23 // ensure the result is positive + eor x5, x5, x23 + adds x4, x4, x23, lsr#63 + eor x6, x6, x23 + adcs x5, x5, xzr + eor x7, x7, x23 + adcs x6, x6, xzr + stp x4, x5, [x0,#8*0] + adc x7, x7, xzr + stp x6, x7, [x0,#8*2] + + eor x12, x12, x23 // adjust |f/g| accordingly + eor x13, x13, x23 + sub x12, x12, x23 + sub x13, x13, x23 + + ret + +.def __ab_approximation_31_256; +.type 32; +.endef +.p2align 4 +__ab_approximation_31_256: + ldp x6, x7, [x1,#8*2] + ldp x10, x11, [x1,#8*6] + ldp x4, x5, [x1,#8*0] + ldp x8, x9, [x1,#8*4] + +.Lab_approximation_31_256_loaded: + orr x19, x7, x11 // check top-most limbs, ... + cmp x19, #0 + csel x7, x7, x6, ne + csel x11, x11, x10, ne + csel x6, x6, x5, ne + orr x19, x7, x11 // and ones before top-most, ... + csel x10, x10, x9, ne + + cmp x19, #0 + csel x7, x7, x6, ne + csel x11, x11, x10, ne + csel x6, x6, x4, ne + orr x19, x7, x11 // and one more, ... + csel x10, x10, x8, ne + + clz x19, x19 + cmp x19, #64 + csel x19, x19, xzr, ne + csel x7, x7, x6, ne + csel x11, x11, x10, ne + neg x20, x19 + + lslv x7, x7, x19 // align high limbs to the left + lslv x11, x11, x19 + lsrv x6, x6, x20 + lsrv x10, x10, x20 + and x6, x6, x20, asr#6 + and x10, x10, x20, asr#6 + orr x7, x7, x6 + orr x11, x11, x10 + + bfxil x7, x4, #0, #31 + bfxil x11, x8, #0, #31 + + b __inner_loop_31_256 + ret + + +.def __inner_loop_31_256; +.type 32; +.endef +.p2align 4 +__inner_loop_31_256: + mov x2, #31 + mov x13, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x15, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x23,#0x7FFFFFFF7FFFFFFF + +.Loop_31_256: + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x15 + csel x11, x11, x7, hs // |b_| = |a_| + csel x7, x21, x20, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x15, x15, x13, hs // exchange |fg0| and |fg1| + csel x13, x13, x19, hs + lsr x7, x7, #1 + and x19, x15, x22 + and x20, x23, x22 + sub x13, x13, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x15, x15, x15 // |f1|<<=1 + add x13, x13, x20 + sub x15, x15, x23 + cbnz x2, .Loop_31_256 + + mov x23, #0x7FFFFFFF + ubfx x12, x13, #0, #32 + ubfx x13, x13, #32, #32 + ubfx x14, x15, #0, #32 + ubfx x15, x15, #32, #32 + sub x12, x12, x23 // remove bias + sub x13, x13, x23 + sub x14, x14, x23 + sub x15, x15, x23 + + ret + + +.def __inner_loop_62_256; +.type 32; +.endef +.p2align 4 +__inner_loop_62_256: + mov x12, #1 // |f0|=1 + mov x13, #0 // |g0|=0 + mov x14, #0 // |f1|=0 + mov x15, #1 // |g1|=1 + +.Loop_62_256: + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x12 + csel x11, x11, x7, hs // |b_| = |a_| + csel x7, x21, x20, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + mov x20, x13 + csel x12, x12, x14, hs // exchange |f0| and |f1| + csel x14, x14, x19, hs + csel x13, x13, x15, hs // exchange |g0| and |g1| + csel x15, x15, x20, hs + lsr x7, x7, #1 + and x19, x14, x22 + and x20, x15, x22 + add x14, x14, x14 // |f1|<<=1 + add x15, x15, x15 // |g1|<<=1 + sub x12, x12, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x13, x13, x20 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, .Loop_62_256 + + ret + diff --git a/crypto/blst_src/build/coff/ct_inverse_mod_256-x86_64.s b/crypto/blst_src/build/coff/ct_inverse_mod_256-x86_64.s new file mode 100644 index 00000000000..d1aa7597bc0 --- /dev/null +++ b/crypto/blst_src/build/coff/ct_inverse_mod_256-x86_64.s @@ -0,0 +1,1213 @@ +.text + +.globl ct_inverse_mod_256 + +.def ct_inverse_mod_256; .scl 2; .type 32; .endef +.p2align 5 +ct_inverse_mod_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_ct_inverse_mod_256: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $1072,%rsp + +.LSEH_body_ct_inverse_mod_256: + + + leaq 48+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + movq 0(%rdx),%r12 + movq 8(%rdx),%r13 + movq 16(%rdx),%r14 + movq 24(%rdx),%r15 + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + + movq %r12,32(%rax) + movq %r13,40(%rax) + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rax,%rsi + + + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + + + movq %rdx,64(%rdi) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + + + movq %rdx,72(%rdi) + + + xorq $256,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + + + + movq 64(%rsi),%r8 + movq 104(%rsi),%r12 + movq %r8,%r9 + imulq 0(%rsp),%r8 + movq %r12,%r13 + imulq 8(%rsp),%r12 + addq %r12,%r8 + movq %r8,32(%rdi) + sarq $63,%r8 + movq %r8,40(%rdi) + movq %r8,48(%rdi) + movq %r8,56(%rdi) + movq %r8,64(%rdi) + leaq 64(%rsi),%rsi + + imulq %rdx,%r9 + imulq %rcx,%r13 + addq %r13,%r9 + movq %r9,72(%rdi) + sarq $63,%r9 + movq %r9,80(%rdi) + movq %r9,88(%rdi) + movq %r9,96(%rdi) + movq %r9,104(%rdi) + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + sarq $63,%rbp + movq %rbp,40(%rdi) + movq %rbp,48(%rdi) + movq %rbp,56(%rdi) + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + + xorq $256+64,%rsi + movl $47,%edx + + movq 0(%rsi),%r8 + + movq 32(%rsi),%r10 + + call __inner_loop_62_256 + + + + + + + + leaq 64(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulq_512x63 + adcq %rbp,%rdx + + movq 40(%rsp),%rsi + movq %rdx,%rax + sarq $63,%rdx + + movq %rdx,%r8 + movq %rdx,%r9 + andq 0(%rsi),%r8 + movq %rdx,%r10 + andq 8(%rsi),%r9 + andq 16(%rsi),%r10 + andq 24(%rsi),%rdx + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %rdx,%r15 + adcq $0,%rax + + movq %rax,%rdx + negq %rax + orq %rax,%rdx + sarq $63,%rax + + movq %rdx,%r8 + movq %rdx,%r9 + andq 0(%rsi),%r8 + movq %rdx,%r10 + andq 8(%rsi),%r9 + andq 16(%rsi),%r10 + andq 24(%rsi),%rdx + + xorq %rax,%r8 + xorq %rcx,%rcx + xorq %rax,%r9 + subq %rax,%rcx + xorq %rax,%r10 + xorq %rax,%rdx + addq %rcx,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%rdx + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %rdx,%r15 + + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 1072(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_ct_inverse_mod_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_ct_inverse_mod_256: +.def __smulq_512x63; .scl 3; .type 32; .endef +.p2align 5 +__smulq_512x63: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%rbp + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%rbp + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%rbp + + mulq %rbx + movq %rax,0(%rdi) + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %r9,8(%rdi) + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %r10,16(%rdi) + movq %rdx,%r11 + andq %rbx,%rbp + negq %rbp + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq %r11,24(%rdi) + + movq 40(%rsi),%r8 + movq 48(%rsi),%r9 + movq 56(%rsi),%r10 + movq 64(%rsi),%r11 + movq 72(%rsi),%r12 + movq 80(%rsi),%r13 + movq 88(%rsi),%r14 + movq 96(%rsi),%r15 + + movq %rcx,%rdx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rcx + addq %rax,%rcx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + + mulq %rcx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rcx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rcx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rcx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rcx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + mulq %rcx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rcx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + imulq %rcx + addq %rax,%r15 + adcq $0,%rdx + + movq %rbp,%rbx + sarq $63,%rbp + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq %rbx,%r12 + adcq %rbp,%r13 + adcq %rbp,%r14 + adcq %rbp,%r15 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 + + +.def __smulq_256x63; .scl 3; .type 32; .endef +.p2align 5 +__smulq_256x63: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%rbp + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%rbp + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%rbp + + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + andq %rbx,%rbp + negq %rbp + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq %rcx,%rdx + movq 40+0(%rsi),%r12 + movq 40+8(%rsi),%r13 + movq 40+16(%rsi),%r14 + movq 40+24(%rsi),%r15 + movq 40+32(%rsi),%rcx + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + xorq %rdx,%rcx + addq %r12,%rax + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rcx + + mulq %rbx + movq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + mulq %rbx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rbx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + andq %rbx,%rcx + negq %rcx + mulq %rbx + addq %rax,%r15 + adcq %rdx,%rcx + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rcx,%rbp + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %rbp,32(%rdi) + + .byte 0xf3,0xc3 + +.def __smulq_256_n_shift_by_31; .scl 3; .type 32; .endef +.p2align 5 +__smulq_256_n_shift_by_31: + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,0(%rdi) + movq %rcx,8(%rdi) + movq %rdx,%rbp + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + + movq %rbp,%rbx + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rbx + addq %rax,%rbx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + andq %rbx,%rbp + negq %rbp + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r14 + movq 32+24(%rsi),%r15 + + movq %rcx,%rbx + sarq $63,%rcx + xorq %rax,%rax + subq %rcx,%rax + + xorq %rcx,%rbx + addq %rax,%rbx + + xorq %rcx,%r12 + xorq %rcx,%r13 + xorq %rcx,%r14 + xorq %rcx,%r15 + addq %r12,%rax + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + + mulq %rbx + movq %rax,%r12 + movq %r13,%rax + andq %rbx,%rcx + negq %rcx + movq %rdx,%r13 + mulq %rbx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rbx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + mulq %rbx + addq %rax,%r15 + adcq %rdx,%rcx + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rcx,%rbp + + movq 0(%rdi),%rdx + movq 8(%rdi),%rcx + + shrdq $31,%r9,%r8 + shrdq $31,%r10,%r9 + shrdq $31,%r11,%r10 + shrdq $31,%rbp,%r11 + + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + xorq %rbp,%rdx + xorq %rbp,%rcx + addq %rax,%rdx + addq %rax,%rcx + + .byte 0xf3,0xc3 + +.def __ab_approximation_31_256; .scl 3; .type 32; .endef +.p2align 5 +__ab_approximation_31_256: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 24(%rsi),%r9 + movq 56(%rsi),%r11 + movq 16(%rsi),%rbx + movq 48(%rsi),%rbp + movq 8(%rsi),%r8 + movq 40(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 0(%rsi),%r8 + cmovzq %r10,%rbp + movq 32(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r9 + cmovzq %r10,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + movl $0x7FFFFFFF,%eax + andq %rax,%r8 + andq %rax,%r10 + notq %rax + andq %rax,%r9 + andq %rax,%r11 + orq %r9,%r8 + orq %r11,%r10 + + jmp __inner_loop_31_256 + + .byte 0xf3,0xc3 + +.def __inner_loop_31_256; .scl 3; .type 32; .endef +.p2align 5 +__inner_loop_31_256: + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rcx + movq $0x800000007FFFFFFF,%r13 + movq $0x7FFFFFFF7FFFFFFF,%r15 + +.Loop_31_256: + cmpq %r10,%r8 + movq %r8,%rax + movq %r10,%rbx + movq %rcx,%rbp + movq %r13,%r14 + cmovbq %r10,%r8 + cmovbq %rax,%r10 + cmovbq %r13,%rcx + cmovbq %rbp,%r13 + + subq %r10,%r8 + subq %r13,%rcx + addq %r15,%rcx + + testq $1,%rax + cmovzq %rax,%r8 + cmovzq %rbx,%r10 + cmovzq %rbp,%rcx + cmovzq %r14,%r13 + + shrq $1,%r8 + addq %r13,%r13 + subq %r15,%r13 + subl $1,%edx + jnz .Loop_31_256 + + shrq $32,%r15 + movl %ecx,%edx + movl %r13d,%r12d + shrq $32,%rcx + shrq $32,%r13 + subq %r15,%rdx + subq %r15,%rcx + subq %r15,%r12 + subq %r15,%r13 + + .byte 0xf3,0xc3 + + +.def __inner_loop_62_256; .scl 3; .type 32; .endef +.p2align 5 +__inner_loop_62_256: + .byte 0xf3,0x0f,0x1e,0xfa + + movl %edx,%r15d + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq %rdx,%r13 + movq %rdx,%r14 + +.Loop_62_256: + xorq %rax,%rax + testq %r14,%r8 + movq %r10,%rbx + cmovnzq %r10,%rax + subq %r8,%rbx + movq %r8,%rbp + subq %rax,%r8 + cmovcq %rbx,%r8 + cmovcq %rbp,%r10 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrq $1,%r8 + testq %r14,%rbp + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%r15d + jnz .Loop_62_256 + + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_ct_inverse_mod_256 +.rva .LSEH_body_ct_inverse_mod_256 +.rva .LSEH_info_ct_inverse_mod_256_prologue + +.rva .LSEH_body_ct_inverse_mod_256 +.rva .LSEH_epilogue_ct_inverse_mod_256 +.rva .LSEH_info_ct_inverse_mod_256_body + +.rva .LSEH_epilogue_ct_inverse_mod_256 +.rva .LSEH_end_ct_inverse_mod_256 +.rva .LSEH_info_ct_inverse_mod_256_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_ct_inverse_mod_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_ct_inverse_mod_256_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x86,0x00 +.byte 0x00,0xe4,0x87,0x00 +.byte 0x00,0xd4,0x88,0x00 +.byte 0x00,0xc4,0x89,0x00 +.byte 0x00,0x34,0x8a,0x00 +.byte 0x00,0x54,0x8b,0x00 +.byte 0x00,0x74,0x8d,0x00 +.byte 0x00,0x64,0x8e,0x00 +.byte 0x00,0x01,0x8c,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_ct_inverse_mod_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/ct_inverse_mod_384-armv8.S b/crypto/blst_src/build/coff/ct_inverse_mod_384-armv8.S new file mode 100644 index 00000000000..86fdc405828 --- /dev/null +++ b/crypto/blst_src/build/coff/ct_inverse_mod_384-armv8.S @@ -0,0 +1,730 @@ +.text + +.globl ct_inverse_mod_383 + +.def ct_inverse_mod_383; +.type 32; +.endef +.p2align 5 +ct_inverse_mod_383: +.long 3573752639 + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #1040 + + ldp x22, x4, [x1,#8*0] + ldp x5, x6, [x1,#8*2] + ldp x7, x8, [x1,#8*4] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + stp x0, x3, [sp] + + ldp x9, x10, [x2,#8*0] + ldp x11, x12, [x2,#8*2] + ldp x13, x14, [x2,#8*4] + + stp x22, x4, [x1,#8*0] // copy input to |a| + stp x5, x6, [x1,#8*2] + stp x7, x8, [x1,#8*4] + stp x9, x10, [x1,#8*6] // copy modulus to |b| + stp x11, x12, [x1,#8*8] + stp x13, x14, [x1,#8*10] + + ////////////////////////////////////////// first iteration + mov x2, #62 + bl .Lab_approximation_62_loaded + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + str x15,[x0,#8*12] // initialize |u| with |f0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to dst |b| + bl __smul_383_n_shift_by_62 + str x15, [x0,#8*12] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + ldr x7, [x1,#8*12] // |u| + ldr x8, [x1,#8*18] // |v| + mul x3, x20, x7 // |u|*|f0| + smulh x4, x20, x7 + mul x5, x21, x8 // |v|*|g0| + smulh x6, x21, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*6] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*8] + stp x5, x5, [x0,#8*10] + + mul x3, x15, x7 // |u|*|f1| + smulh x4, x15, x7 + mul x5, x16, x8 // |v|*|g1| + smulh x6, x16, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*12] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*14] + stp x5, x5, [x0,#8*16] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + asr x27, x27, #63 // sign extension + stp x27, x27, [x0,#8*6] + stp x27, x27, [x0,#8*8] + stp x27, x27, [x0,#8*10] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + ////////////////////////////////////////// iteration before last + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldp x3, x8, [x1,#8*0] // just load + ldp x9, x14, [x1,#8*6] + bl __inner_loop_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + str x3, [x0,#8*0] + str x9, [x0,#8*6] + + mov x20, x15 // exact |f0| + mov x21, x16 // exact |g0| + mov x15, x17 + mov x16, x19 + add x0, x0, #8*12 // pointer to dst |u| + bl __smul_383x63 + + mov x20, x15 // exact |f1| + mov x21, x16 // exact |g1| + add x0, x0, #8*6 // pointer to dst |v| + bl __smul_383x63 + bl __smul_767x63_tail + + ////////////////////////////////////////// last iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #22 // 766 % 62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldr x3, [x1,#8*0] // just load + eor x8, x8, x8 + ldr x9, [x1,#8*6] + eor x14, x14, x14 + bl __inner_loop_62 + + mov x20, x17 + mov x21, x19 + ldp x0, x15, [sp] // original out_ptr and n_ptr + bl __smul_383x63 + bl __smul_767x63_tail + ldr x30, [x29,#8] + + asr x22, x8, #63 // sign as mask + ldp x9, x10, [x15,#8*0] + ldp x11, x12, [x15,#8*2] + ldp x13, x14, [x15,#8*4] + + and x9, x9, x22 // add mod<<384 conditionally + and x10, x10, x22 + adds x3, x3, x9 + and x11, x11, x22 + adcs x4, x4, x10 + and x12, x12, x22 + adcs x5, x5, x11 + and x13, x13, x22 + adcs x6, x6, x12 + and x14, x14, x22 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*8] + adc x8, x8, x14 + stp x7, x8, [x0,#8*10] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 +.long 3573752767 + ret + + +//////////////////////////////////////////////////////////////////////// +// see corresponding commentary in ctx_inverse_mod_384-x86_64... +.def __smul_383x63; +.type 32; +.endef +.p2align 5 +__smul_383x63: + ldp x3, x4, [x1,#8*0+96] // load |u| (or |v|) + asr x17, x20, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x5, x6, [x1,#8*2+96] + eor x20, x20, x17 // conditionally negate |f_| (or |g_|) + ldp x7, x8, [x1,#8*4+96] + + eor x3, x3, x17 // conditionally negate |u| (or |v|) + sub x20, x20, x17 + eor x4, x4, x17 + adds x3, x3, x17, lsr#63 + eor x5, x5, x17 + adcs x4, x4, xzr + eor x6, x6, x17 + adcs x5, x5, xzr + eor x7, x7, x17 + adcs x6, x6, xzr + umulh x22, x3, x20 + eor x8, x8, x17 + umulh x23, x4, x20 + adcs x7, x7, xzr + umulh x24, x5, x20 + adcs x8, x8, xzr + umulh x25, x6, x20 + umulh x26, x7, x20 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x22 + mul x6, x6, x20 + adcs x5, x5, x23 + mul x7, x7, x20 + adcs x6, x6, x24 + mul x27,x8, x20 + adcs x7, x7, x25 + adcs x27,x27,x26 + adc x2, xzr, xzr + ldp x9, x10, [x1,#8*0+144] // load |u| (or |v|) + asr x17, x21, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x11, x12, [x1,#8*2+144] + eor x21, x21, x17 // conditionally negate |f_| (or |g_|) + ldp x13, x14, [x1,#8*4+144] + + eor x9, x9, x17 // conditionally negate |u| (or |v|) + sub x21, x21, x17 + eor x10, x10, x17 + adds x9, x9, x17, lsr#63 + eor x11, x11, x17 + adcs x10, x10, xzr + eor x12, x12, x17 + adcs x11, x11, xzr + eor x13, x13, x17 + adcs x12, x12, xzr + umulh x22, x9, x21 + eor x14, x14, x17 + umulh x23, x10, x21 + adcs x13, x13, xzr + umulh x24, x11, x21 + adcs x14, x14, xzr + umulh x25, x12, x21 + adc x19, xzr, xzr // used in __smul_767x63_tail + umulh x26, x13, x21 + mul x9, x9, x21 + mul x10, x10, x21 + mul x11, x11, x21 + adds x10, x10, x22 + mul x12, x12, x21 + adcs x11, x11, x23 + mul x13, x13, x21 + adcs x12, x12, x24 + mul x28,x14, x21 + adcs x13, x13, x25 + adcs x28,x28,x26 + adc x2, x2, xzr + + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + stp x3, x4, [x0,#8*0] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*2] + adcs x27, x27, x28 + stp x7, x27, [x0,#8*4] + adc x28, x2, xzr // used in __smul_767x63_tail + + ret + + +.def __smul_767x63_tail; +.type 32; +.endef +.p2align 5 +__smul_767x63_tail: + smulh x27, x8, x20 + ldp x3, x4, [x1,#8*24] // load rest of |v| + umulh x14,x14, x21 + ldp x5, x6, [x1,#8*26] + ldp x7, x8, [x1,#8*28] + + eor x3, x3, x17 // conditionally negate rest of |v| + eor x4, x4, x17 + eor x5, x5, x17 + adds x3, x3, x19 + eor x6, x6, x17 + adcs x4, x4, xzr + eor x7, x7, x17 + adcs x5, x5, xzr + eor x8, x8, x17 + adcs x6, x6, xzr + umulh x22, x3, x21 + adcs x7, x7, xzr + umulh x23, x4, x21 + adc x8, x8, xzr + + umulh x24, x5, x21 + add x14, x14, x28 + umulh x25, x6, x21 + asr x28, x27, #63 + umulh x26, x7, x21 + mul x3, x3, x21 + mul x4, x4, x21 + mul x5, x5, x21 + adds x3, x3, x14 + mul x6, x6, x21 + adcs x4, x4, x22 + mul x7, x7, x21 + adcs x5, x5, x23 + mul x8, x8, x21 + adcs x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, x26 + + adds x3, x3, x27 + adcs x4, x4, x28 + adcs x5, x5, x28 + adcs x6, x6, x28 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x28 + stp x5, x6, [x0,#8*8] + adc x8, x8, x28 + stp x7, x8, [x0,#8*10] + + ret + + +.def __smul_383_n_shift_by_62; +.type 32; +.endef +.p2align 5 +__smul_383_n_shift_by_62: + ldp x3, x4, [x1,#8*0+0] // load |a| (or |b|) + asr x28, x15, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x5, x6, [x1,#8*2+0] + eor x2, x15, x28 // conditionally negate |f0| (or |g0|) + ldp x7, x8, [x1,#8*4+0] + + eor x3, x3, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + umulh x22, x3, x2 + adcs x6, x6, xzr + umulh x23, x4, x2 + eor x8, x8, x28 + umulh x24, x5, x2 + adcs x7, x7, xzr + umulh x25, x6, x2 + adc x8, x8, xzr + + umulh x26, x7, x2 + smulh x27, x8, x2 + mul x3, x3, x2 + mul x4, x4, x2 + mul x5, x5, x2 + adds x4, x4, x22 + mul x6, x6, x2 + adcs x5, x5, x23 + mul x7, x7, x2 + adcs x6, x6, x24 + mul x8, x8, x2 + adcs x7, x7, x25 + adcs x8, x8 ,x26 + adc x27, x27, xzr + ldp x9, x10, [x1,#8*0+48] // load |a| (or |b|) + asr x28, x16, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x11, x12, [x1,#8*2+48] + eor x2, x16, x28 // conditionally negate |f0| (or |g0|) + ldp x13, x14, [x1,#8*4+48] + + eor x9, x9, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x10, x10, x28 + adds x9, x9, x28, lsr#63 + eor x11, x11, x28 + adcs x10, x10, xzr + eor x12, x12, x28 + adcs x11, x11, xzr + eor x13, x13, x28 + umulh x22, x9, x2 + adcs x12, x12, xzr + umulh x23, x10, x2 + eor x14, x14, x28 + umulh x24, x11, x2 + adcs x13, x13, xzr + umulh x25, x12, x2 + adc x14, x14, xzr + + umulh x26, x13, x2 + smulh x28, x14, x2 + mul x9, x9, x2 + mul x10, x10, x2 + mul x11, x11, x2 + adds x10, x10, x22 + mul x12, x12, x2 + adcs x11, x11, x23 + mul x13, x13, x2 + adcs x12, x12, x24 + mul x14, x14, x2 + adcs x13, x13, x25 + adcs x14, x14 ,x26 + adc x28, x28, xzr + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x27, x28 + + extr x3, x4, x3, #62 + extr x4, x5, x4, #62 + extr x5, x6, x5, #62 + asr x28, x9, #63 + extr x6, x7, x6, #62 + extr x7, x8, x7, #62 + extr x8, x9, x8, #62 + + eor x3, x3, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + adcs x6, x6, xzr + eor x8, x8, x28 + stp x3, x4, [x0,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x0,#8*2] + adc x8, x8, xzr + stp x7, x8, [x0,#8*4] + + eor x15, x15, x28 + eor x16, x16, x28 + sub x15, x15, x28 + sub x16, x16, x28 + + ret + +.def __ab_approximation_62; +.type 32; +.endef +.p2align 4 +__ab_approximation_62: + ldp x7, x8, [x1,#8*4] + ldp x13, x14, [x1,#8*10] + ldp x5, x6, [x1,#8*2] + ldp x11, x12, [x1,#8*8] + +.Lab_approximation_62_loaded: + orr x22, x8, x14 // check top-most limbs, ... + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x6, ne + orr x22, x8, x14 // ... ones before top-most, ... + csel x13, x13, x12, ne + + ldp x3, x4, [x1,#8*0] + ldp x9, x10, [x1,#8*6] + + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x5, ne + orr x22, x8, x14 // ... and ones before that ... + csel x13, x13, x11, ne + + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x4, ne + orr x22, x8, x14 + csel x13, x13, x10, ne + + clz x22, x22 + cmp x22, #64 + csel x22, x22, xzr, ne + csel x8, x8, x7, ne + csel x14, x14, x13, ne + neg x23, x22 + + lslv x8, x8, x22 // align high limbs to the left + lslv x14, x14, x22 + lsrv x7, x7, x23 + lsrv x13, x13, x23 + and x7, x7, x23, asr#6 + and x13, x13, x23, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + b __inner_loop_62 + ret + +.def __inner_loop_62; +.type 32; +.endef +.p2align 4 +__inner_loop_62: + mov x15, #1 // |f0|=1 + mov x16, #0 // |g0|=0 + mov x17, #0 // |f1|=0 + mov x19, #1 // |g1|=1 + +.Loop_62: + sbfx x28, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + subs x24, x9, x3 // |b_|-|a_| + and x22, x9, x28 + sbc x25, x14, x8 + and x23, x14, x28 + subs x26, x3, x22 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x22, x15 + sbcs x27, x8, x23 + mov x23, x16 + csel x9, x9, x3, hs // |b_| = |a_| + csel x14, x14, x8, hs + csel x3, x26, x24, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x8, x27, x25, hs + csel x15, x15, x17, hs // exchange |f0| and |f1| + csel x17, x17, x22, hs + csel x16, x16, x19, hs // exchange |g0| and |g1| + csel x19, x19, x23, hs + extr x3, x8, x3, #1 + lsr x8, x8, #1 + and x22, x17, x28 + and x23, x19, x28 + add x17, x17, x17 // |f1|<<=1 + add x19, x19, x19 // |g1|<<=1 + sub x15, x15, x22 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x16, x16, x23 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, .Loop_62 + + ret + diff --git a/crypto/blst_src/build/coff/ct_is_square_mod_384-armv8.S b/crypto/blst_src/build/coff/ct_is_square_mod_384-armv8.S new file mode 100644 index 00000000000..efe90a82144 --- /dev/null +++ b/crypto/blst_src/build/coff/ct_is_square_mod_384-armv8.S @@ -0,0 +1,335 @@ +.text + +.globl ct_is_square_mod_384 + +.def ct_is_square_mod_384; +.type 32; +.endef +.p2align 5 +ct_is_square_mod_384: +.long 3573752639 + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #512 + + ldp x3, x4, [x0,#8*0] // load input + ldp x5, x6, [x0,#8*2] + ldp x7, x8, [x0,#8*4] + + add x0, sp, #255 // find closest 256-byte-aligned spot + and x0, x0, #-256 // in the frame... + + ldp x9, x10, [x1,#8*0] // load modulus + ldp x11, x12, [x1,#8*2] + ldp x13, x14, [x1,#8*4] + + stp x3, x4, [x0,#8*6] // copy input to |a| + stp x5, x6, [x0,#8*8] + stp x7, x8, [x0,#8*10] + stp x9, x10, [x0,#8*0] // copy modulus to |b| + stp x11, x12, [x0,#8*2] + stp x13, x14, [x0,#8*4] + + eor x2, x2, x2 // init the .Legendre symbol + mov x15, #24 // 24 is 768/30-1 + b .Loop_is_square + +.p2align 4 +.Loop_is_square: + bl __ab_approximation_30 + sub x15, x15, #1 + + eor x1, x0, #128 // pointer to dst |b| + bl __smul_384_n_shift_by_30 + + mov x19, x16 // |f0| + mov x20, x17 // |g0| + add x1, x1, #8*6 // pointer to dst |a| + bl __smul_384_n_shift_by_30 + + ldp x9, x10, [x1,#-8*6] + eor x0, x0, #128 // flip-flop src |a|b| + and x27, x27, x9 // if |a| was negative, + add x2, x2, x27, lsr#1 // adjust |L| + + cbnz x15, .Loop_is_square + + ////////////////////////////////////////// last iteration + //bl __ab_approximation_30 // |a| and |b| are exact, + //ldr x8, [x0,#8*6] // and loaded + //ldr x14, [x0,#8*0] + mov x15, #48 // 48 is 768%30 + 30 + bl __inner_loop_48 + ldr x30, [x29,#8] + + and x0, x2, #1 + eor x0, x0, #1 + + add sp, sp, #512 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 +.long 3573752767 + ret + + +.def __smul_384_n_shift_by_30; +.type 32; +.endef +.p2align 5 +__smul_384_n_shift_by_30: + ldp x3, x4, [x0,#8*0+0] // load |b| (or |a|) + asr x27, x20, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x5, x6, [x0,#8*2+0] + eor x20, x20, x27 // conditionally negate |g1| (or |f1|) + ldp x7, x8, [x0,#8*4+0] + + eor x3, x3, x27 // conditionally negate |b| (or |a|) + sub x20, x20, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + umulh x21, x3, x20 + adcs x6, x6, xzr + umulh x22, x4, x20 + eor x8, x8, x27 + umulh x23, x5, x20 + adcs x7, x7, xzr + umulh x24, x6, x20 + adc x8, x8, xzr + + umulh x25, x7, x20 + and x28, x20, x27 + umulh x26, x8, x20 + neg x28, x28 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x21 + mul x6, x6, x20 + adcs x5, x5, x22 + mul x7, x7, x20 + adcs x6, x6, x23 + mul x8, x8, x20 + adcs x7, x7, x24 + adcs x8, x8 ,x25 + adc x26, x26, x28 + ldp x9, x10, [x0,#8*0+48] // load |b| (or |a|) + asr x27, x19, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x11, x12, [x0,#8*2+48] + eor x19, x19, x27 // conditionally negate |g1| (or |f1|) + ldp x13, x14, [x0,#8*4+48] + + eor x9, x9, x27 // conditionally negate |b| (or |a|) + sub x19, x19, x27 + eor x10, x10, x27 + adds x9, x9, x27, lsr#63 + eor x11, x11, x27 + adcs x10, x10, xzr + eor x12, x12, x27 + adcs x11, x11, xzr + eor x13, x13, x27 + umulh x21, x9, x19 + adcs x12, x12, xzr + umulh x22, x10, x19 + eor x14, x14, x27 + umulh x23, x11, x19 + adcs x13, x13, xzr + umulh x24, x12, x19 + adc x14, x14, xzr + + umulh x25, x13, x19 + and x28, x19, x27 + umulh x27, x14, x19 + neg x28, x28 + mul x9, x9, x19 + mul x10, x10, x19 + mul x11, x11, x19 + adds x10, x10, x21 + mul x12, x12, x19 + adcs x11, x11, x22 + mul x13, x13, x19 + adcs x12, x12, x23 + mul x14, x14, x19 + adcs x13, x13, x24 + adcs x14, x14 ,x25 + adc x27, x27, x28 + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x26, x27 + + extr x3, x4, x3, #30 + extr x4, x5, x4, #30 + extr x5, x6, x5, #30 + asr x27, x9, #63 + extr x6, x7, x6, #30 + extr x7, x8, x7, #30 + extr x8, x9, x8, #30 + + eor x3, x3, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + adcs x6, x6, xzr + eor x8, x8, x27 + stp x3, x4, [x1,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x1,#8*2] + adc x8, x8, xzr + stp x7, x8, [x1,#8*4] + + ret + +.def __ab_approximation_30; +.type 32; +.endef +.p2align 4 +__ab_approximation_30: + ldp x13, x14, [x0,#8*4] // |a| is still in registers + ldp x11, x12, [x0,#8*2] + + orr x21, x8, x14 // check top-most limbs, ... + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x6, ne + orr x21, x8, x14 // ... ones before top-most, ... + csel x13, x13, x12, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x5, ne + orr x21, x8, x14 // ... and ones before that ... + csel x13, x13, x11, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x4, ne + orr x21, x8, x14 // and one more, ... + csel x13, x13, x10, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x3, ne + orr x21, x8, x14 + csel x13, x13, x9, ne + + clz x21, x21 + cmp x21, #64 + csel x21, x21, xzr, ne + csel x8, x8, x7, ne + csel x14, x14, x13, ne + neg x22, x21 + + lslv x8, x8, x21 // align high limbs to the left + lslv x14, x14, x21 + lsrv x7, x7, x22 + lsrv x13, x13, x22 + and x7, x7, x22, asr#6 + and x13, x13, x22, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + bfxil x8, x3, #0, #32 + bfxil x14, x9, #0, #32 + + b __inner_loop_30 + ret + + +.def __inner_loop_30; +.type 32; +.endef +.p2align 4 +__inner_loop_30: + mov x28, #30 + mov x17, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x20, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x27,#0x7FFFFFFF7FFFFFFF + +.Loop_30: + sbfx x24, x8, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x8, x14 + sub x28, x28, #1 + and x21, x14, x24 + + sub x22, x14, x8 // |b_|-|a_| + subs x23, x8, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 // L + (a_ & b_) >> 1 + mov x21, x20 + csel x14, x14, x8, hs // |b_| = |a_| + csel x8, x23, x22, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x20, x20, x17, hs // exchange |fg0| and |fg1| + csel x17, x17, x21, hs + csel x2, x2, x25, hs + lsr x8, x8, #1 + and x21, x20, x24 + and x22, x27, x24 + add x23, x14, #2 + sub x17, x17, x21 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x20, x20, x20 // |f1|<<=1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + add x17, x17, x22 + sub x20, x20, x27 + + cbnz x28, .Loop_30 + + mov x27, #0x7FFFFFFF + ubfx x16, x17, #0, #32 + ubfx x17, x17, #32, #32 + ubfx x19, x20, #0, #32 + ubfx x20, x20, #32, #32 + sub x16, x16, x27 // remove the bias + sub x17, x17, x27 + sub x19, x19, x27 + sub x20, x20, x27 + + ret + +.def __inner_loop_48; +.type 32; +.endef +.p2align 4 +__inner_loop_48: +.Loop_48: + sbfx x24, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x3, x9 + sub x15, x15, #1 + and x21, x9, x24 + sub x22, x9, x3 // |b_|-|a_| + subs x23, x3, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 + csel x9, x9, x3, hs // |b_| = |a_| + csel x3, x23, x22, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x2, x2, x25, hs + add x23, x9, #2 + lsr x3, x3, #1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + + cbnz x15, .Loop_48 + + ret + diff --git a/crypto/blst_src/build/coff/ct_is_square_mod_384-x86_64.s b/crypto/blst_src/build/coff/ct_is_square_mod_384-x86_64.s new file mode 100644 index 00000000000..9ac32f50852 --- /dev/null +++ b/crypto/blst_src/build/coff/ct_is_square_mod_384-x86_64.s @@ -0,0 +1,509 @@ +.text + +.globl ct_is_square_mod_384 + +.def ct_is_square_mod_384; .scl 2; .type 32; .endef +.p2align 5 +ct_is_square_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_ct_is_square_mod_384: + + + pushq %rbp + + movq %rcx,%rdi + movq %rdx,%rsi + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $536,%rsp + +.LSEH_body_ct_is_square_mod_384: + + + leaq 24+255(%rsp),%rax + andq $-256,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rbx + movq 24(%rsi),%rcx + movq 32(%rsi),%rdx + movq 40(%rsi),%rdi + movq %rax,%rsi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rcx,72(%rax) + movq %rdx,80(%rax) + movq %rdi,88(%rax) + + xorq %rbp,%rbp + movl $24,%ecx + jmp .Loop_is_square + +.p2align 5 +.Loop_is_square: + movl %ecx,16(%rsp) + + call __ab_approximation_30 + movq %rax,0(%rsp) + movq %rbx,8(%rsp) + + movq $128+48,%rdi + xorq %rsi,%rdi + call __smulq_384_n_shift_by_30 + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq -48(%rdi),%rdi + call __smulq_384_n_shift_by_30 + + movl 16(%rsp),%ecx + xorq $128,%rsi + + andq 48(%rdi),%r14 + shrq $1,%r14 + addq %r14,%rbp + + subl $1,%ecx + jnz .Loop_is_square + + + + + movq 48(%rsi),%r9 + call __inner_loop_48 + + movq $1,%rax + andq %rbp,%rax + xorq $1,%rax + + leaq 536(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_ct_is_square_mod_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_ct_is_square_mod_384: + +.def __smulq_384_n_shift_by_30; .scl 3; .type 32; .endef +.p2align 5 +__smulq_384_n_shift_by_30: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %rdx,%r14 + andq %rbx,%r14 + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + negq %r14 + mulq %rbx + addq %rax,%r13 + adcq %rdx,%r14 + leaq 48(%rsi),%rsi + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %rdx,%r15 + andq %rbx,%r15 + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + negq %r15 + mulq %rbx + addq %rax,%r13 + adcq %rdx,%r15 + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq %r15,%r14 + + shrdq $30,%r9,%r8 + shrdq $30,%r10,%r9 + shrdq $30,%r11,%r10 + shrdq $30,%r12,%r11 + shrdq $30,%r13,%r12 + shrdq $30,%r14,%r13 + + sarq $63,%r14 + xorq %rbx,%rbx + subq %r14,%rbx + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbx,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + +.def __ab_approximation_30; .scl 3; .type 32; .endef +.p2align 5 +__ab_approximation_30: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 88(%rsi),%rbx + movq 80(%rsi),%r15 + movq 72(%rsi),%r14 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r11,%r12 + movq 64(%rsi),%r11 + cmovzq %r14,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r10,%r12 + movq 56(%rsi),%r10 + cmovzq %r11,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r9,%r12 + movq 48(%rsi),%r9 + cmovzq %r10,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r8,%r12 + cmovzq %r9,%r15 + + movq %r13,%rax + orq %rbx,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r13 + cmovzq %r9,%rbx + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%r12,%r13 + shldq %cl,%r15,%rbx + + movq $0xFFFFFFFF00000000,%rax + movl %r8d,%r8d + movl %r9d,%r9d + andq %rax,%r13 + andq %rax,%rbx + orq %r13,%r8 + orq %rbx,%r9 + + jmp __inner_loop_30 + + .byte 0xf3,0xc3 + +.def __inner_loop_30; .scl 3; .type 32; .endef +.p2align 5 +__inner_loop_30: + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rbx + movq $0x800000007FFFFFFF,%rcx + leaq -1(%rbx),%r15 + movl $30,%edi + +.Loop_30: + movq %r8,%rax + andq %r9,%rax + shrq $1,%rax + + cmpq %r9,%r8 + movq %r8,%r10 + movq %r9,%r11 + leaq (%rax,%rbp,1),%rax + movq %rbx,%r12 + movq %rcx,%r13 + movq %rbp,%r14 + cmovbq %r9,%r8 + cmovbq %r10,%r9 + cmovbq %rcx,%rbx + cmovbq %r12,%rcx + cmovbq %rax,%rbp + + subq %r9,%r8 + subq %rcx,%rbx + addq %r15,%rbx + + testq $1,%r10 + cmovzq %r10,%r8 + cmovzq %r11,%r9 + cmovzq %r12,%rbx + cmovzq %r13,%rcx + cmovzq %r14,%rbp + + leaq 2(%r9),%rax + shrq $1,%r8 + shrq $2,%rax + addq %rcx,%rcx + leaq (%rax,%rbp,1),%rbp + subq %r15,%rcx + + subl $1,%edi + jnz .Loop_30 + + shrq $32,%r15 + movl %ebx,%eax + shrq $32,%rbx + movl %ecx,%edx + shrq $32,%rcx + subq %r15,%rax + subq %r15,%rbx + subq %r15,%rdx + subq %r15,%rcx + + .byte 0xf3,0xc3 + + +.def __inner_loop_48; .scl 3; .type 32; .endef +.p2align 5 +__inner_loop_48: + .byte 0xf3,0x0f,0x1e,0xfa + + movl $48,%edi + +.Loop_48: + movq %r8,%rax + andq %r9,%rax + shrq $1,%rax + + cmpq %r9,%r8 + movq %r8,%r10 + movq %r9,%r11 + leaq (%rax,%rbp,1),%rax + movq %rbp,%r12 + cmovbq %r9,%r8 + cmovbq %r10,%r9 + cmovbq %rax,%rbp + + subq %r9,%r8 + + testq $1,%r10 + cmovzq %r10,%r8 + cmovzq %r11,%r9 + cmovzq %r12,%rbp + + leaq 2(%r9),%rax + shrq $1,%r8 + shrq $2,%rax + addq %rax,%rbp + + subl $1,%edi + jnz .Loop_48 + + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_ct_is_square_mod_384 +.rva .LSEH_body_ct_is_square_mod_384 +.rva .LSEH_info_ct_is_square_mod_384_prologue + +.rva .LSEH_body_ct_is_square_mod_384 +.rva .LSEH_epilogue_ct_is_square_mod_384 +.rva .LSEH_info_ct_is_square_mod_384_body + +.rva .LSEH_epilogue_ct_is_square_mod_384 +.rva .LSEH_end_ct_is_square_mod_384 +.rva .LSEH_info_ct_is_square_mod_384_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_ct_is_square_mod_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_ct_is_square_mod_384_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x43,0x00 +.byte 0x00,0xe4,0x44,0x00 +.byte 0x00,0xd4,0x45,0x00 +.byte 0x00,0xc4,0x46,0x00 +.byte 0x00,0x34,0x47,0x00 +.byte 0x00,0x54,0x48,0x00 +.byte 0x00,0x74,0x4a,0x00 +.byte 0x00,0x64,0x4b,0x00 +.byte 0x00,0x01,0x49,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_ct_is_square_mod_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/ctq_inverse_mod_384-x86_64.s b/crypto/blst_src/build/coff/ctq_inverse_mod_384-x86_64.s new file mode 100644 index 00000000000..d027a6dc5c0 --- /dev/null +++ b/crypto/blst_src/build/coff/ctq_inverse_mod_384-x86_64.s @@ -0,0 +1,1230 @@ +.comm __blst_platform_cap,4 +.text + +.globl ct_inverse_mod_383 + +.def ct_inverse_mod_383; .scl 2; .type 32; .endef +.p2align 5 +ct_inverse_mod_383: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_ct_inverse_mod_383: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz ct_inverse_mod_383$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $1112,%rsp + +.LSEH_body_ct_inverse_mod_383: + + + leaq 88+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq 0(%rdx),%r14 + movq 8(%rdx),%r15 + movq 16(%rdx),%rbx + movq 24(%rdx),%rbp + movq 32(%rdx),%rsi + movq 40(%rdx),%rdi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rbp,72(%rax) + movq %rsi,80(%rax) + movq %rax,%rsi + movq %rdi,88(%rax) + + + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + + + movq %rdx,96(%rdi) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + + + movq %rdx,96(%rdi) + + + xorq $256,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + + + + movq 96(%rsi),%rax + movq 144(%rsi),%r11 + movq %rdx,%rbx + movq %rax,%r10 + imulq 56(%rsp) + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + sarq $63,%r9 + movq %r9,64(%rdi) + movq %r9,72(%rdi) + movq %r9,80(%rdi) + movq %r9,88(%rdi) + leaq 96(%rsi),%rsi + + movq %r10,%rax + imulq %rbx + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,96(%rdi) + movq %r9,104(%rdi) + sarq $63,%r9 + movq %r9,112(%rdi) + movq %r9,120(%rdi) + movq %r9,128(%rdi) + movq %r9,136(%rdi) + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + sarq $63,%r13 + movq %r13,48(%rdi) + movq %r13,56(%rdi) + movq %r13,64(%rdi) + movq %r13,72(%rdi) + movq %r13,80(%rdi) + movq %r13,88(%rdi) + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + + xorq $256+96,%rsi + movl $62,%edi + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 48(%rsi),%r10 + movq 56(%rsi),%r11 + call __inner_loop_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + movq %r8,0(%rdi) + movq %r10,48(%rdi) + + + + leaq 96(%rsi),%rsi + leaq 96(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + + + xorq $256+96,%rsi + movl $22,%edi + + movq 0(%rsi),%r8 + xorq %r9,%r9 + movq 48(%rsi),%r10 + xorq %r11,%r11 + call __inner_loop_62 + + + + + + + + leaq 96(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulq_767x63 + + movq 40(%rsp),%rsi + movq %rax,%rdx + sarq $63,%rax + + movq %rax,%r8 + movq %rax,%r9 + movq %rax,%r10 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + movq %rax,%r11 + andq 16(%rsi),%r10 + andq 24(%rsi),%r11 + movq %rax,%r12 + andq 32(%rsi),%r12 + andq 40(%rsi),%rax + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rbx + adcq %r11,%rbp + adcq %r12,%rcx + adcq %rax,%rdx + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %rbx,64(%rdi) + movq %rbp,72(%rdi) + movq %rcx,80(%rdi) + movq %rdx,88(%rdi) + + leaq 1112(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_ct_inverse_mod_383: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_ct_inverse_mod_383: +.def __smulq_767x63; .scl 3; .type 32; .endef +.p2align 5 +__smulq_767x63: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + leaq 48(%rsi),%rsi + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,0(%rdi) + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + movq %r9,8(%rdi) + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + movq %r10,16(%rdi) + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + movq %r11,24(%rdi) + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + movq %r12,32(%rdi) + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + movq %r13,40(%rdi) + movq %rdx,48(%rdi) + sarq $63,%rdx + movq %rdx,56(%rdi) + movq %rcx,%rdx + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + movq 64(%rsi),%rbx + movq 72(%rsi),%rbp + movq 80(%rsi),%rcx + movq 88(%rsi),%rdi + + movq %rdx,%rsi + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rsi + addq %rax,%rsi + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + xorq %rdx,%rbx + xorq %rdx,%rbp + xorq %rdx,%rcx + xorq %rdx,%rdi + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rbx + adcq $0,%rbp + adcq $0,%rcx + adcq $0,%rdi + + mulq %rsi + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rsi + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rsi + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rsi + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rsi + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + mulq %rsi + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rsi + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + mulq %rsi + addq %rax,%r15 + movq %rbx,%rax + adcq $0,%rdx + movq %rdx,%rbx + mulq %rsi + addq %rax,%rbx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rbp + mulq %rsi + addq %rax,%rbp + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rcx + mulq %rsi + addq %rax,%rcx + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%rdi + movq 8(%rsp),%rdx + imulq %rsi,%rax + movq 16(%rsp),%rsi + addq %rdi,%rax + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq 32(%rdx),%r12 + adcq 40(%rdx),%r13 + adcq 48(%rdx),%r14 + movq 56(%rdx),%rdi + adcq %rdi,%r15 + adcq %rdi,%rbx + adcq %rdi,%rbp + adcq %rdi,%rcx + adcq %rdi,%rax + + movq %rdx,%rdi + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + movq %rbx,64(%rdx) + movq %rbp,72(%rdx) + movq %rcx,80(%rdx) + movq %rax,88(%rdx) + + .byte 0xf3,0xc3 + +.def __smulq_383x63; .scl 3; .type 32; .endef +.p2align 5 +__smulq_383x63: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp,%rax + addq %rax,%r13 + + leaq 48(%rsi),%rsi + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp,%rax + addq %rax,%r13 + + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + +.def __smulq_383_n_shift_by_62; .scl 3; .type 32; .endef +.p2align 5 +__smulq_383_n_shift_by_62: + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + leaq 48(%rsi),%rsi + movq %rdx,%r14 + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $62,%r9,%r8 + shrdq $62,%r10,%r9 + shrdq $62,%r11,%r10 + shrdq $62,%r12,%r11 + shrdq $62,%r13,%r12 + shrdq $62,%r14,%r13 + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 + +.def __ab_approximation_62; .scl 3; .type 32; .endef +.p2align 5 +__ab_approximation_62: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 40(%rsi),%r9 + movq 88(%rsi),%r11 + movq 32(%rsi),%rbx + movq 80(%rsi),%rbp + movq 24(%rsi),%r8 + movq 72(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 16(%rsi),%r8 + movq 64(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 8(%rsi),%r8 + movq 56(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 0(%rsi),%r8 + movq 48(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + jmp __inner_loop_62 + + .byte 0xf3,0xc3 + +.def __inner_loop_62; .scl 3; .type 32; .endef +.p2align 3 +.long 0 +__inner_loop_62: + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq $1,%r13 + movq %rsi,8(%rsp) + +.Loop_62: + xorq %rax,%rax + xorq %rbx,%rbx + testq $1,%r8 + movq %r10,%rbp + movq %r11,%r14 + cmovnzq %r10,%rax + cmovnzq %r11,%rbx + subq %r8,%rbp + sbbq %r9,%r14 + movq %r8,%r15 + movq %r9,%rsi + subq %rax,%r8 + sbbq %rbx,%r9 + cmovcq %rbp,%r8 + cmovcq %r14,%r9 + cmovcq %r15,%r10 + cmovcq %rsi,%r11 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrdq $1,%r9,%r8 + shrq $1,%r9 + testq $1,%r15 + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%edi + jnz .Loop_62 + + movq 8(%rsp),%rsi + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_ct_inverse_mod_383 +.rva .LSEH_body_ct_inverse_mod_383 +.rva .LSEH_info_ct_inverse_mod_383_prologue + +.rva .LSEH_body_ct_inverse_mod_383 +.rva .LSEH_epilogue_ct_inverse_mod_383 +.rva .LSEH_info_ct_inverse_mod_383_body + +.rva .LSEH_epilogue_ct_inverse_mod_383 +.rva .LSEH_end_ct_inverse_mod_383 +.rva .LSEH_info_ct_inverse_mod_383_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_ct_inverse_mod_383_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_ct_inverse_mod_383_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x8b,0x00 +.byte 0x00,0xe4,0x8c,0x00 +.byte 0x00,0xd4,0x8d,0x00 +.byte 0x00,0xc4,0x8e,0x00 +.byte 0x00,0x34,0x8f,0x00 +.byte 0x00,0x54,0x90,0x00 +.byte 0x00,0x74,0x92,0x00 +.byte 0x00,0x64,0x93,0x00 +.byte 0x00,0x01,0x91,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_ct_inverse_mod_383_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/ctx_inverse_mod_384-x86_64.s b/crypto/blst_src/build/coff/ctx_inverse_mod_384-x86_64.s new file mode 100644 index 00000000000..4f7dd6d1552 --- /dev/null +++ b/crypto/blst_src/build/coff/ctx_inverse_mod_384-x86_64.s @@ -0,0 +1,1601 @@ +.text + +.globl ctx_inverse_mod_383 + +.def ctx_inverse_mod_383; .scl 2; .type 32; .endef +.p2align 5 +ctx_inverse_mod_383: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_ctx_inverse_mod_383: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +ct_inverse_mod_383$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $1112,%rsp + +.LSEH_body_ctx_inverse_mod_383: + + + leaq 88+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq 0(%rdx),%r14 + movq 8(%rdx),%r15 + movq 16(%rdx),%rbx + movq 24(%rdx),%rbp + movq 32(%rdx),%rsi + movq 40(%rdx),%rdi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rbp,72(%rax) + movq %rsi,80(%rax) + movq %rax,%rsi + movq %rdi,88(%rax) + + + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + + + movq %rdx,96(%rdi) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + + + movq %rdx,96(%rdi) + + + xorq $256,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + + + + movq 96(%rsi),%rax + movq 144(%rsi),%r11 + movq %rdx,%rbx + movq %rax,%r10 + imulq 56(%rsp) + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + sarq $63,%r9 + movq %r9,64(%rdi) + movq %r9,72(%rdi) + movq %r9,80(%rdi) + movq %r9,88(%rdi) + leaq 96(%rsi),%rsi + + movq %r10,%rax + imulq %rbx + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,96(%rdi) + movq %r9,104(%rdi) + sarq $63,%r9 + movq %r9,112(%rdi) + movq %r9,120(%rdi) + movq %r9,128(%rdi) + movq %r9,136(%rdi) + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + sarq $63,%r13 + movq %r13,48(%rdi) + movq %r13,56(%rdi) + movq %r13,64(%rdi) + movq %r13,72(%rdi) + movq %r13,80(%rdi) + movq %r13,88(%rdi) + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + + xorq $256+96,%rsi + movl $53,%edi + + movq 0(%rsi),%r8 + + movq 48(%rsi),%r10 + + call __tail_loop_53 + + + + + + + + leaq 96(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulx_767x63 + + movq 40(%rsp),%rsi + movq %rax,%rdx + sarq $63,%rax + + movq %rax,%r8 + movq %rax,%r9 + movq %rax,%r10 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + movq %rax,%r11 + andq 16(%rsi),%r10 + andq 24(%rsi),%r11 + movq %rax,%r12 + andq 32(%rsi),%r12 + andq 40(%rsi),%rax + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rbx + adcq %r11,%rbp + adcq %r12,%rcx + adcq %rax,%rdx + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %rbx,64(%rdi) + movq %rbp,72(%rdi) + movq %rcx,80(%rdi) + movq %rdx,88(%rdi) + + leaq 1112(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_ctx_inverse_mod_383: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_ctx_inverse_mod_383: +.def __smulx_767x63; .scl 3; .type 32; .endef +.p2align 5 +__smulx_767x63: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + leaq 48(%rsi),%rsi + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq $0,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + movq %rdx,48(%rdi) + sarq $63,%rdx + movq %rdx,56(%rdi) + movq %rcx,%rdx + movq %rcx,%rax + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + movq 64(%rsi),%rbx + movq 72(%rsi),%rbp + movq 80(%rsi),%rcx + movq 88(%rsi),%rdi + + sarq $63,%rax + xorq %rsi,%rsi + subq %rax,%rsi + + xorq %rax,%rdx + addq %rsi,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %rax,%r13 + xorq %rax,%r14 + xorq %rax,%r15 + xorq %rax,%rbx + xorq %rax,%rbp + xorq %rax,%rcx + xorq %rax,%rdi + addq %rsi,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rbx + adcq $0,%rbp + adcq $0,%rcx + adcq $0,%rdi + + mulxq %r8,%r8,%rax + mulxq %r9,%r9,%rsi + addq %rax,%r9 + mulxq %r10,%r10,%rax + adcq %rsi,%r10 + mulxq %r11,%r11,%rsi + adcq %rax,%r11 + mulxq %r12,%r12,%rax + adcq %rsi,%r12 + mulxq %r13,%r13,%rsi + adcq %rax,%r13 + mulxq %r14,%r14,%rax + adcq %rsi,%r14 + mulxq %r15,%r15,%rsi + adcq %rax,%r15 + mulxq %rbx,%rbx,%rax + adcq %rsi,%rbx + mulxq %rbp,%rbp,%rsi + adcq %rax,%rbp + mulxq %rcx,%rcx,%rax + adcq %rsi,%rcx + mulxq %rdi,%rdi,%rsi + movq 8(%rsp),%rdx + movq 16(%rsp),%rsi + adcq %rdi,%rax + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq 32(%rdx),%r12 + adcq 40(%rdx),%r13 + adcq 48(%rdx),%r14 + movq 56(%rdx),%rdi + adcq %rdi,%r15 + adcq %rdi,%rbx + adcq %rdi,%rbp + adcq %rdi,%rcx + adcq %rdi,%rax + + movq %rdx,%rdi + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + movq %rbx,64(%rdx) + movq %rbp,72(%rdx) + movq %rcx,80(%rdx) + movq %rax,88(%rdx) + + .byte 0xf3,0xc3 + +.def __smulx_383x63; .scl 3; .type 32; .endef +.p2align 5 +__smulx_383x63: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%r12 + movq 0+40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rdx + addq %rax,%rdx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + xorq %rbp,%r12 + xorq %rbp,%r13 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%rax + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %rax,%r10 + mulxq %r11,%r11,%rax + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %rax,%r12 + mulxq %r13,%r13,%rax + movq %rcx,%rdx + adcq %rbp,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rdx + addq %rax,%rdx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + xorq %rbp,%r12 + xorq %rbp,%r13 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%rax + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %rax,%r10 + mulxq %r11,%r11,%rax + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %rax,%r12 + mulxq %r13,%r13,%rax + adcq %rbp,%r13 + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + +.def __smulx_383_n_shift_by_31; .scl 3; .type 32; .endef +.p2align 5 +__smulx_383_n_shift_by_31: + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + xorq %r14,%r14 + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%r12 + movq 0+40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq %rdx,%r14 + + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq $0,%rdx + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%rax + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $31,%r9,%r8 + shrdq $31,%r10,%r9 + shrdq $31,%r11,%r10 + shrdq $31,%r12,%r11 + shrdq $31,%rax,%r12 + shrdq $31,%r14,%rax + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 + +.def __smulx_191_n_shift_by_31; .scl 3; .type 32; .endef +.p2align 5 +__smulx_191_n_shift_by_31: + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %r10,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r10 + addq %rbp,%r9 + adcq $0,%r10 + imulq %rdx + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r14 + movq %rcx,%rdx + movq 48+0(%rsi),%r11 + movq 48+8(%rsi),%r12 + movq 48+16(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r11,%r11,%rbp + mulxq %r12,%r12,%r13 + addq %rbp,%r12 + adcq $0,%r13 + imulq %rdx + addq %rax,%r13 + adcq $0,%rdx + addq %r8,%r11 + adcq %r9,%r12 + adcq %r10,%r13 + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $31,%r12,%r11 + shrdq $31,%r13,%r12 + shrdq $31,%r14,%r13 + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbp,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r11,0(%rdi) + movq %r12,8(%rdi) + movq %r13,16(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 + +.def __ab_approximation_31; .scl 3; .type 32; .endef +.p2align 5 +__ab_approximation_31: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 40(%rsi),%r9 + movq 88(%rsi),%r11 + movq 32(%rsi),%rbx + movq 80(%rsi),%rbp + movq 24(%rsi),%r8 + movq 72(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 16(%rsi),%r8 + cmovzq %r10,%rbp + movq 64(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 8(%rsi),%r8 + cmovzq %r10,%rbp + movq 56(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 0(%rsi),%r8 + cmovzq %r10,%rbp + movq 48(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r9 + cmovzq %r10,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + movl $0x7FFFFFFF,%eax + andq %rax,%r8 + andq %rax,%r10 + andnq %r9,%rax,%r9 + andnq %r11,%rax,%r11 + orq %r9,%r8 + orq %r11,%r10 + + jmp __inner_loop_31 + + .byte 0xf3,0xc3 + +.def __inner_loop_31; .scl 3; .type 32; .endef +.p2align 5 +__inner_loop_31: + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rcx + movq $0x800000007FFFFFFF,%r13 + movq $0x7FFFFFFF7FFFFFFF,%r15 + +.Loop_31: + cmpq %r10,%r8 + movq %r8,%rax + movq %r10,%rbx + movq %rcx,%rbp + movq %r13,%r14 + cmovbq %r10,%r8 + cmovbq %rax,%r10 + cmovbq %r13,%rcx + cmovbq %rbp,%r13 + + subq %r10,%r8 + subq %r13,%rcx + addq %r15,%rcx + + testq $1,%rax + cmovzq %rax,%r8 + cmovzq %rbx,%r10 + cmovzq %rbp,%rcx + cmovzq %r14,%r13 + + shrq $1,%r8 + addq %r13,%r13 + subq %r15,%r13 + subl $1,%edi + jnz .Loop_31 + + shrq $32,%r15 + movl %ecx,%edx + movl %r13d,%r12d + shrq $32,%rcx + shrq $32,%r13 + subq %r15,%rdx + subq %r15,%rcx + subq %r15,%r12 + subq %r15,%r13 + + .byte 0xf3,0xc3 + + +.def __tail_loop_53; .scl 3; .type 32; .endef +.p2align 5 +__tail_loop_53: + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq $1,%r13 + +.Loop_53: + xorq %rax,%rax + testq $1,%r8 + movq %r10,%rbx + cmovnzq %r10,%rax + subq %r8,%rbx + movq %r8,%rbp + subq %rax,%r8 + cmovcq %rbx,%r8 + cmovcq %rbp,%r10 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrq $1,%r8 + testq $1,%rbp + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%edi + jnz .Loop_53 + + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_ctx_inverse_mod_383 +.rva .LSEH_body_ctx_inverse_mod_383 +.rva .LSEH_info_ctx_inverse_mod_383_prologue + +.rva .LSEH_body_ctx_inverse_mod_383 +.rva .LSEH_epilogue_ctx_inverse_mod_383 +.rva .LSEH_info_ctx_inverse_mod_383_body + +.rva .LSEH_epilogue_ctx_inverse_mod_383 +.rva .LSEH_end_ctx_inverse_mod_383 +.rva .LSEH_info_ctx_inverse_mod_383_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_ctx_inverse_mod_383_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_ctx_inverse_mod_383_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x8b,0x00 +.byte 0x00,0xe4,0x8c,0x00 +.byte 0x00,0xd4,0x8d,0x00 +.byte 0x00,0xc4,0x8e,0x00 +.byte 0x00,0x34,0x8f,0x00 +.byte 0x00,0x54,0x90,0x00 +.byte 0x00,0x74,0x92,0x00 +.byte 0x00,0x64,0x93,0x00 +.byte 0x00,0x01,0x91,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_ctx_inverse_mod_383_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/div3w-armv8.S b/crypto/blst_src/build/coff/div3w-armv8.S new file mode 100644 index 00000000000..2e5d7045d6a --- /dev/null +++ b/crypto/blst_src/build/coff/div3w-armv8.S @@ -0,0 +1,94 @@ +.text + +.globl div_3_limbs +.def div_3_limbs; +.type 32; +.endef +.p2align 5 +div_3_limbs: + ldp x4,x5,[x0] // load R + eor x0,x0,x0 // Q = 0 + mov x3,#64 // loop counter + nop + +.Loop: + subs x6,x4,x1 // R - D + add x0,x0,x0 // Q <<= 1 + sbcs x7,x5,x2 + add x0,x0,#1 // Q + speculative bit + csel x4,x4,x6,lo // select between R and R - D + extr x1,x2,x1,#1 // D >>= 1 + csel x5,x5,x7,lo + lsr x2,x2,#1 + sbc x0,x0,xzr // subtract speculative bit + sub x3,x3,#1 + cbnz x3,.Loop + + asr x3,x0,#63 // top bit -> mask + add x0,x0,x0 // Q <<= 1 + subs x6,x4,x1 // R - D + add x0,x0,#1 // Q + speculative bit + sbcs x7,x5,x2 + sbc x0,x0,xzr // subtract speculative bit + + orr x0,x0,x3 // all ones if overflow + + ret + +.globl quot_rem_128 +.def quot_rem_128; +.type 32; +.endef +.p2align 5 +quot_rem_128: + ldp x3,x4,[x1] + + mul x5,x3,x2 // divisor[0:1} * quotient + umulh x6,x3,x2 + mul x11, x4,x2 + umulh x7,x4,x2 + + ldp x8,x9,[x0] // load 3 limbs of the dividend + ldr x10,[x0,#16] + + adds x6,x6,x11 + adc x7,x7,xzr + + subs x8,x8,x5 // dividend - divisor * quotient + sbcs x9,x9,x6 + sbcs x10,x10,x7 + sbc x5,xzr,xzr // borrow -> mask + + add x2,x2,x5 // if borrowed, adjust the quotient ... + and x3,x3,x5 + and x4,x4,x5 + adds x8,x8,x3 // ... and add divisor + adc x9,x9,x4 + + stp x8,x9,[x0] // save 2 limbs of the remainder + str x2,[x0,#16] // and one limb of the quotient + + mov x0,x2 // return adjusted quotient + + ret + + +.globl quot_rem_64 +.def quot_rem_64; +.type 32; +.endef +.p2align 5 +quot_rem_64: + ldr x3,[x1] + ldr x8,[x0] // load 1 limb of the dividend + + mul x5,x3,x2 // divisor * quotient + + sub x8,x8,x5 // dividend - divisor * quotient + + stp x8,x2,[x0] // save remainder and quotient + + mov x0,x2 // return quotient + + ret + diff --git a/crypto/blst_src/build/coff/div3w-x86_64.s b/crypto/blst_src/build/coff/div3w-x86_64.s new file mode 100644 index 00000000000..033d1eb3055 --- /dev/null +++ b/crypto/blst_src/build/coff/div3w-x86_64.s @@ -0,0 +1,248 @@ +.text + +.globl div_3_limbs + +.def div_3_limbs; .scl 2; .type 32; .endef +.p2align 5 +div_3_limbs: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_div_3_limbs: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +.LSEH_body_div_3_limbs: + + movq (%rdi),%r8 + movq 8(%rdi),%r9 + xorq %rax,%rax + movl $64,%ecx + +.Loop: + movq %r8,%r10 + subq %rsi,%r8 + movq %r9,%r11 + sbbq %rdx,%r9 + leaq 1(%rax,%rax,1),%rax + movq %rdx,%rdi + cmovcq %r10,%r8 + cmovcq %r11,%r9 + sbbq $0,%rax + shlq $63,%rdi + shrq $1,%rsi + shrq $1,%rdx + orq %rdi,%rsi + subl $1,%ecx + jnz .Loop + + leaq 1(%rax,%rax,1),%rcx + sarq $63,%rax + + subq %rsi,%r8 + sbbq %rdx,%r9 + sbbq $0,%rcx + + orq %rcx,%rax + +.LSEH_epilogue_div_3_limbs: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_div_3_limbs: +.globl quot_rem_128 + +.def quot_rem_128; .scl 2; .type 32; .endef +.p2align 5 +quot_rem_128: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_quot_rem_128: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +.LSEH_body_quot_rem_128: + + movq %rdx,%rax + movq %rdx,%rcx + + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + adcq $0,%rdx + + movq 0(%rdi),%r10 + movq 8(%rdi),%r11 + movq 16(%rdi),%rax + + subq %r8,%r10 + sbbq %r9,%r11 + sbbq %rdx,%rax + sbbq %r8,%r8 + + addq %r8,%rcx + movq %r8,%r9 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + addq %r8,%r10 + adcq %r9,%r11 + + movq %r10,0(%rdi) + movq %r11,8(%rdi) + movq %rcx,16(%rdi) + + movq %rcx,%rax + +.LSEH_epilogue_quot_rem_128: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_quot_rem_128: + + + + + +.globl quot_rem_64 + +.def quot_rem_64; .scl 2; .type 32; .endef +.p2align 5 +quot_rem_64: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_quot_rem_64: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +.LSEH_body_quot_rem_64: + + movq %rdx,%rax + imulq 0(%rsi),%rdx + + movq 0(%rdi),%r10 + + subq %rdx,%r10 + + movq %r10,0(%rdi) + movq %rax,8(%rdi) + +.LSEH_epilogue_quot_rem_64: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_quot_rem_64: +.section .pdata +.p2align 2 +.rva .LSEH_begin_div_3_limbs +.rva .LSEH_body_div_3_limbs +.rva .LSEH_info_div_3_limbs_prologue + +.rva .LSEH_body_div_3_limbs +.rva .LSEH_epilogue_div_3_limbs +.rva .LSEH_info_div_3_limbs_body + +.rva .LSEH_epilogue_div_3_limbs +.rva .LSEH_end_div_3_limbs +.rva .LSEH_info_div_3_limbs_epilogue + +.rva .LSEH_begin_quot_rem_128 +.rva .LSEH_body_quot_rem_128 +.rva .LSEH_info_quot_rem_128_prologue + +.rva .LSEH_body_quot_rem_128 +.rva .LSEH_epilogue_quot_rem_128 +.rva .LSEH_info_quot_rem_128_body + +.rva .LSEH_epilogue_quot_rem_128 +.rva .LSEH_end_quot_rem_128 +.rva .LSEH_info_quot_rem_128_epilogue + +.rva .LSEH_begin_quot_rem_64 +.rva .LSEH_body_quot_rem_64 +.rva .LSEH_info_quot_rem_64_prologue + +.rva .LSEH_body_quot_rem_64 +.rva .LSEH_epilogue_quot_rem_64 +.rva .LSEH_info_quot_rem_64_body + +.rva .LSEH_epilogue_quot_rem_64 +.rva .LSEH_end_quot_rem_64 +.rva .LSEH_info_quot_rem_64_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_div_3_limbs_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_div_3_limbs_body: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_div_3_limbs_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_quot_rem_128_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_quot_rem_128_body: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_quot_rem_128_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_quot_rem_64_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_quot_rem_64_body: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_quot_rem_64_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/mul_mont_256-armv8.S b/crypto/blst_src/build/coff/mul_mont_256-armv8.S new file mode 100644 index 00000000000..8cadbb89344 --- /dev/null +++ b/crypto/blst_src/build/coff/mul_mont_256-armv8.S @@ -0,0 +1,474 @@ +.text + +.globl mul_mont_sparse_256 + +.def mul_mont_sparse_256; +.type 32; +.endef +.p2align 5 +mul_mont_sparse_256: + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x10,x11,[x1] + ldr x9, [x2] + ldp x12,x13,[x1,#16] + + mul x19,x10,x9 + ldp x5,x6,[x3] + mul x20,x11,x9 + ldp x7,x8,[x3,#16] + mul x21,x12,x9 + mul x22,x13,x9 + + umulh x14,x10,x9 + umulh x15,x11,x9 + mul x3,x4,x19 + umulh x16,x12,x9 + umulh x17,x13,x9 + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,xzr, x17 + mul x17,x8,x3 + ldr x9,[x2,8*1] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*2] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*3] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + adcs x20,x21,x15 + adcs x21,x22,x16 + adcs x22,x23,x17 + adc x23,xzr,xzr + + subs x14,x19,x5 + sbcs x15,x20,x6 + sbcs x16,x21,x7 + sbcs x17,x22,x8 + sbcs xzr, x23,xzr + + csel x19,x19,x14,lo + csel x20,x20,x15,lo + csel x21,x21,x16,lo + csel x22,x22,x17,lo + + stp x19,x20,[x0] + stp x21,x22,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + ret + +.globl sqr_mont_sparse_256 + +.def sqr_mont_sparse_256; +.type 32; +.endef +.p2align 5 +sqr_mont_sparse_256: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + mov x4,x3 + + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10 + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x11,x6,x5 // a[1]*a[0] + umulh x15,x6,x5 + mul x12,x7,x5 // a[2]*a[0] + umulh x16,x7,x5 + mul x13,x8,x5 // a[3]*a[0] + umulh x19,x8,x5 + + adds x12,x12,x15 // accumulate high parts of multiplication + mul x14,x7,x6 // a[2]*a[1] + umulh x15,x7,x6 + adcs x13,x13,x16 + mul x16,x8,x6 // a[3]*a[1] + umulh x17,x8,x6 + adc x19,x19,xzr // can't overflow + + mul x20,x8,x7 // a[3]*a[2] + umulh x21,x8,x7 + + adds x15,x15,x16 // accumulate high parts of multiplication + mul x10,x5,x5 // a[0]*a[0] + adc x16,x17,xzr // can't overflow + + adds x13,x13,x14 // accumulate low parts of multiplication + umulh x5,x5,x5 + adcs x19,x19,x15 + mul x15,x6,x6 // a[1]*a[1] + adcs x20,x20,x16 + umulh x6,x6,x6 + adc x21,x21,xzr // can't overflow + + adds x11,x11,x11 // acc[1-6]*=2 + mul x16,x7,x7 // a[2]*a[2] + adcs x12,x12,x12 + umulh x7,x7,x7 + adcs x13,x13,x13 + mul x17,x8,x8 // a[3]*a[3] + adcs x19,x19,x19 + umulh x8,x8,x8 + adcs x20,x20,x20 + adcs x21,x21,x21 + adc x22,xzr,xzr + + adds x11,x11,x5 // +a[i]*a[i] + adcs x12,x12,x15 + adcs x13,x13,x6 + adcs x19,x19,x16 + adcs x20,x20,x7 + adcs x21,x21,x17 + adc x22,x22,x8 + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + adds x10,x10,x19 // accumulate upper half + adcs x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adc x19,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x19,xzr + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + +.globl from_mont_256 + +.def from_mont_256; +.type 32; +.endef +.p2align 5 +from_mont_256: +.long 3573752639 + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 +.long 3573752767 + ret + + +.globl redc_mont_256 + +.def redc_mont_256; +.type 32; +.endef +.p2align 5 +redc_mont_256: +.long 3573752639 + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + ldp x14,x15,[x1,#32] + ldp x16,x17,[x1,#48] + + adds x10,x10,x14 + adcs x11,x11,x15 + adcs x12,x12,x16 + adcs x13,x13,x17 + adc x9,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x9,xzr + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 +.long 3573752767 + ret + + +.def __mul_by_1_mont_256; +.type 32; +.endef +.p2align 5 +__mul_by_1_mont_256: + mul x3,x4,x10 + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + adc x13,x9,x17 + + ret + diff --git a/crypto/blst_src/build/coff/mul_mont_384-armv8.S b/crypto/blst_src/build/coff/mul_mont_384-armv8.S new file mode 100644 index 00000000000..074f38c495c --- /dev/null +++ b/crypto/blst_src/build/coff/mul_mont_384-armv8.S @@ -0,0 +1,2424 @@ +.text + +.globl add_mod_384x384 +.def add_mod_384x384; +.type 32; +.endef +.p2align 5 +add_mod_384x384: +.long 3573752639 + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __add_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 +.long 3573752767 + ret + + +.def __add_mod_384x384; +.type 32; +.endef +.p2align 5 +__add_mod_384x384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + stp x11, x12, [x0] + adcs x15,x15,x23 + ldp x11, x12, [x1,#48] + adcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + adcs x11,x11,x19 + stp x15, x16, [x0,#32] + adcs x12,x12,x20 + ldp x15, x16, [x1,#80] + adcs x13,x13,x21 + ldp x23,x24,[x2,#80] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + stp x11,x12,[x0,#48] + csel x15,x15,x23,lo + stp x13,x14,[x0,#64] + csel x16,x16,x24,lo + stp x15,x16,[x0,#80] + + ret + + +.globl sub_mod_384x384 +.def sub_mod_384x384; +.type 32; +.endef +.p2align 5 +sub_mod_384x384: +.long 3573752639 + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 +.long 3573752767 + ret + + +.def __sub_mod_384x384; +.type 32; +.endef +.p2align 5 +__sub_mod_384x384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + stp x11, x12, [x0] + sbcs x15,x15,x23 + ldp x11, x12, [x1,#48] + sbcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + sbcs x11,x11,x19 + stp x15, x16, [x0,#32] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#80] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#80] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + ret + + +.def __add_mod_384; +.type 32; +.endef +.p2align 5 +__add_mod_384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + stp x11,x12,[x0] + csel x16,x16,x24,lo + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + + +.def __sub_mod_384; +.type 32; +.endef +.p2align 5 +__sub_mod_384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0] + adc x16,x16,x24 + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + + +.globl mul_mont_384x + +.def mul_mont_384x; +.type 32; +.endef +.p2align 5 +mul_mont_384x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#288 // space for 3 768-bit vectors + + mov x26,x0 // save r_ptr + mov x27,x1 // save b_ptr + mov x28,x2 // save b_ptr + + sub x0,sp,#0 // mul_384(t0, a->re, b->re) + bl __mul_384 + + add x1,x1,#48 // mul_384(t1, a->im, b->im) + add x2,x2,#48 + add x0,sp,#96 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + sub x2,x1,#48 + add x0,sp,#240 + bl __add_mod_384 + + add x1,x28,#0 + add x2,x28,#48 + add x0,sp,#192 // t2 + bl __add_mod_384 + + add x1,x0,#0 + add x2,x0,#48 + bl __mul_384 // mul_384(t2, a->re+a->im, b->re+b->im) + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,x0 + add x2,sp,#0 + bl __sub_mod_384x384 + + add x2,sp,#96 + bl __sub_mod_384x384 // t2 = t2-t0-t1 + + add x1,sp,#0 + add x2,sp,#96 + add x0,sp,#0 + bl __sub_mod_384x384 // t0 = t0-t1 + + add x1,sp,#0 // ret->re = redc(t0) + add x0,x26,#0 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + + add x1,sp,#192 // ret->im = redc(t2) + add x0,x0,#48 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#288 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl sqr_mont_384x + +.def sqr_mont_384x; +.type 32; +.endef +.p2align 5 +sqr_mont_384x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 2 384-bit vectors + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + add x0,sp,#0 + bl __add_mod_384 // t0 = a->re + a->im + + add x0,sp,#48 + bl __sub_mod_384 // t1 = a->re - a->im + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __mul_mont_384 // mul_mont_384(ret->im, a->re, a->im) + + adds x11,x11,x11 // add with itself + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csel x19,x11,x19,lo + csel x20,x12,x20,lo + csel x21,x13,x21,lo + ldp x11,x12,[sp] + csel x22,x14,x22,lo + ldr x17, [sp,#48] + csel x23,x15,x23,lo + ldp x13,x14,[sp,#16] + csel x24,x16,x24,lo + ldp x15,x16,[sp,#32] + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + add x2,sp,#48 + bl __mul_mont_384 // mul_mont_384(ret->re, t0, t1) + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl mul_mont_384 + +.def mul_mont_384; +.type 32; +.endef +.p2align 5 +mul_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.def __mul_mont_384; +.type 32; +.endef +.p2align 5 +__mul_mont_384: + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + mov x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*1] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*2] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*3] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*4] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*5] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + adc x17,x17,xzr + + adds x19,x20,x26 + adcs x20,x21,x27 + adcs x21,x22,x28 + adcs x22,x23,x0 + adcs x23,x24,x1 + adcs x24,x25,x3 + adc x25,x17,xzr + + subs x26,x19,x5 + sbcs x27,x20,x6 + sbcs x28,x21,x7 + sbcs x0,x22,x8 + sbcs x1,x23,x9 + sbcs x3,x24,x10 + sbcs xzr, x25,xzr + + csel x11,x19,x26,lo + csel x12,x20,x27,lo + csel x13,x21,x28,lo + csel x14,x22,x0,lo + csel x15,x23,x1,lo + csel x16,x24,x3,lo + ret + + +.globl sqr_mont_384 + +.def sqr_mont_384; +.type 32; +.endef +.p2align 5 +sqr_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for 768-bit vector + mov x4,x3 // adjust for missing b_ptr + + mov x3,x0 // save r_ptr + mov x0,sp + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + mov x1,sp + mov x0,x3 // restore r_ptr + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl sqr_n_mul_mont_383 + +.def sqr_n_mul_mont_383; +.type 32; +.endef +.p2align 5 +sqr_n_mul_mont_383: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 768-bit vector + mov x17,x5 // save b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + mov x0,sp +.Loop_sqr_383: + bl __sqr_384 + sub x2,x2,#1 // counter + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,sp + bl __mul_by_1_mont_384 + + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // just accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + cbnz x2,.Loop_sqr_383 + + mov x2,x17 + ldr x17,[x17] + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + +.def __sqr_384; +.type 32; +.endef +.p2align 5 +__sqr_384: + mul x19,x12,x11 + mul x20,x13,x11 + mul x21,x14,x11 + mul x22,x15,x11 + mul x23,x16,x11 + + umulh x6,x12,x11 + umulh x7,x13,x11 + umulh x8,x14,x11 + umulh x9,x15,x11 + adds x20,x20,x6 + umulh x10,x16,x11 + adcs x21,x21,x7 + mul x7,x13,x12 + adcs x22,x22,x8 + mul x8,x14,x12 + adcs x23,x23,x9 + mul x9,x15,x12 + adc x24,xzr, x10 + mul x10,x16,x12 + + adds x21,x21,x7 + umulh x7,x13,x12 + adcs x22,x22,x8 + umulh x8,x14,x12 + adcs x23,x23,x9 + umulh x9,x15,x12 + adcs x24,x24,x10 + umulh x10,x16,x12 + adc x25,xzr,xzr + + mul x5,x11,x11 + adds x22,x22,x7 + umulh x11, x11,x11 + adcs x23,x23,x8 + mul x8,x14,x13 + adcs x24,x24,x9 + mul x9,x15,x13 + adc x25,x25,x10 + mul x10,x16,x13 + + adds x23,x23,x8 + umulh x8,x14,x13 + adcs x24,x24,x9 + umulh x9,x15,x13 + adcs x25,x25,x10 + umulh x10,x16,x13 + adc x26,xzr,xzr + + mul x6,x12,x12 + adds x24,x24,x8 + umulh x12, x12,x12 + adcs x25,x25,x9 + mul x9,x15,x14 + adc x26,x26,x10 + mul x10,x16,x14 + + adds x25,x25,x9 + umulh x9,x15,x14 + adcs x26,x26,x10 + umulh x10,x16,x14 + adc x27,xzr,xzr + mul x7,x13,x13 + adds x26,x26,x9 + umulh x13, x13,x13 + adc x27,x27,x10 + mul x8,x14,x14 + + mul x10,x16,x15 + umulh x14, x14,x14 + adds x27,x27,x10 + umulh x10,x16,x15 + mul x9,x15,x15 + adc x28,x10,xzr + + adds x19,x19,x19 + adcs x20,x20,x20 + adcs x21,x21,x21 + adcs x22,x22,x22 + adcs x23,x23,x23 + adcs x24,x24,x24 + adcs x25,x25,x25 + adcs x26,x26,x26 + umulh x15, x15,x15 + adcs x27,x27,x27 + mul x10,x16,x16 + adcs x28,x28,x28 + umulh x16, x16,x16 + adc x1,xzr,xzr + + adds x19,x19,x11 + adcs x20,x20,x6 + adcs x21,x21,x12 + adcs x22,x22,x7 + adcs x23,x23,x13 + adcs x24,x24,x8 + adcs x25,x25,x14 + stp x5,x19,[x0] + adcs x26,x26,x9 + stp x20,x21,[x0,#16] + adcs x27,x27,x15 + stp x22,x23,[x0,#32] + adcs x28,x28,x10 + stp x24,x25,[x0,#48] + adc x16,x16,x1 + stp x26,x27,[x0,#64] + stp x28,x16,[x0,#80] + + ret + +.globl sqr_384 + +.def sqr_384; +.type 32; +.endef +.p2align 5 +sqr_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl redc_mont_384 + +.def redc_mont_384; +.type 32; +.endef +.p2align 5 +redc_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl from_mont_384 + +.def from_mont_384; +.type 32; +.endef +.p2align 5 +from_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + csel x16,x16,x24,lo + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.def __mul_by_1_mont_384; +.type 32; +.endef +.p2align 5 +__mul_by_1_mont_384: + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + mul x26,x4,x11 + ldp x15,x16,[x1,#32] + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + ret + + +.def __redc_tail_mont_384; +.type 32; +.endef +.p2align 5 +__redc_tail_mont_384: + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + csel x16,x16,x24,lo + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + + +.globl mul_384 + +.def mul_384; +.type 32; +.endef +.p2align 5 +mul_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + bl __mul_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.def __mul_384; +.type 32; +.endef +.p2align 5 +__mul_384: + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + + umulh x5,x11,x17 + umulh x6,x12,x17 + umulh x7,x13,x17 + umulh x8,x14,x17 + umulh x9,x15,x17 + umulh x10,x16,x17 + ldr x17,[x2,8*1] + + str x19,[x0] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,xzr, x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(1+1)] + adc x25,xzr,xzr + + str x19,[x0,8*1] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(2+1)] + adc x25,xzr,xzr + + str x19,[x0,8*2] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(3+1)] + adc x25,xzr,xzr + + str x19,[x0,8*3] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(4+1)] + adc x25,xzr,xzr + + str x19,[x0,8*4] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + adc x25,xzr,xzr + + str x19,[x0,8*5] + adds x19,x20,x5 + adcs x20,x21,x6 + adcs x21,x22,x7 + adcs x22,x23,x8 + adcs x23,x24,x9 + adc x24,x25,x10 + + stp x19,x20,[x0,#48] + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ret + + +.globl mul_382x + +.def mul_382x; +.type 32; +.endef +.p2align 5 +mul_382x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for two 384-bit vectors + + ldp x11,x12,[x1] + mov x26,x0 // save r_ptr + ldp x19,x20,[x1,#48] + mov x27,x1 // save a_ptr + ldp x13,x14,[x1,#16] + mov x28,x2 // save b_ptr + ldp x21,x22,[x1,#64] + ldp x15,x16,[x1,#32] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x23,x24,[x1,#80] + adcs x6,x12,x20 + ldp x11,x12,[x2] + adcs x7,x13,x21 + ldp x19,x20,[x2,#48] + adcs x8,x14,x22 + ldp x13,x14,[x2,#16] + adcs x9,x15,x23 + ldp x21,x22,[x2,#64] + adc x10,x16,x24 + ldp x15,x16,[x2,#32] + + stp x5,x6,[sp] + adds x5,x11,x19 // t1 = b->re + b->im + ldp x23,x24,[x2,#80] + adcs x6,x12,x20 + stp x7,x8,[sp,#16] + adcs x7,x13,x21 + adcs x8,x14,x22 + stp x9,x10,[sp,#32] + adcs x9,x15,x23 + stp x5,x6,[sp,#48] + adc x10,x16,x24 + stp x7,x8,[sp,#64] + stp x9,x10,[sp,#80] + + bl __mul_384 // mul_384(ret->re, a->re, b->re) + + add x1,sp,#0 // mul_384(ret->im, t0, t1) + add x2,sp,#48 + add x0,x26,#96 + bl __mul_384 + + add x1,x27,#48 // mul_384(tx, a->im, b->im) + add x2,x28,#48 + add x0,sp,#0 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + add x1,x26,#96 // ret->im -= tx + add x2,sp,#0 + add x0,x26,#96 + bl __sub_mod_384x384 + + add x2,x26,#0 // ret->im -= ret->re + bl __sub_mod_384x384 + + add x1,x26,#0 // ret->re -= tx + add x2,sp,#0 + add x0,x26,#0 + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl sqr_382x + +.def sqr_382x; +.type 32; +.endef +.p2align 5 +sqr_382x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x19,x20,[x1,#48] + ldp x13,x14,[x1,#16] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x21,x22,[x1,#64] + adcs x6,x12,x20 + ldp x15,x16,[x1,#32] + adcs x7,x13,x21 + ldp x23,x24,[x1,#80] + adcs x8,x14,x22 + stp x5,x6,[x0] + adcs x9,x15,x23 + ldp x5,x6,[x2] + adc x10,x16,x24 + stp x7,x8,[x0,#16] + + subs x11,x11,x19 // t1 = a->re - a->im + ldp x7,x8,[x2,#16] + sbcs x12,x12,x20 + stp x9,x10,[x0,#32] + sbcs x13,x13,x21 + ldp x9,x10,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + adds x11,x11,x19 + and x21,x7,x25 + adcs x12,x12,x20 + and x22,x8,x25 + adcs x13,x13,x21 + and x23,x9,x25 + adcs x14,x14,x22 + and x24,x10,x25 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + mov x4,x1 // save a_ptr + add x1,x0,#0 // mul_384(ret->re, t0, t1) + add x2,x0,#48 + bl __mul_384 + + add x1,x4,#0 // mul_384(ret->im, a->re, a->im) + add x2,x4,#48 + add x0,x0,#96 + bl __mul_384 + ldr x30,[x29,#8] + + ldp x11,x12,[x0] + ldp x13,x14,[x0,#16] + adds x11,x11,x11 // add with itself + ldp x15,x16,[x0,#32] + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adcs x19,x19,x19 + adcs x20,x20,x20 + stp x11,x12,[x0] + adcs x21,x21,x21 + stp x13,x14,[x0,#16] + adcs x22,x22,x22 + stp x15,x16,[x0,#32] + adcs x23,x23,x23 + stp x19,x20,[x0,#48] + adc x24,x24,x24 + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl sqr_mont_382x + +.def sqr_mont_382x; +.type 32; +.endef +.p2align 5 +sqr_mont_382x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#112 // space for two 384-bit vectors + word + mov x4,x3 // adjust for missing b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x17,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x5,x11,x17 // t0 = a->re + a->im + adcs x6,x12,x20 + adcs x7,x13,x21 + adcs x8,x14,x22 + adcs x9,x15,x23 + adc x10,x16,x24 + + subs x19,x11,x17 // t1 = a->re - a->im + sbcs x20,x12,x20 + sbcs x21,x13,x21 + sbcs x22,x14,x22 + sbcs x23,x15,x23 + sbcs x24,x16,x24 + sbc x25,xzr,xzr // borrow flag as mask + + stp x5,x6,[sp] + stp x7,x8,[sp,#16] + stp x9,x10,[sp,#32] + stp x19,x20,[sp,#48] + stp x21,x22,[sp,#64] + stp x23,x24,[sp,#80] + str x25,[sp,#96] + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + bl __mul_mont_383_nonred // mul_mont_384(ret->im, a->re, a->im) + + adds x19,x11,x11 // add with itself + adcs x20,x12,x12 + adcs x21,x13,x13 + adcs x22,x14,x14 + adcs x23,x15,x15 + adc x24,x16,x16 + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + ldp x11,x12,[sp] + ldr x17,[sp,#48] + ldp x13,x14,[sp,#16] + ldp x15,x16,[sp,#32] + + add x2,sp,#48 + bl __mul_mont_383_nonred // mul_mont_384(ret->im, t0, t1) + ldr x30,[x29,#8] + + ldr x25,[sp,#96] // account for sign from a->re - a->im + ldp x19,x20,[sp] + ldp x21,x22,[sp,#16] + ldp x23,x24,[sp,#32] + + and x19,x19,x25 + and x20,x20,x25 + and x21,x21,x25 + and x22,x22,x25 + and x23,x23,x25 + and x24,x24,x25 + + subs x11,x11,x19 + sbcs x12,x12,x20 + sbcs x13,x13,x21 + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + and x21,x7,x25 + and x22,x8,x25 + and x23,x9,x25 + and x24,x10,x25 + + adds x11,x11,x19 + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#112 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.def __mul_mont_383_nonred; +.type 32; +.endef +.p2align 5 +__mul_mont_383_nonred: + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + ldr x17,[x2,8*1] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*2] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*3] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*4] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*5] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + + adds x11,x20,x26 + adcs x12,x21,x27 + adcs x13,x22,x28 + adcs x14,x23,x0 + adcs x15,x24,x1 + adcs x16,x25,x3 + + ret + + +.globl sgn0_pty_mont_384 + +.def sgn0_pty_mont_384; +.type 32; +.endef +.p2align 5 +sgn0_pty_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + adds x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl sgn0_pty_mont_384x + +.def sgn0_pty_mont_384x; +.type 32; +.endef +.p2align 5 +sgn0_pty_mont_384x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + add x1,x1,#48 + + and x2,x11,#1 + orr x3,x11,x12 + adds x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + orr x3,x3,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x2,x2,x17 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + orr x1,x11,x12 + adds x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + orr x1,x1,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + cmp x3,#0 + csel x3,x0,x2,eq // a->re==0? prty(a->im) : prty(a->re) + + cmp x1,#0 + csel x1,x0,x2,ne // a->im!=0? sgn0(a->im) : sgn0(a->re) + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + diff --git a/crypto/blst_src/build/coff/mulq_mont_256-x86_64.s b/crypto/blst_src/build/coff/mulq_mont_256-x86_64.s new file mode 100644 index 00000000000..2dd30bc5b5d --- /dev/null +++ b/crypto/blst_src/build/coff/mulq_mont_256-x86_64.s @@ -0,0 +1,897 @@ +.comm __blst_platform_cap,4 +.text + +.globl mul_mont_sparse_256 + +.def mul_mont_sparse_256; .scl 2; .type 32; .endef +.p2align 5 +mul_mont_sparse_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_mont_sparse_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_mont_sparse_256$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + +.LSEH_body_mul_mont_sparse_256: + + + movq 0(%rdx),%rax + movq 0(%rsi),%r13 + movq 8(%rsi),%r14 + movq 16(%rsi),%r12 + movq 24(%rsi),%rbp + movq %rdx,%rbx + + movq %rax,%r15 + mulq %r13 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + call __mulq_mont_sparse_256 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_mul_mont_sparse_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_mont_sparse_256: + +.globl sqr_mont_sparse_256 + +.def sqr_mont_sparse_256; .scl 2; .type 32; .endef +.p2align 5 +sqr_mont_sparse_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_mont_sparse_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_sparse_256$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + +.LSEH_body_sqr_mont_sparse_256: + + + movq 0(%rsi),%rax + movq %rcx,%r8 + movq 8(%rsi),%r14 + movq %rdx,%rcx + movq 16(%rsi),%r12 + leaq (%rsi),%rbx + movq 24(%rsi),%rbp + + movq %rax,%r15 + mulq %rax + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + call __mulq_mont_sparse_256 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sqr_mont_sparse_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_mont_sparse_256: +.def __mulq_mont_sparse_256; .scl 3; .type 32; .endef +.p2align 5 +__mulq_mont_sparse_256: + .byte 0xf3,0x0f,0x1e,0xfa + + mulq %r14 + addq %rax,%r10 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq %rbp + addq %rax,%r12 + movq 8(%rbx),%rax + adcq $0,%rdx + xorq %r14,%r14 + movq %rdx,%r13 + + movq %r9,%rdi + imulq %r8,%r9 + + + movq %rax,%r15 + mulq 0(%rsi) + addq %rax,%r10 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r12 + movq %r15,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq %rdx,%r14 + xorq %r15,%r15 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r9,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %rdi,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r12 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + addq %rdx,%r13 + adcq $0,%r14 + adcq $0,%r15 + movq %r10,%rdi + imulq %r8,%r10 + + + movq %rax,%r9 + mulq 0(%rsi) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq %rdx,%r15 + xorq %r9,%r9 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r10,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %rdi,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r13 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + addq %rdx,%r14 + adcq $0,%r15 + adcq $0,%r9 + movq %r11,%rdi + imulq %r8,%r11 + + + movq %rax,%r10 + mulq 0(%rsi) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq %rdx,%r9 + xorq %r10,%r10 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r11,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %rdi,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + addq %rdx,%r15 + adcq $0,%r9 + adcq $0,%r10 + imulq %r8,%rax + movq 8(%rsp),%rsi + + + movq %rax,%r11 + mulq 0(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq %rdx,%r12 + + mulq 8(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r12,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + movq %r14,%rbx + addq %rbp,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %rdx,%r9 + adcq $0,%r10 + + + + + movq %r15,%r12 + subq 0(%rcx),%r13 + sbbq 8(%rcx),%r14 + sbbq 16(%rcx),%r15 + movq %r9,%rbp + sbbq 24(%rcx),%r9 + sbbq $0,%r10 + + cmovcq %rax,%r13 + cmovcq %rbx,%r14 + cmovcq %r12,%r15 + movq %r13,0(%rsi) + cmovcq %rbp,%r9 + movq %r14,8(%rsi) + movq %r15,16(%rsi) + movq %r9,24(%rsi) + + .byte 0xf3,0xc3 + + +.globl from_mont_256 + +.def from_mont_256; .scl 2; .type 32; .endef +.p2align 5 +from_mont_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_from_mont_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz from_mont_256$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_from_mont_256: + + + movq %rdx,%rbx + call __mulq_by_1_mont_256 + + + + + + movq %r14,%r10 + movq %r15,%r11 + movq %r9,%r12 + + subq 0(%rbx),%r13 + sbbq 8(%rbx),%r14 + sbbq 16(%rbx),%r15 + sbbq 24(%rbx),%r9 + + cmovncq %r13,%rax + cmovncq %r14,%r10 + cmovncq %r15,%r11 + movq %rax,0(%rdi) + cmovncq %r9,%r12 + movq %r10,8(%rdi) + movq %r11,16(%rdi) + movq %r12,24(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_from_mont_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_from_mont_256: + +.globl redc_mont_256 + +.def redc_mont_256; .scl 2; .type 32; .endef +.p2align 5 +redc_mont_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_redc_mont_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz redc_mont_256$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_redc_mont_256: + + + movq %rdx,%rbx + call __mulq_by_1_mont_256 + + addq 32(%rsi),%r13 + adcq 40(%rsi),%r14 + movq %r13,%rax + adcq 48(%rsi),%r15 + movq %r14,%r10 + adcq 56(%rsi),%r9 + sbbq %rsi,%rsi + + + + + movq %r15,%r11 + subq 0(%rbx),%r13 + sbbq 8(%rbx),%r14 + sbbq 16(%rbx),%r15 + movq %r9,%r12 + sbbq 24(%rbx),%r9 + sbbq $0,%rsi + + cmovncq %r13,%rax + cmovncq %r14,%r10 + cmovncq %r15,%r11 + movq %rax,0(%rdi) + cmovncq %r9,%r12 + movq %r10,8(%rdi) + movq %r11,16(%rdi) + movq %r12,24(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_redc_mont_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_redc_mont_256: +.def __mulq_by_1_mont_256; .scl 3; .type 32; .endef +.p2align 5 +__mulq_by_1_mont_256: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + movq %rax,%r13 + imulq %rcx,%rax + movq %rax,%r9 + + mulq 0(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r13 + + mulq 8(%rbx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r13,%r10 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 16(%rbx) + movq %r10,%r14 + imulq %rcx,%r10 + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r13,%r11 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 24(%rbx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r13,%r12 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + movq %r11,%r15 + imulq %rcx,%r11 + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + movq %r12,%r9 + imulq %rcx,%r12 + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r9 + movq %r12,%rax + adcq %rdx,%r9 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_mul_mont_sparse_256 +.rva .LSEH_body_mul_mont_sparse_256 +.rva .LSEH_info_mul_mont_sparse_256_prologue + +.rva .LSEH_body_mul_mont_sparse_256 +.rva .LSEH_epilogue_mul_mont_sparse_256 +.rva .LSEH_info_mul_mont_sparse_256_body + +.rva .LSEH_epilogue_mul_mont_sparse_256 +.rva .LSEH_end_mul_mont_sparse_256 +.rva .LSEH_info_mul_mont_sparse_256_epilogue + +.rva .LSEH_begin_sqr_mont_sparse_256 +.rva .LSEH_body_sqr_mont_sparse_256 +.rva .LSEH_info_sqr_mont_sparse_256_prologue + +.rva .LSEH_body_sqr_mont_sparse_256 +.rva .LSEH_epilogue_sqr_mont_sparse_256 +.rva .LSEH_info_sqr_mont_sparse_256_body + +.rva .LSEH_epilogue_sqr_mont_sparse_256 +.rva .LSEH_end_sqr_mont_sparse_256 +.rva .LSEH_info_sqr_mont_sparse_256_epilogue + +.rva .LSEH_begin_from_mont_256 +.rva .LSEH_body_from_mont_256 +.rva .LSEH_info_from_mont_256_prologue + +.rva .LSEH_body_from_mont_256 +.rva .LSEH_epilogue_from_mont_256 +.rva .LSEH_info_from_mont_256_body + +.rva .LSEH_epilogue_from_mont_256 +.rva .LSEH_end_from_mont_256 +.rva .LSEH_info_from_mont_256_epilogue + +.rva .LSEH_begin_redc_mont_256 +.rva .LSEH_body_redc_mont_256 +.rva .LSEH_info_redc_mont_256_prologue + +.rva .LSEH_body_redc_mont_256 +.rva .LSEH_epilogue_redc_mont_256 +.rva .LSEH_info_redc_mont_256_body + +.rva .LSEH_epilogue_redc_mont_256 +.rva .LSEH_end_redc_mont_256 +.rva .LSEH_info_redc_mont_256_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_mul_mont_sparse_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_mont_sparse_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_mont_sparse_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_mont_sparse_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_mont_sparse_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_mont_sparse_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_from_mont_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_from_mont_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_from_mont_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_redc_mont_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_redc_mont_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_redc_mont_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/mulq_mont_384-x86_64.s b/crypto/blst_src/build/coff/mulq_mont_384-x86_64.s new file mode 100644 index 00000000000..ee646f5b137 --- /dev/null +++ b/crypto/blst_src/build/coff/mulq_mont_384-x86_64.s @@ -0,0 +1,4303 @@ +.comm __blst_platform_cap,4 +.text + + + + + + + +.def __subq_mod_384x384; .scl 3; .type 32; .endef +.p2align 5 +__subq_mod_384x384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 + + +.def __addq_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__addq_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + + +.def __subq_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__subq_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__subq_mod_384_a_is_loaded: + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + +.globl mul_mont_384x + +.def mul_mont_384x; .scl 2; .type 32; .endef +.p2align 5 +mul_mont_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_mont_384x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_mont_384x$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $328,%rsp + +.LSEH_body_mul_mont_384x: + + + movq %rdx,%rbx + movq %rdi,32(%rsp) + movq %rsi,24(%rsp) + movq %rdx,16(%rsp) + movq %rcx,8(%rsp) + movq %r8,0(%rsp) + + + + + leaq 40(%rsp),%rdi + call __mulq_384 + + + leaq 48(%rbx),%rbx + leaq 48(%rsi),%rsi + leaq 40+96(%rsp),%rdi + call __mulq_384 + + + movq 8(%rsp),%rcx + leaq -48(%rsi),%rdx + leaq 40+192+48(%rsp),%rdi + call __addq_mod_384 + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq -48(%rdi),%rdi + call __addq_mod_384 + + leaq (%rdi),%rbx + leaq 48(%rdi),%rsi + call __mulq_384 + + + leaq (%rdi),%rsi + leaq 40(%rsp),%rdx + movq 8(%rsp),%rcx + call __subq_mod_384x384 + + leaq (%rdi),%rsi + leaq -96(%rdi),%rdx + call __subq_mod_384x384 + + + leaq 40(%rsp),%rsi + leaq 40+96(%rsp),%rdx + leaq 40(%rsp),%rdi + call __subq_mod_384x384 + + movq %rcx,%rbx + + + leaq 40(%rsp),%rsi + movq 0(%rsp),%rcx + movq 32(%rsp),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + + leaq 40+192(%rsp),%rsi + movq 0(%rsp),%rcx + leaq 48(%rdi),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + leaq 328(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_mul_mont_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_mont_384x: +.globl sqr_mont_384x + +.def sqr_mont_384x; .scl 2; .type 32; .endef +.p2align 5 +sqr_mont_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_mont_384x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_384x$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_sqr_mont_384x: + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + + + leaq 48(%rsi),%rdx + leaq 32(%rsp),%rdi + call __addq_mod_384 + + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq 32+48(%rsp),%rdi + call __subq_mod_384 + + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + call __mulq_mont_384 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + movq %r14,%r12 + adcq %r9,%r9 + movq %r15,%r13 + adcq %r10,%r10 + movq %r8,%rax + adcq %r11,%r11 + movq %r9,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %r10,%rbp + sbbq 16(%rcx),%r8 + sbbq 24(%rcx),%r9 + sbbq 32(%rcx),%r10 + movq %r11,%rsi + sbbq 40(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r14 + cmovcq %r13,%r15 + cmovcq %rax,%r8 + movq %r14,48(%rdi) + cmovcq %rbx,%r9 + movq %r15,56(%rdi) + cmovcq %rbp,%r10 + movq %r8,64(%rdi) + cmovcq %rsi,%r11 + movq %r9,72(%rdi) + movq %r10,80(%rdi) + movq %r11,88(%rdi) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rax + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%r12 + movq 32+24(%rsp),%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_sqr_mont_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_mont_384x: + +.globl mul_382x + +.def mul_382x; .scl 2; .type 32; .endef +.p2align 5 +mul_382x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_382x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_382x$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_mul_382x: + + + leaq 96(%rdi),%rdi + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + movq %rdi,16(%rsp) + movq %rcx,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 48(%rsi),%r8 + adcq 56(%rsi),%r9 + adcq 64(%rsi),%r10 + adcq 72(%rsi),%r11 + adcq 80(%rsi),%r12 + adcq 88(%rsi),%r13 + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + + addq 48(%rdx),%r8 + adcq 56(%rdx),%r9 + adcq 64(%rdx),%r10 + adcq 72(%rdx),%r11 + adcq 80(%rdx),%r12 + adcq 88(%rdx),%r13 + + movq %r8,32+48(%rsp) + movq %r9,32+56(%rsp) + movq %r10,32+64(%rsp) + movq %r11,32+72(%rsp) + movq %r12,32+80(%rsp) + movq %r13,32+88(%rsp) + + + leaq 32+0(%rsp),%rsi + leaq 32+48(%rsp),%rbx + call __mulq_384 + + + movq 0(%rsp),%rsi + movq 8(%rsp),%rbx + leaq -96(%rdi),%rdi + call __mulq_384 + + + leaq 48(%rsi),%rsi + leaq 48(%rbx),%rbx + leaq 32(%rsp),%rdi + call __mulq_384 + + + movq 16(%rsp),%rsi + leaq 32(%rsp),%rdx + movq 24(%rsp),%rcx + movq %rsi,%rdi + call __subq_mod_384x384 + + + leaq 0(%rdi),%rsi + leaq -96(%rdi),%rdx + call __subq_mod_384x384 + + + leaq -96(%rdi),%rsi + leaq 32(%rsp),%rdx + leaq -96(%rdi),%rdi + call __subq_mod_384x384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_mul_382x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_382x: +.globl sqr_382x + +.def sqr_382x; .scl 2; .type 32; .endef +.p2align 5 +sqr_382x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_382x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_382x$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rsi + +.LSEH_body_sqr_382x: + + + movq %rdx,%rcx + + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%rbx + movq 32(%rsi),%rbp + movq 40(%rsi),%rdx + + movq %r14,%r8 + addq 48(%rsi),%r14 + movq %r15,%r9 + adcq 56(%rsi),%r15 + movq %rax,%r10 + adcq 64(%rsi),%rax + movq %rbx,%r11 + adcq 72(%rsi),%rbx + movq %rbp,%r12 + adcq 80(%rsi),%rbp + movq %rdx,%r13 + adcq 88(%rsi),%rdx + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rdx,40(%rdi) + + + leaq 48(%rsi),%rdx + leaq 48(%rdi),%rdi + call __subq_mod_384_a_is_loaded + + + leaq (%rdi),%rsi + leaq -48(%rdi),%rbx + leaq -48(%rdi),%rdi + call __mulq_384 + + + movq (%rsp),%rsi + leaq 48(%rsi),%rbx + leaq 96(%rdi),%rdi + call __mulq_384 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq 64(%rdi),%rax + movq 72(%rdi),%rbx + movq 80(%rdi),%rbp + addq %r8,%r8 + movq 88(%rdi),%rdx + adcq %r9,%r9 + movq %r8,0(%rdi) + adcq %r10,%r10 + movq %r9,8(%rdi) + adcq %r11,%r11 + movq %r10,16(%rdi) + adcq %r12,%r12 + movq %r11,24(%rdi) + adcq %r13,%r13 + movq %r12,32(%rdi) + adcq %r14,%r14 + movq %r13,40(%rdi) + adcq %r15,%r15 + movq %r14,48(%rdi) + adcq %rax,%rax + movq %r15,56(%rdi) + adcq %rbx,%rbx + movq %rax,64(%rdi) + adcq %rbp,%rbp + movq %rbx,72(%rdi) + adcq %rdx,%rdx + movq %rbp,80(%rdi) + movq %rdx,88(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sqr_382x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_382x: +.globl mul_384 + +.def mul_384; .scl 2; .type 32; .endef +.p2align 5 +mul_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + +.LSEH_body_mul_384: + + + movq %rdx,%rbx + call __mulq_384 + + movq 0(%rsp),%r12 + + movq 8(%rsp),%rbx + + movq 16(%rsp),%rbp + + leaq 24(%rsp),%rsp + +.LSEH_epilogue_mul_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_384: + +.def __mulq_384; .scl 3; .type 32; .endef +.p2align 5 +__mulq_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rbx),%rax + + movq %rax,%rbp + mulq 0(%rsi) + movq %rax,0(%rdi) + movq %rbp,%rax + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r11 + movq 8(%rbx),%rax + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,8(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,16(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,24(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,32(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,40(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq %rax,%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rcx,48(%rdi) + movq %r8,56(%rdi) + movq %r9,64(%rdi) + movq %r10,72(%rdi) + movq %r11,80(%rdi) + movq %r12,88(%rdi) + + .byte 0xf3,0xc3 + +.globl sqr_384 + +.def sqr_384; .scl 2; .type 32; .endef +.p2align 5 +sqr_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_384: + + + movq %rcx,%rdi + movq %rdx,%rsi +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sqr_384: + + + call __sqrq_384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sqr_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_384: + +.def __sqrq_384; .scl 3; .type 32; .endef +.p2align 5 +__sqrq_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r15 + movq 16(%rsi),%rcx + movq 24(%rsi),%rbx + + + movq %rax,%r14 + mulq %r15 + movq %rax,%r9 + movq %r14,%rax + movq 32(%rsi),%rbp + movq %rdx,%r10 + + mulq %rcx + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + movq 40(%rsi),%rsi + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r11 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq %rbp + addq %rax,%r12 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq %rsi + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + + mulq %rax + xorq %r8,%r8 + movq %rax,0(%rdi) + movq %r15,%rax + addq %r9,%r9 + adcq $0,%r8 + addq %rdx,%r9 + adcq $0,%r8 + movq %r9,8(%rdi) + + mulq %rcx + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r12 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rbp + addq %rax,%r13 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rsi + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rax + xorq %r9,%r9 + addq %rax,%r8 + movq %rcx,%rax + addq %r10,%r10 + adcq %r11,%r11 + adcq $0,%r9 + addq %r8,%r10 + adcq %rdx,%r11 + adcq $0,%r9 + movq %r10,16(%rdi) + + mulq %rbx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + movq %r11,24(%rdi) + movq %rdx,%r8 + + mulq %rbp + addq %rax,%r14 + movq %rcx,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq %rsi + addq %rax,%r15 + movq %rcx,%rax + adcq $0,%rdx + addq %r8,%r15 + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + xorq %r11,%r11 + addq %rax,%r9 + movq %rbx,%rax + addq %r12,%r12 + adcq %r13,%r13 + adcq $0,%r11 + addq %r9,%r12 + adcq %rdx,%r13 + adcq $0,%r11 + movq %r12,32(%rdi) + + + mulq %rbp + addq %rax,%r15 + movq %rbx,%rax + adcq $0,%rdx + movq %r13,40(%rdi) + movq %rdx,%r8 + + mulq %rsi + addq %rax,%rcx + movq %rbx,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%rbx + + mulq %rax + xorq %r12,%r12 + addq %rax,%r11 + movq %rbp,%rax + addq %r14,%r14 + adcq %r15,%r15 + adcq $0,%r12 + addq %r11,%r14 + adcq %rdx,%r15 + movq %r14,48(%rdi) + adcq $0,%r12 + movq %r15,56(%rdi) + + + mulq %rsi + addq %rax,%rbx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + xorq %r13,%r13 + addq %rax,%r12 + movq %rsi,%rax + addq %rcx,%rcx + adcq %rbx,%rbx + adcq $0,%r13 + addq %r12,%rcx + adcq %rdx,%rbx + movq %rcx,64(%rdi) + adcq $0,%r13 + movq %rbx,72(%rdi) + + + mulq %rax + addq %r13,%rax + addq %rbp,%rbp + adcq $0,%rdx + addq %rbp,%rax + adcq $0,%rdx + movq %rax,80(%rdi) + movq %rdx,88(%rdi) + + .byte 0xf3,0xc3 + + +.globl sqr_mont_384 + +.def sqr_mont_384; .scl 2; .type 32; .endef +.p2align 5 +sqr_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $120,%rsp + +.LSEH_body_sqr_mont_384: + + + movq %rcx,96(%rsp) + movq %rdx,104(%rsp) + movq %rdi,112(%rsp) + + movq %rsp,%rdi + call __sqrq_384 + + leaq 0(%rsp),%rsi + movq 96(%rsp),%rcx + movq 104(%rsp),%rbx + movq 112(%rsp),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + leaq 120(%rsp),%r8 + movq 120(%rsp),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_sqr_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_mont_384: + + + +.globl redc_mont_384 + +.def redc_mont_384; .scl 2; .type 32; .endef +.p2align 5 +redc_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_redc_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz redc_mont_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_redc_mont_384: + + + movq %rdx,%rbx + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_redc_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_redc_mont_384: + + + + +.globl from_mont_384 + +.def from_mont_384; .scl 2; .type 32; .endef +.p2align 5 +from_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_from_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz from_mont_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_from_mont_384: + + + movq %rdx,%rbx + call __mulq_by_1_mont_384 + + + + + + movq %r15,%rcx + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_from_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_from_mont_384: +.def __mulq_by_1_mont_384; .scl 3; .type 32; .endef +.p2align 5 +__mulq_by_1_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rax,%r14 + imulq %rcx,%rax + movq %rax,%r8 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r9 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r10 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %r9,%r15 + imulq %rcx,%r9 + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 32(%rbx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 40(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r9,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r11 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %r10,%r8 + imulq %rcx,%r10 + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r8 + movq %r10,%rax + adcq %rdx,%r8 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rbx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %r11,%r9 + imulq %rcx,%r11 + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r8,%r15 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 0(%rbx) + addq %rax,%r9 + movq %r11,%rax + adcq %rdx,%r9 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %r12,%r10 + imulq %rcx,%r12 + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rbx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 0(%rbx) + addq %rax,%r10 + movq %r12,%rax + adcq %rdx,%r10 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %r13,%r11 + imulq %rcx,%r13 + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rbx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r8 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rbx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rbx) + addq %rax,%r11 + movq %r13,%rax + adcq %rdx,%r11 + + mulq 8(%rbx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rbx) + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rbx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r9 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rbx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + .byte 0xf3,0xc3 + + +.def __redq_tail_mont_384; .scl 3; .type 32; .endef +.p2align 5 +__redq_tail_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + + addq 48(%rsi),%r14 + movq %r14,%rax + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + movq %r15,%rcx + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + sbbq %r12,%r12 + + + + + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 + + +.globl sgn0_pty_mont_384 + +.def sgn0_pty_mont_384; .scl 2; .type 32; .endef +.p2align 5 +sgn0_pty_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sgn0_pty_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sgn0_pty_mont_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sgn0_pty_mont_384: + + + movq %rsi,%rbx + leaq 0(%rdi),%rsi + movq %rdx,%rcx + call __mulq_by_1_mont_384 + + xorq %rax,%rax + movq %r14,%r13 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + notq %rax + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sgn0_pty_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sgn0_pty_mont_384: + +.globl sgn0_pty_mont_384x + +.def sgn0_pty_mont_384x; .scl 2; .type 32; .endef +.p2align 5 +sgn0_pty_mont_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sgn0_pty_mont_384x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sgn0_pty_mont_384x$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sgn0_pty_mont_384x: + + + movq %rsi,%rbx + leaq 48(%rdi),%rsi + movq %rdx,%rcx + call __mulq_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + leaq 0(%rdi),%rsi + xorq %rdi,%rdi + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rdi + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rdi + + movq %r14,0(%rsp) + notq %rdi + andq $1,%r13 + andq $2,%rdi + orq %r13,%rdi + + call __mulq_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + xorq %rax,%rax + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + movq 0(%rsp),%r12 + + notq %rax + + testq %r14,%r14 + cmovzq %rdi,%r13 + + testq %r12,%r12 + cmovnzq %rdi,%rax + + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sgn0_pty_mont_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sgn0_pty_mont_384x: +.globl mul_mont_384 + +.def mul_mont_384; .scl 2; .type 32; .endef +.p2align 5 +mul_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mul_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_mont_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $24,%rsp + +.LSEH_body_mul_mont_384: + + + movq 0(%rdx),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq %rdx,%rbx + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + + call __mulq_mont_384 + + movq 24(%rsp),%r15 + + movq 32(%rsp),%r14 + + movq 40(%rsp),%r13 + + movq 48(%rsp),%r12 + + movq 56(%rsp),%rbx + + movq 64(%rsp),%rbp + + leaq 72(%rsp),%rsp + +.LSEH_epilogue_mul_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mul_mont_384: +.def __mulq_mont_384; .scl 3; .type 32; .endef +.p2align 5 +__mulq_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rax,%rdi + mulq %r14 + movq %rax,%r8 + movq %rdi,%rax + movq %rdx,%r9 + + mulq %r15 + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r12 + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r11 + + movq %r8,%rbp + imulq 8(%rsp),%r8 + + mulq %r13 + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq 40(%rsi) + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + xorq %r15,%r15 + movq %rdx,%r14 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r8,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r13 + movq 8(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 8(%rsi) + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r10 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + movq %r9,%rbp + imulq 8(%rsp),%r9 + + mulq 24(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rsi) + addq %r8,%r14 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r14 + movq %r9,%rax + adcq %rdx,%r15 + adcq $0,%r8 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r9,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r14 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq %rdx,%r15 + adcq $0,%r8 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r11 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + movq %r10,%rbp + imulq 8(%rsp),%r10 + + mulq 24(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rsi) + addq %r9,%r15 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r15 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r10,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r15 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 8(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + movq %r11,%rbp + imulq 8(%rsp),%r11 + + mulq 24(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rsi) + addq %r10,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r11,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r8 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 8(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + movq %r12,%rbp + imulq 8(%rsp),%r12 + + mulq 24(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rsi) + addq %rax,%r8 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %r11,%r9 + adcq $0,%rdx + xorq %r11,%r11 + addq %rax,%r9 + movq %r12,%rax + adcq %rdx,%r10 + adcq $0,%r11 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r12,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r8 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r9 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 8(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r14 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 16(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r15 + adcq $0,%rdx + movq %rdx,%r12 + + movq %r13,%rbp + imulq 8(%rsp),%r13 + + mulq 24(%rsi) + addq %rax,%r8 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r8 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r9 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 40(%rsi) + addq %r12,%r10 + adcq $0,%rdx + xorq %r12,%r12 + addq %rax,%r10 + movq %r13,%rax + adcq %rdx,%r11 + adcq $0,%r12 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r13,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq %rdx,%r11 + adcq $0,%r12 + + + + + movq 16(%rsp),%rdi + subq 0(%rcx),%r14 + movq %r15,%rdx + sbbq 8(%rcx),%r15 + movq %r8,%rbx + sbbq 16(%rcx),%r8 + movq %r9,%rsi + sbbq 24(%rcx),%r9 + movq %r10,%rbp + sbbq 32(%rcx),%r10 + movq %r11,%r13 + sbbq 40(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rdx,%r15 + cmovcq %rbx,%r8 + movq %r14,0(%rdi) + cmovcq %rsi,%r9 + movq %r15,8(%rdi) + cmovcq %rbp,%r10 + movq %r8,16(%rdi) + cmovcq %r13,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 + +.globl sqr_n_mul_mont_384 + +.def sqr_n_mul_mont_384; .scl 2; .type 32; .endef +.p2align 5 +sqr_n_mul_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_n_mul_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_n_mul_mont_384$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_sqr_n_mul_mont_384: + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rcx,16(%rsp) + leaq 32(%rsp),%rdi + movq %r9,24(%rsp) + movq (%r9),%xmm2 + +.Loop_sqr_384: + movd %edx,%xmm1 + + call __sqrq_384 + + leaq 0(%rdi),%rsi + movq 0(%rsp),%rcx + movq 16(%rsp),%rbx + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + movd %xmm1,%edx + leaq 0(%rdi),%rsi + decl %edx + jnz .Loop_sqr_384 + +.byte 102,72,15,126,208 + movq %rbx,%rcx + movq 24(%rsp),%rbx + + + + + + + movq %r8,%r12 + movq %r9,%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 136(%rsp),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_sqr_n_mul_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_n_mul_mont_384: + +.globl sqr_n_mul_mont_383 + +.def sqr_n_mul_mont_383; .scl 2; .type 32; .endef +.p2align 5 +sqr_n_mul_mont_383: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_n_mul_mont_383: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_n_mul_mont_383$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_sqr_n_mul_mont_383: + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rcx,16(%rsp) + leaq 32(%rsp),%rdi + movq %r9,24(%rsp) + movq (%r9),%xmm2 + +.Loop_sqr_383: + movd %edx,%xmm1 + + call __sqrq_384 + + leaq 0(%rdi),%rsi + movq 0(%rsp),%rcx + movq 16(%rsp),%rbx + call __mulq_by_1_mont_384 + + movd %xmm1,%edx + addq 48(%rsi),%r14 + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + leaq 0(%rdi),%rsi + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + decl %edx + jnz .Loop_sqr_383 + +.byte 102,72,15,126,208 + movq %rbx,%rcx + movq 24(%rsp),%rbx + + + + + + + movq %r8,%r12 + movq %r9,%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 136(%rsp),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_sqr_n_mul_mont_383: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_n_mul_mont_383: +.def __mulq_mont_383_nonred; .scl 3; .type 32; .endef +.p2align 5 +__mulq_mont_383_nonred: + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rax,%rbp + mulq %r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r12 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + movq %r8,%r15 + imulq 8(%rsp),%r8 + + mulq %r13 + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq 40(%rsi) + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rcx) + addq %rax,%r15 + movq %r8,%rax + adcq %rdx,%r15 + + mulq 8(%rcx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r9 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rcx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rcx) + addq %r15,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rcx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rcx) + addq %rax,%r13 + movq 8(%rbx),%rax + adcq $0,%rdx + addq %r15,%r13 + adcq %rdx,%r14 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 8(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r11 + adcq $0,%rdx + movq %rdx,%r15 + + movq %r9,%r8 + imulq 8(%rsp),%r9 + + mulq 24(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rsi) + addq %r15,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rcx) + addq %rax,%r8 + movq %r9,%rax + adcq %rdx,%r8 + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r10 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 24(%rcx) + addq %r8,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rcx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rcx) + addq %rax,%r14 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %r8,%r14 + adcq %rdx,%r15 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 8(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + movq %r10,%r9 + imulq 8(%rsp),%r10 + + mulq 24(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rsi) + addq %r8,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 0(%rcx) + addq %rax,%r9 + movq %r10,%rax + adcq %rdx,%r9 + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r11 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rcx) + addq %r9,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rcx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rcx) + addq %rax,%r15 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %r9,%r15 + adcq %rdx,%r8 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + movq %r11,%r10 + imulq 8(%rsp),%r11 + + mulq 24(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rsi) + addq %r9,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 0(%rcx) + addq %rax,%r10 + movq %r11,%rax + adcq %rdx,%r10 + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rcx) + addq %r10,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rcx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rcx) + addq %rax,%r8 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %r10,%r8 + adcq %rdx,%r9 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 8(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + movq %r12,%r11 + imulq 8(%rsp),%r12 + + mulq 24(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r8 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rsi) + addq %r10,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rcx) + addq %rax,%r11 + movq %r12,%rax + adcq %rdx,%r11 + + mulq 8(%rcx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rcx) + addq %r11,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rcx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rcx) + addq %rax,%r9 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %r11,%r9 + adcq %rdx,%r10 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 8(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + movq %r13,%r12 + imulq 8(%rsp),%r13 + + mulq 24(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r9 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %r11,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 0(%rcx) + addq %rax,%r12 + movq %r13,%rax + adcq %rdx,%r12 + + mulq 8(%rcx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r14 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 16(%rcx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r15 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 24(%rcx) + addq %r12,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rcx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r9 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 40(%rcx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r12,%r10 + adcq %rdx,%r11 + .byte 0xf3,0xc3 + +.globl sqr_mont_382x + +.def sqr_mont_382x; .scl 2; .type 32; .endef +.p2align 5 +sqr_mont_382x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqr_mont_382x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_382x$1 +#endif + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_sqr_mont_382x: + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rsi,16(%rsp) + movq %rdi,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rdx + adcq 72(%rsi),%r11 + movq %r12,%rbx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rdx + sbbq 80(%rsi),%rbx + sbbq 88(%rsi),%rbp + sbbq %rdi,%rdi + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + movq %r14,32+48(%rsp) + movq %r15,32+56(%rsp) + movq %rax,32+64(%rsp) + movq %rdx,32+72(%rsp) + movq %rbx,32+80(%rsp) + movq %rbp,32+88(%rsp) + movq %rdi,32+96(%rsp) + + + + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + movq 24(%rsp),%rdi + call __mulq_mont_383_nonred + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %r8,64(%rdi) + movq %r9,72(%rdi) + movq %r10,80(%rdi) + movq %r11,88(%rdi) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rax + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%r12 + movq 32+24(%rsp),%r13 + + call __mulq_mont_383_nonred + movq 32+96(%rsp),%rsi + movq 32+0(%rsp),%r12 + movq 32+8(%rsp),%r13 + andq %rsi,%r12 + movq 32+16(%rsp),%rax + andq %rsi,%r13 + movq 32+24(%rsp),%rbx + andq %rsi,%rax + movq 32+32(%rsp),%rbp + andq %rsi,%rbx + andq %rsi,%rbp + andq 32+40(%rsp),%rsi + + subq %r12,%r14 + movq 0(%rcx),%r12 + sbbq %r13,%r15 + movq 8(%rcx),%r13 + sbbq %rax,%r8 + movq 16(%rcx),%rax + sbbq %rbx,%r9 + movq 24(%rcx),%rbx + sbbq %rbp,%r10 + movq 32(%rcx),%rbp + sbbq %rsi,%r11 + sbbq %rsi,%rsi + + andq %rsi,%r12 + andq %rsi,%r13 + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rbp + andq 40(%rcx),%rsi + + addq %r12,%r14 + adcq %r13,%r15 + adcq %rax,%r8 + adcq %rbx,%r9 + adcq %rbp,%r10 + adcq %rsi,%r11 + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_sqr_mont_382x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqr_mont_382x: +.section .pdata +.p2align 2 +.rva .LSEH_begin_mul_mont_384x +.rva .LSEH_body_mul_mont_384x +.rva .LSEH_info_mul_mont_384x_prologue + +.rva .LSEH_body_mul_mont_384x +.rva .LSEH_epilogue_mul_mont_384x +.rva .LSEH_info_mul_mont_384x_body + +.rva .LSEH_epilogue_mul_mont_384x +.rva .LSEH_end_mul_mont_384x +.rva .LSEH_info_mul_mont_384x_epilogue + +.rva .LSEH_begin_sqr_mont_384x +.rva .LSEH_body_sqr_mont_384x +.rva .LSEH_info_sqr_mont_384x_prologue + +.rva .LSEH_body_sqr_mont_384x +.rva .LSEH_epilogue_sqr_mont_384x +.rva .LSEH_info_sqr_mont_384x_body + +.rva .LSEH_epilogue_sqr_mont_384x +.rva .LSEH_end_sqr_mont_384x +.rva .LSEH_info_sqr_mont_384x_epilogue + +.rva .LSEH_begin_mul_382x +.rva .LSEH_body_mul_382x +.rva .LSEH_info_mul_382x_prologue + +.rva .LSEH_body_mul_382x +.rva .LSEH_epilogue_mul_382x +.rva .LSEH_info_mul_382x_body + +.rva .LSEH_epilogue_mul_382x +.rva .LSEH_end_mul_382x +.rva .LSEH_info_mul_382x_epilogue + +.rva .LSEH_begin_sqr_382x +.rva .LSEH_body_sqr_382x +.rva .LSEH_info_sqr_382x_prologue + +.rva .LSEH_body_sqr_382x +.rva .LSEH_epilogue_sqr_382x +.rva .LSEH_info_sqr_382x_body + +.rva .LSEH_epilogue_sqr_382x +.rva .LSEH_end_sqr_382x +.rva .LSEH_info_sqr_382x_epilogue + +.rva .LSEH_begin_mul_384 +.rva .LSEH_body_mul_384 +.rva .LSEH_info_mul_384_prologue + +.rva .LSEH_body_mul_384 +.rva .LSEH_epilogue_mul_384 +.rva .LSEH_info_mul_384_body + +.rva .LSEH_epilogue_mul_384 +.rva .LSEH_end_mul_384 +.rva .LSEH_info_mul_384_epilogue + +.rva .LSEH_begin_sqr_384 +.rva .LSEH_body_sqr_384 +.rva .LSEH_info_sqr_384_prologue + +.rva .LSEH_body_sqr_384 +.rva .LSEH_epilogue_sqr_384 +.rva .LSEH_info_sqr_384_body + +.rva .LSEH_epilogue_sqr_384 +.rva .LSEH_end_sqr_384 +.rva .LSEH_info_sqr_384_epilogue + +.rva .LSEH_begin_sqr_mont_384 +.rva .LSEH_body_sqr_mont_384 +.rva .LSEH_info_sqr_mont_384_prologue + +.rva .LSEH_body_sqr_mont_384 +.rva .LSEH_epilogue_sqr_mont_384 +.rva .LSEH_info_sqr_mont_384_body + +.rva .LSEH_epilogue_sqr_mont_384 +.rva .LSEH_end_sqr_mont_384 +.rva .LSEH_info_sqr_mont_384_epilogue + +.rva .LSEH_begin_redc_mont_384 +.rva .LSEH_body_redc_mont_384 +.rva .LSEH_info_redc_mont_384_prologue + +.rva .LSEH_body_redc_mont_384 +.rva .LSEH_epilogue_redc_mont_384 +.rva .LSEH_info_redc_mont_384_body + +.rva .LSEH_epilogue_redc_mont_384 +.rva .LSEH_end_redc_mont_384 +.rva .LSEH_info_redc_mont_384_epilogue + +.rva .LSEH_begin_from_mont_384 +.rva .LSEH_body_from_mont_384 +.rva .LSEH_info_from_mont_384_prologue + +.rva .LSEH_body_from_mont_384 +.rva .LSEH_epilogue_from_mont_384 +.rva .LSEH_info_from_mont_384_body + +.rva .LSEH_epilogue_from_mont_384 +.rva .LSEH_end_from_mont_384 +.rva .LSEH_info_from_mont_384_epilogue + +.rva .LSEH_begin_sgn0_pty_mont_384 +.rva .LSEH_body_sgn0_pty_mont_384 +.rva .LSEH_info_sgn0_pty_mont_384_prologue + +.rva .LSEH_body_sgn0_pty_mont_384 +.rva .LSEH_epilogue_sgn0_pty_mont_384 +.rva .LSEH_info_sgn0_pty_mont_384_body + +.rva .LSEH_epilogue_sgn0_pty_mont_384 +.rva .LSEH_end_sgn0_pty_mont_384 +.rva .LSEH_info_sgn0_pty_mont_384_epilogue + +.rva .LSEH_begin_sgn0_pty_mont_384x +.rva .LSEH_body_sgn0_pty_mont_384x +.rva .LSEH_info_sgn0_pty_mont_384x_prologue + +.rva .LSEH_body_sgn0_pty_mont_384x +.rva .LSEH_epilogue_sgn0_pty_mont_384x +.rva .LSEH_info_sgn0_pty_mont_384x_body + +.rva .LSEH_epilogue_sgn0_pty_mont_384x +.rva .LSEH_end_sgn0_pty_mont_384x +.rva .LSEH_info_sgn0_pty_mont_384x_epilogue + +.rva .LSEH_begin_mul_mont_384 +.rva .LSEH_body_mul_mont_384 +.rva .LSEH_info_mul_mont_384_prologue + +.rva .LSEH_body_mul_mont_384 +.rva .LSEH_epilogue_mul_mont_384 +.rva .LSEH_info_mul_mont_384_body + +.rva .LSEH_epilogue_mul_mont_384 +.rva .LSEH_end_mul_mont_384 +.rva .LSEH_info_mul_mont_384_epilogue + +.rva .LSEH_begin_sqr_n_mul_mont_384 +.rva .LSEH_body_sqr_n_mul_mont_384 +.rva .LSEH_info_sqr_n_mul_mont_384_prologue + +.rva .LSEH_body_sqr_n_mul_mont_384 +.rva .LSEH_epilogue_sqr_n_mul_mont_384 +.rva .LSEH_info_sqr_n_mul_mont_384_body + +.rva .LSEH_epilogue_sqr_n_mul_mont_384 +.rva .LSEH_end_sqr_n_mul_mont_384 +.rva .LSEH_info_sqr_n_mul_mont_384_epilogue + +.rva .LSEH_begin_sqr_n_mul_mont_383 +.rva .LSEH_body_sqr_n_mul_mont_383 +.rva .LSEH_info_sqr_n_mul_mont_383_prologue + +.rva .LSEH_body_sqr_n_mul_mont_383 +.rva .LSEH_epilogue_sqr_n_mul_mont_383 +.rva .LSEH_info_sqr_n_mul_mont_383_body + +.rva .LSEH_epilogue_sqr_n_mul_mont_383 +.rva .LSEH_end_sqr_n_mul_mont_383 +.rva .LSEH_info_sqr_n_mul_mont_383_epilogue + +.rva .LSEH_begin_sqr_mont_382x +.rva .LSEH_body_sqr_mont_382x +.rva .LSEH_info_sqr_mont_382x_prologue + +.rva .LSEH_body_sqr_mont_382x +.rva .LSEH_epilogue_sqr_mont_382x +.rva .LSEH_info_sqr_mont_382x_body + +.rva .LSEH_epilogue_sqr_mont_382x +.rva .LSEH_end_sqr_mont_382x +.rva .LSEH_info_sqr_mont_382x_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_mul_mont_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_mont_384x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x29,0x00 +.byte 0x00,0xe4,0x2a,0x00 +.byte 0x00,0xd4,0x2b,0x00 +.byte 0x00,0xc4,0x2c,0x00 +.byte 0x00,0x34,0x2d,0x00 +.byte 0x00,0x54,0x2e,0x00 +.byte 0x00,0x74,0x30,0x00 +.byte 0x00,0x64,0x31,0x00 +.byte 0x00,0x01,0x2f,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_mont_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_mont_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_mont_384x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_mont_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_382x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_382x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_382x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_382x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_382x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_382x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_384_body: +.byte 1,0,11,0 +.byte 0x00,0xc4,0x00,0x00 +.byte 0x00,0x34,0x01,0x00 +.byte 0x00,0x54,0x02,0x00 +.byte 0x00,0x74,0x04,0x00 +.byte 0x00,0x64,0x05,0x00 +.byte 0x00,0x22 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.LSEH_info_mul_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_mont_384_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x0f,0x00 +.byte 0x00,0xe4,0x10,0x00 +.byte 0x00,0xd4,0x11,0x00 +.byte 0x00,0xc4,0x12,0x00 +.byte 0x00,0x34,0x13,0x00 +.byte 0x00,0x54,0x14,0x00 +.byte 0x00,0x74,0x16,0x00 +.byte 0x00,0x64,0x17,0x00 +.byte 0x00,0x01,0x15,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_redc_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_redc_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_redc_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_from_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_from_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_from_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sgn0_pty_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sgn0_pty_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sgn0_pty_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sgn0_pty_mont_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sgn0_pty_mont_384x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sgn0_pty_mont_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mul_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mul_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x03,0x00 +.byte 0x00,0xe4,0x04,0x00 +.byte 0x00,0xd4,0x05,0x00 +.byte 0x00,0xc4,0x06,0x00 +.byte 0x00,0x34,0x07,0x00 +.byte 0x00,0x54,0x08,0x00 +.byte 0x00,0x74,0x0a,0x00 +.byte 0x00,0x64,0x0b,0x00 +.byte 0x00,0x82 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mul_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_n_mul_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_n_mul_mont_384_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_n_mul_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_n_mul_mont_383_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_n_mul_mont_383_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_n_mul_mont_383_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqr_mont_382x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqr_mont_382x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqr_mont_382x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/mulx_mont_256-x86_64.s b/crypto/blst_src/build/coff/mulx_mont_256-x86_64.s new file mode 100644 index 00000000000..cba65569c52 --- /dev/null +++ b/crypto/blst_src/build/coff/mulx_mont_256-x86_64.s @@ -0,0 +1,796 @@ +.text + +.globl mulx_mont_sparse_256 + +.def mulx_mont_sparse_256; .scl 2; .type 32; .endef +.p2align 5 +mulx_mont_sparse_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mulx_mont_sparse_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 +mul_mont_sparse_256$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_mulx_mont_sparse_256: + + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rbp + movq 24(%rsi),%r9 + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%rax,%r11 + call __mulx_mont_sparse_256 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_mulx_mont_sparse_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mulx_mont_sparse_256: + +.globl sqrx_mont_sparse_256 + +.def sqrx_mont_sparse_256; .scl 2; .type 32; .endef +.p2align 5 +sqrx_mont_sparse_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_mont_sparse_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +sqr_mont_sparse_256$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sqrx_mont_sparse_256: + + + movq %rsi,%rbx + movq %rcx,%r8 + movq %rdx,%rcx + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rbp + movq 24(%rsi),%r9 + leaq -128(%rbx),%rsi + leaq -128(%rcx),%rcx + + mulxq %rdx,%rax,%r11 + call __mulx_mont_sparse_256 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sqrx_mont_sparse_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_mont_sparse_256: +.def __mulx_mont_sparse_256; .scl 3; .type 32; .endef +.p2align 5 +__mulx_mont_sparse_256: + .byte 0xf3,0x0f,0x1e,0xfa + + mulxq %r15,%r15,%r12 + mulxq %rbp,%rbp,%r13 + addq %r15,%r11 + mulxq %r9,%r9,%r14 + movq 8(%rbx),%rdx + adcq %rbp,%r12 + adcq %r9,%r13 + adcq $0,%r14 + + movq %rax,%r10 + imulq %r8,%rax + + + xorq %r15,%r15 + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r11 + adcxq %r9,%r12 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r12 + adcxq %r9,%r13 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r14 + adcxq %r15,%r9 + adoxq %r9,%r15 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r10 + adoxq %r11,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r12 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r12 + adoxq %r9,%r13 + + mulxq 24+128(%rcx),%rbp,%r9 + movq 16(%rbx),%rdx + adcxq %rbp,%r13 + adoxq %r9,%r14 + adcxq %r10,%r14 + adoxq %r10,%r15 + adcxq %r10,%r15 + adoxq %r10,%r10 + adcq $0,%r10 + movq %rax,%r11 + imulq %r8,%rax + + + xorq %rbp,%rbp + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r12 + adcxq %r9,%r13 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r14 + adcxq %r9,%r15 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r15 + adcxq %r10,%r9 + adoxq %r9,%r10 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r11 + adoxq %r12,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r13 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r13 + adoxq %r9,%r14 + + mulxq 24+128(%rcx),%rbp,%r9 + movq 24(%rbx),%rdx + adcxq %rbp,%r14 + adoxq %r9,%r15 + adcxq %r11,%r15 + adoxq %r11,%r10 + adcxq %r11,%r10 + adoxq %r11,%r11 + adcq $0,%r11 + movq %rax,%r12 + imulq %r8,%rax + + + xorq %rbp,%rbp + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r14 + adcxq %r9,%r15 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r15 + adcxq %r9,%r10 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r10 + adcxq %r11,%r9 + adoxq %r9,%r11 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r12 + adoxq %r13,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r14 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r14 + adoxq %r9,%r15 + + mulxq 24+128(%rcx),%rbp,%r9 + movq %rax,%rdx + adcxq %rbp,%r15 + adoxq %r9,%r10 + adcxq %r12,%r10 + adoxq %r12,%r11 + adcxq %r12,%r11 + adoxq %r12,%r12 + adcq $0,%r12 + imulq %r8,%rdx + + + xorq %rbp,%rbp + mulxq 0+128(%rcx),%r13,%r9 + adcxq %rax,%r13 + adoxq %r9,%r14 + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%r14 + adoxq %r9,%r15 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r15 + adoxq %r9,%r10 + + mulxq 24+128(%rcx),%rbp,%r9 + movq %r14,%rdx + leaq 128(%rcx),%rcx + adcxq %rbp,%r10 + adoxq %r9,%r11 + movq %r15,%rax + adcxq %r13,%r11 + adoxq %r13,%r12 + adcq $0,%r12 + + + + + movq %r10,%rbp + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + sbbq 16(%rcx),%r10 + movq %r11,%r9 + sbbq 24(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rdx,%r14 + cmovcq %rax,%r15 + cmovcq %rbp,%r10 + movq %r14,0(%rdi) + cmovcq %r9,%r11 + movq %r15,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + .byte 0xf3,0xc3 + +.globl fromx_mont_256 + +.def fromx_mont_256; .scl 2; .type 32; .endef +.p2align 5 +fromx_mont_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_fromx_mont_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +from_mont_256$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_fromx_mont_256: + + + movq %rdx,%rbx + call __mulx_by_1_mont_256 + + + + + + movq %r15,%rdx + movq %r10,%r12 + movq %r11,%r13 + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r10 + sbbq 24(%rbx),%r11 + + cmovncq %r14,%rax + cmovncq %r15,%rdx + cmovncq %r10,%r12 + movq %rax,0(%rdi) + cmovncq %r11,%r13 + movq %rdx,8(%rdi) + movq %r12,16(%rdi) + movq %r13,24(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_fromx_mont_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_fromx_mont_256: + +.globl redcx_mont_256 + +.def redcx_mont_256; .scl 2; .type 32; .endef +.p2align 5 +redcx_mont_256: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_redcx_mont_256: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +redc_mont_256$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_redcx_mont_256: + + + movq %rdx,%rbx + call __mulx_by_1_mont_256 + + addq 32(%rsi),%r14 + adcq 40(%rsi),%r15 + movq %r14,%rax + adcq 48(%rsi),%r10 + movq %r15,%rdx + adcq 56(%rsi),%r11 + sbbq %rsi,%rsi + + + + + movq %r10,%r12 + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r10 + movq %r11,%r13 + sbbq 24(%rbx),%r11 + sbbq $0,%rsi + + cmovncq %r14,%rax + cmovncq %r15,%rdx + cmovncq %r10,%r12 + movq %rax,0(%rdi) + cmovncq %r11,%r13 + movq %rdx,8(%rdi) + movq %r12,16(%rdi) + movq %r13,24(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_redcx_mont_256: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_redcx_mont_256: +.def __mulx_by_1_mont_256; .scl 3; .type 32; .endef +.p2align 5 +__mulx_by_1_mont_256: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r11 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + movq %rax,%r14 + imulq %rcx,%rax + movq %rax,%r10 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + movq %r11,%r15 + imulq %rcx,%r11 + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + movq %r12,%r10 + imulq %rcx,%r12 + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r10 + movq %r12,%rax + adcq %rdx,%r10 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rbx) + movq %r13,%r11 + imulq %rcx,%r13 + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rbx) + addq %rax,%r11 + movq %r13,%rax + adcq %rdx,%r11 + + mulq 8(%rbx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rbx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_mulx_mont_sparse_256 +.rva .LSEH_body_mulx_mont_sparse_256 +.rva .LSEH_info_mulx_mont_sparse_256_prologue + +.rva .LSEH_body_mulx_mont_sparse_256 +.rva .LSEH_epilogue_mulx_mont_sparse_256 +.rva .LSEH_info_mulx_mont_sparse_256_body + +.rva .LSEH_epilogue_mulx_mont_sparse_256 +.rva .LSEH_end_mulx_mont_sparse_256 +.rva .LSEH_info_mulx_mont_sparse_256_epilogue + +.rva .LSEH_begin_sqrx_mont_sparse_256 +.rva .LSEH_body_sqrx_mont_sparse_256 +.rva .LSEH_info_sqrx_mont_sparse_256_prologue + +.rva .LSEH_body_sqrx_mont_sparse_256 +.rva .LSEH_epilogue_sqrx_mont_sparse_256 +.rva .LSEH_info_sqrx_mont_sparse_256_body + +.rva .LSEH_epilogue_sqrx_mont_sparse_256 +.rva .LSEH_end_sqrx_mont_sparse_256 +.rva .LSEH_info_sqrx_mont_sparse_256_epilogue + +.rva .LSEH_begin_fromx_mont_256 +.rva .LSEH_body_fromx_mont_256 +.rva .LSEH_info_fromx_mont_256_prologue + +.rva .LSEH_body_fromx_mont_256 +.rva .LSEH_epilogue_fromx_mont_256 +.rva .LSEH_info_fromx_mont_256_body + +.rva .LSEH_epilogue_fromx_mont_256 +.rva .LSEH_end_fromx_mont_256 +.rva .LSEH_info_fromx_mont_256_epilogue + +.rva .LSEH_begin_redcx_mont_256 +.rva .LSEH_body_redcx_mont_256 +.rva .LSEH_info_redcx_mont_256_prologue + +.rva .LSEH_body_redcx_mont_256 +.rva .LSEH_epilogue_redcx_mont_256 +.rva .LSEH_info_redcx_mont_256_body + +.rva .LSEH_epilogue_redcx_mont_256 +.rva .LSEH_end_redcx_mont_256 +.rva .LSEH_info_redcx_mont_256_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_mulx_mont_sparse_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mulx_mont_sparse_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mulx_mont_sparse_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_mont_sparse_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_mont_sparse_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_mont_sparse_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_fromx_mont_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_fromx_mont_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_fromx_mont_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_redcx_mont_256_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_redcx_mont_256_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_redcx_mont_256_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/mulx_mont_384-x86_64.s b/crypto/blst_src/build/coff/mulx_mont_384-x86_64.s new file mode 100644 index 00000000000..ce1354f46b4 --- /dev/null +++ b/crypto/blst_src/build/coff/mulx_mont_384-x86_64.s @@ -0,0 +1,3608 @@ +.text + + + + + + + +.def __subx_mod_384x384; .scl 3; .type 32; .endef +.p2align 5 +__subx_mod_384x384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 + + +.def __addx_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__addx_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + + +.def __subx_mod_384; .scl 3; .type 32; .endef +.p2align 5 +__subx_mod_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__subx_mod_384_a_is_loaded: + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 + +.globl mulx_mont_384x + +.def mulx_mont_384x; .scl 2; .type 32; .endef +.p2align 5 +mulx_mont_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mulx_mont_384x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 +mul_mont_384x$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $328,%rsp + +.LSEH_body_mulx_mont_384x: + + + movq %rdx,%rbx + movq %rdi,32(%rsp) + movq %rsi,24(%rsp) + movq %rdx,16(%rsp) + movq %rcx,8(%rsp) + movq %r8,0(%rsp) + + + + + leaq 40(%rsp),%rdi + call __mulx_384 + + + leaq 48(%rbx),%rbx + leaq 128+48(%rsi),%rsi + leaq 96(%rdi),%rdi + call __mulx_384 + + + movq 8(%rsp),%rcx + leaq (%rbx),%rsi + leaq -48(%rbx),%rdx + leaq 40+192+48(%rsp),%rdi + call __addx_mod_384 + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq -48(%rdi),%rdi + call __addx_mod_384 + + leaq (%rdi),%rbx + leaq 48(%rdi),%rsi + call __mulx_384 + + + leaq (%rdi),%rsi + leaq 40(%rsp),%rdx + movq 8(%rsp),%rcx + call __subx_mod_384x384 + + leaq (%rdi),%rsi + leaq -96(%rdi),%rdx + call __subx_mod_384x384 + + + leaq 40(%rsp),%rsi + leaq 40+96(%rsp),%rdx + leaq 40(%rsp),%rdi + call __subx_mod_384x384 + + leaq (%rcx),%rbx + + + leaq 40(%rsp),%rsi + movq 0(%rsp),%rcx + movq 32(%rsp),%rdi + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + + leaq 40+192(%rsp),%rsi + movq 0(%rsp),%rcx + leaq 48(%rdi),%rdi + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + leaq 328(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_mulx_mont_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mulx_mont_384x: +.globl sqrx_mont_384x + +.def sqrx_mont_384x; .scl 2; .type 32; .endef +.p2align 5 +sqrx_mont_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_mont_384x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +sqr_mont_384x$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_sqrx_mont_384x: + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + + movq %rdi,16(%rsp) + movq %rsi,24(%rsp) + + + leaq 48(%rsi),%rdx + leaq 32(%rsp),%rdi + call __addx_mod_384 + + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq 32+48(%rsp),%rdi + call __subx_mod_384 + + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + addq %rdx,%rdx + adcq %r15,%r15 + adcq %rax,%rax + movq %rdx,%r8 + adcq %r12,%r12 + movq %r15,%r9 + adcq %rdi,%rdi + movq %rax,%r10 + adcq %rbp,%rbp + movq %r12,%r11 + sbbq %rsi,%rsi + + subq 0(%rcx),%rdx + sbbq 8(%rcx),%r15 + movq %rdi,%r13 + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%r12 + sbbq 32(%rcx),%rdi + movq %rbp,%r14 + sbbq 40(%rcx),%rbp + sbbq $0,%rsi + + cmovcq %r8,%rdx + cmovcq %r9,%r15 + cmovcq %r10,%rax + movq %rdx,48(%rbx) + cmovcq %r11,%r12 + movq %r15,56(%rbx) + cmovcq %r13,%rdi + movq %rax,64(%rbx) + cmovcq %r14,%rbp + movq %r12,72(%rbx) + movq %rdi,80(%rbx) + movq %rbp,88(%rbx) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rdx + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%rax + movq 32+24(%rsp),%r12 + movq 32+32(%rsp),%rdi + movq 32+40(%rsp),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_sqrx_mont_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_mont_384x: + +.globl mulx_382x + +.def mulx_382x; .scl 2; .type 32; .endef +.p2align 5 +mulx_382x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mulx_382x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +mul_382x$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_mulx_382x: + + + leaq 96(%rdi),%rdi + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + movq %rdi,16(%rsp) + movq %rcx,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 48(%rsi),%r8 + adcq 56(%rsi),%r9 + adcq 64(%rsi),%r10 + adcq 72(%rsi),%r11 + adcq 80(%rsi),%r12 + adcq 88(%rsi),%r13 + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + + addq 48(%rdx),%r8 + adcq 56(%rdx),%r9 + adcq 64(%rdx),%r10 + adcq 72(%rdx),%r11 + adcq 80(%rdx),%r12 + adcq 88(%rdx),%r13 + + movq %r8,32+48(%rsp) + movq %r9,32+56(%rsp) + movq %r10,32+64(%rsp) + movq %r11,32+72(%rsp) + movq %r12,32+80(%rsp) + movq %r13,32+88(%rsp) + + + leaq 32+0(%rsp),%rsi + leaq 32+48(%rsp),%rbx + call __mulx_384 + + + movq 0(%rsp),%rsi + movq 8(%rsp),%rbx + leaq -96(%rdi),%rdi + call __mulx_384 + + + leaq 48+128(%rsi),%rsi + leaq 48(%rbx),%rbx + leaq 32(%rsp),%rdi + call __mulx_384 + + + movq 16(%rsp),%rsi + leaq 32(%rsp),%rdx + movq 24(%rsp),%rcx + movq %rsi,%rdi + call __subx_mod_384x384 + + + leaq 0(%rdi),%rsi + leaq -96(%rdi),%rdx + call __subx_mod_384x384 + + + leaq -96(%rdi),%rsi + leaq 32(%rsp),%rdx + leaq -96(%rdi),%rdi + call __subx_mod_384x384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_mulx_382x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mulx_382x: +.globl sqrx_382x + +.def sqrx_382x; .scl 2; .type 32; .endef +.p2align 5 +sqrx_382x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_382x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +sqr_382x$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rsi + +.LSEH_body_sqrx_382x: + + + movq %rdx,%rcx + + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%rbx + movq 32(%rsi),%rbp + movq 40(%rsi),%rdx + + movq %r14,%r8 + addq 48(%rsi),%r14 + movq %r15,%r9 + adcq 56(%rsi),%r15 + movq %rax,%r10 + adcq 64(%rsi),%rax + movq %rbx,%r11 + adcq 72(%rsi),%rbx + movq %rbp,%r12 + adcq 80(%rsi),%rbp + movq %rdx,%r13 + adcq 88(%rsi),%rdx + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rdx,40(%rdi) + + + leaq 48(%rsi),%rdx + leaq 48(%rdi),%rdi + call __subx_mod_384_a_is_loaded + + + leaq (%rdi),%rsi + leaq -48(%rdi),%rbx + leaq -48(%rdi),%rdi + call __mulx_384 + + + movq (%rsp),%rsi + leaq 48(%rsi),%rbx + leaq 96(%rdi),%rdi + call __mulx_384 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq 64(%rdi),%rax + movq 72(%rdi),%rbx + movq 80(%rdi),%rbp + addq %r8,%r8 + movq 88(%rdi),%rdx + adcq %r9,%r9 + movq %r8,0(%rdi) + adcq %r10,%r10 + movq %r9,8(%rdi) + adcq %r11,%r11 + movq %r10,16(%rdi) + adcq %r12,%r12 + movq %r11,24(%rdi) + adcq %r13,%r13 + movq %r12,32(%rdi) + adcq %r14,%r14 + movq %r13,40(%rdi) + adcq %r15,%r15 + movq %r14,48(%rdi) + adcq %rax,%rax + movq %r15,56(%rdi) + adcq %rbx,%rbx + movq %rax,64(%rdi) + adcq %rbp,%rbp + movq %rbx,72(%rdi) + adcq %rdx,%rdx + movq %rbp,80(%rdi) + movq %rdx,88(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sqrx_382x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_382x: +.globl mulx_384 + +.def mulx_384; .scl 2; .type 32; .endef +.p2align 5 +mulx_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mulx_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +mul_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +.LSEH_body_mulx_384: + + + movq %rdx,%rbx + call __mulx_384 + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +.LSEH_epilogue_mulx_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mulx_384: + +.def __mulx_384; .scl 3; .type 32; .endef +.p2align 5 +__mulx_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rbx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + leaq -128(%rsi),%rsi + + mulxq %r14,%r9,%rcx + xorq %rbp,%rbp + + mulxq %r15,%r8,%rax + adcxq %rcx,%r8 + movq %r9,0(%rdi) + + mulxq %r10,%r9,%rcx + adcxq %rax,%r9 + + mulxq %r11,%r10,%rax + adcxq %rcx,%r10 + + mulxq %r12,%r11,%rcx + adcxq %rax,%r11 + + mulxq %r13,%r12,%r13 + movq 8(%rbx),%rdx + adcxq %rcx,%r12 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,8(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 16(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,16(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 24(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,24(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 32(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,32(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 40(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,40(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq %rax,%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + movq %r10,64(%rdi) + movq %r11,72(%rdi) + movq %r12,80(%rdi) + movq %r13,88(%rdi) + + .byte 0xf3,0xc3 + +.globl sqrx_384 + +.def sqrx_384; .scl 2; .type 32; .endef +.p2align 5 +sqrx_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_384: + + + movq %rcx,%rdi + movq %rdx,%rsi +sqr_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + +.LSEH_body_sqrx_384: + + + call __sqrx_384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sqrx_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_384: +.def __sqrx_384; .scl 3; .type 32; .endef +.p2align 5 +__sqrx_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%rcx + movq 32(%rsi),%rbx + + + mulxq %r14,%r8,%rdi + movq 40(%rsi),%rbp + mulxq %r15,%r9,%rax + addq %rdi,%r9 + mulxq %rcx,%r10,%rdi + adcq %rax,%r10 + mulxq %rbx,%r11,%rax + adcq %rdi,%r11 + mulxq %rbp,%r12,%r13 + movq %r14,%rdx + adcq %rax,%r12 + adcq $0,%r13 + + + xorq %r14,%r14 + mulxq %r15,%rdi,%rax + adcxq %rdi,%r10 + adoxq %rax,%r11 + + mulxq %rcx,%rdi,%rax + adcxq %rdi,%r11 + adoxq %rax,%r12 + + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r12 + adoxq %rax,%r13 + + mulxq %rbp,%rdi,%rax + movq %r15,%rdx + adcxq %rdi,%r13 + adoxq %r14,%rax + adcxq %rax,%r14 + + + xorq %r15,%r15 + mulxq %rcx,%rdi,%rax + adcxq %rdi,%r12 + adoxq %rax,%r13 + + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r13 + adoxq %rax,%r14 + + mulxq %rbp,%rdi,%rax + movq %rcx,%rdx + adcxq %rdi,%r14 + adoxq %r15,%rax + adcxq %rax,%r15 + + + xorq %rcx,%rcx + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r14 + adoxq %rax,%r15 + + mulxq %rbp,%rdi,%rax + movq %rbx,%rdx + adcxq %rdi,%r15 + adoxq %rcx,%rax + adcxq %rax,%rcx + + + mulxq %rbp,%rdi,%rbx + movq 0(%rsi),%rdx + addq %rdi,%rcx + movq 8(%rsp),%rdi + adcq $0,%rbx + + + xorq %rbp,%rbp + adcxq %r8,%r8 + adcxq %r9,%r9 + adcxq %r10,%r10 + adcxq %r11,%r11 + adcxq %r12,%r12 + + + mulxq %rdx,%rdx,%rax + movq %rdx,0(%rdi) + movq 8(%rsi),%rdx + adoxq %rax,%r8 + movq %r8,8(%rdi) + + mulxq %rdx,%r8,%rax + movq 16(%rsi),%rdx + adoxq %r8,%r9 + adoxq %rax,%r10 + movq %r9,16(%rdi) + movq %r10,24(%rdi) + + mulxq %rdx,%r8,%r9 + movq 24(%rsi),%rdx + adoxq %r8,%r11 + adoxq %r9,%r12 + adcxq %r13,%r13 + adcxq %r14,%r14 + movq %r11,32(%rdi) + movq %r12,40(%rdi) + + mulxq %rdx,%r8,%r9 + movq 32(%rsi),%rdx + adoxq %r8,%r13 + adoxq %r9,%r14 + adcxq %r15,%r15 + adcxq %rcx,%rcx + movq %r13,48(%rdi) + movq %r14,56(%rdi) + + mulxq %rdx,%r8,%r9 + movq 40(%rsi),%rdx + adoxq %r8,%r15 + adoxq %r9,%rcx + adcxq %rbx,%rbx + adcxq %rbp,%rbp + movq %r15,64(%rdi) + movq %rcx,72(%rdi) + + mulxq %rdx,%r8,%r9 + adoxq %r8,%rbx + adoxq %r9,%rbp + + movq %rbx,80(%rdi) + movq %rbp,88(%rdi) + + .byte 0xf3,0xc3 + + + + +.globl redcx_mont_384 + +.def redcx_mont_384; .scl 2; .type 32; .endef +.p2align 5 +redcx_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_redcx_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +redc_mont_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_redcx_mont_384: + + + movq %rdx,%rbx + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_redcx_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_redcx_mont_384: + + + + +.globl fromx_mont_384 + +.def fromx_mont_384; .scl 2; .type 32; .endef +.p2align 5 +fromx_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_fromx_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +from_mont_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_fromx_mont_384: + + + movq %rdx,%rbx + call __mulx_by_1_mont_384 + + + + + movq %r14,%rax + movq %r15,%rcx + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_fromx_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_fromx_mont_384: +.def __mulx_by_1_mont_384; .scl 3; .type 32; .endef +.p2align 5 +__mulx_by_1_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq %rcx,%rdx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + imulq %r8,%rdx + + + xorq %r14,%r14 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r13 + adoxq %r14,%rbp + adcxq %rbp,%r14 + imulq %r9,%rdx + + + xorq %r15,%r15 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r14 + adoxq %r15,%rbp + adcxq %rbp,%r15 + imulq %r10,%rdx + + + xorq %r8,%r8 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r15 + adoxq %r8,%rbp + adcxq %rbp,%r8 + imulq %r11,%rdx + + + xorq %r9,%r9 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r8 + adoxq %r9,%rbp + adcxq %rbp,%r9 + imulq %r12,%rdx + + + xorq %r10,%r10 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r9 + adoxq %r10,%rbp + adcxq %rbp,%r10 + imulq %r13,%rdx + + + xorq %r11,%r11 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r10 + adoxq %r11,%rbp + adcxq %rbp,%r11 + .byte 0xf3,0xc3 + + +.def __redx_tail_mont_384; .scl 3; .type 32; .endef +.p2align 5 +__redx_tail_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + + addq 48(%rsi),%r14 + movq %r14,%rax + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + movq %r15,%rcx + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + sbbq %r12,%r12 + + + + + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 + + +.globl sgn0x_pty_mont_384 + +.def sgn0x_pty_mont_384; .scl 2; .type 32; .endef +.p2align 5 +sgn0x_pty_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sgn0x_pty_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +sgn0_pty_mont_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sgn0x_pty_mont_384: + + + movq %rsi,%rbx + leaq 0(%rdi),%rsi + movq %rdx,%rcx + call __mulx_by_1_mont_384 + + xorq %rax,%rax + movq %r14,%r13 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + notq %rax + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sgn0x_pty_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sgn0x_pty_mont_384: + +.globl sgn0x_pty_mont_384x + +.def sgn0x_pty_mont_384x; .scl 2; .type 32; .endef +.p2align 5 +sgn0x_pty_mont_384x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sgn0x_pty_mont_384x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +sgn0_pty_mont_384x$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $8,%rsp + +.LSEH_body_sgn0x_pty_mont_384x: + + + movq %rsi,%rbx + leaq 48(%rdi),%rsi + movq %rdx,%rcx + call __mulx_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + leaq 0(%rdi),%rsi + xorq %rdi,%rdi + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rdi + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rdi + + movq %r14,0(%rsp) + notq %rdi + andq $1,%r13 + andq $2,%rdi + orq %r13,%rdi + + call __mulx_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + xorq %rax,%rax + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + movq 0(%rsp),%r12 + + notq %rax + + testq %r14,%r14 + cmovzq %rdi,%r13 + + testq %r12,%r12 + cmovnzq %rdi,%rax + + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 + + movq 16(%rsp),%r14 + + movq 24(%rsp),%r13 + + movq 32(%rsp),%r12 + + movq 40(%rsp),%rbx + + movq 48(%rsp),%rbp + + leaq 56(%rsp),%rsp + +.LSEH_epilogue_sgn0x_pty_mont_384x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sgn0x_pty_mont_384x: +.globl mulx_mont_384 + +.def mulx_mont_384; .scl 2; .type 32; .endef +.p2align 5 +mulx_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_mulx_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 +mul_mont_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + leaq -24(%rsp),%rsp + +.LSEH_body_mulx_mont_384: + + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + movq %r8,(%rsp) + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 24(%rsp),%r15 + + movq 32(%rsp),%r14 + + movq 40(%rsp),%r13 + + movq 48(%rsp),%r12 + + movq 56(%rsp),%rbx + + movq 64(%rsp),%rbp + + leaq 72(%rsp),%rsp + +.LSEH_epilogue_mulx_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_mulx_mont_384: +.def __mulx_mont_384; .scl 3; .type 32; .endef +.p2align 5 +__mulx_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + + + mulxq %r15,%r14,%r10 + mulxq %rax,%r15,%r11 + addq %r14,%r9 + mulxq %r12,%rax,%r12 + adcq %r15,%r10 + mulxq %rdi,%rdi,%r13 + adcq %rax,%r11 + mulxq %rbp,%rbp,%r14 + movq 8(%rbx),%rdx + adcq %rdi,%r12 + adcq %rbp,%r13 + adcq $0,%r14 + xorq %r15,%r15 + + movq %r8,16(%rsp) + imulq 8(%rsp),%r8 + + + xorq %rax,%rax + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r9 + adcxq %rbp,%r10 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r8,%rdx + adoxq %rdi,%r14 + adcxq %rbp,%r15 + adoxq %rax,%r15 + adoxq %rax,%rax + + + xorq %r8,%r8 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r9 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 16(%rbx),%rdx + adcxq %rdi,%r13 + adoxq %rbp,%r14 + adcxq %r8,%r14 + adoxq %r8,%r15 + adcxq %r8,%r15 + adoxq %r8,%rax + adcxq %r8,%rax + movq %r9,16(%rsp) + imulq 8(%rsp),%r9 + + + xorq %r8,%r8 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r9,%rdx + adoxq %rdi,%r15 + adcxq %rbp,%rax + adoxq %r8,%rax + adoxq %r8,%r8 + + + xorq %r9,%r9 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r10 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 24(%rbx),%rdx + adcxq %rdi,%r14 + adoxq %rbp,%r15 + adcxq %r9,%r15 + adoxq %r9,%rax + adcxq %r9,%rax + adoxq %r9,%r8 + adcxq %r9,%r8 + movq %r10,16(%rsp) + imulq 8(%rsp),%r10 + + + xorq %r9,%r9 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r10,%rdx + adoxq %rdi,%rax + adcxq %rbp,%r8 + adoxq %r9,%r8 + adoxq %r9,%r9 + + + xorq %r10,%r10 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r11 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 32(%rbx),%rdx + adcxq %rdi,%r15 + adoxq %rbp,%rax + adcxq %r10,%rax + adoxq %r10,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcxq %r10,%r9 + movq %r11,16(%rsp) + imulq 8(%rsp),%r11 + + + xorq %r10,%r10 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r11,%rdx + adoxq %rdi,%r8 + adcxq %rbp,%r9 + adoxq %r10,%r9 + adoxq %r10,%r10 + + + xorq %r11,%r11 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r12 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 40+128(%rcx),%rdi,%rbp + movq 40(%rbx),%rdx + adcxq %rdi,%rax + adoxq %rbp,%r8 + adcxq %r11,%r8 + adoxq %r11,%r9 + adcxq %r11,%r9 + adoxq %r11,%r10 + adcxq %r11,%r10 + movq %r12,16(%rsp) + imulq 8(%rsp),%r12 + + + xorq %r11,%r11 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r8 + adcxq %rbp,%r9 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r12,%rdx + adoxq %rdi,%r9 + adcxq %rbp,%r10 + adoxq %r11,%r10 + adoxq %r11,%r11 + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r13 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r13,%rdx + adcxq %rdi,%r8 + adoxq %rbp,%r9 + adcxq %r12,%r9 + adoxq %r12,%r10 + adcxq %r12,%r10 + adoxq %r12,%r11 + adcxq %r12,%r11 + imulq 8(%rsp),%rdx + movq 24(%rsp),%rbx + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + movq %r15,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r9 + movq %rax,%rsi + + mulxq 40+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + movq %r14,%rdx + adcxq %r12,%r10 + adoxq %r12,%r11 + leaq 128(%rcx),%rcx + movq %r8,%r12 + adcq $0,%r11 + + + + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %r9,%rdi + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%r8 + sbbq 32(%rcx),%r9 + movq %r10,%rbp + sbbq 40(%rcx),%r10 + sbbq $0,%r11 + + cmovncq %r14,%rdx + cmovcq %r13,%r15 + cmovcq %rsi,%rax + cmovncq %r8,%r12 + movq %rdx,0(%rbx) + cmovncq %r9,%rdi + movq %r15,8(%rbx) + cmovncq %r10,%rbp + movq %rax,16(%rbx) + movq %r12,24(%rbx) + movq %rdi,32(%rbx) + movq %rbp,40(%rbx) + + .byte 0xf3,0xc3 + + +.globl sqrx_mont_384 + +.def sqrx_mont_384; .scl 2; .type 32; .endef +.p2align 5 +sqrx_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +sqr_mont_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + leaq -24(%rsp),%rsp + +.LSEH_body_sqrx_mont_384: + + + movq %rcx,%r8 + leaq -128(%rdx),%rcx + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + leaq (%rsi),%rbx + movq %r8,(%rsp) + leaq -128(%rsi),%rsi + + mulxq %rdx,%r8,%r9 + call __mulx_mont_384 + + movq 24(%rsp),%r15 + + movq 32(%rsp),%r14 + + movq 40(%rsp),%r13 + + movq 48(%rsp),%r12 + + movq 56(%rsp),%rbx + + movq 64(%rsp),%rbp + + leaq 72(%rsp),%rsp + +.LSEH_epilogue_sqrx_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_mont_384: + +.globl sqrx_n_mul_mont_384 + +.def sqrx_n_mul_mont_384; .scl 2; .type 32; .endef +.p2align 5 +sqrx_n_mul_mont_384: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_n_mul_mont_384: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 +sqr_n_mul_mont_384$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + leaq -40(%rsp),%rsp + +.LSEH_body_sqrx_n_mul_mont_384: + + + movq %rdx,%r10 + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq %rsi,%rbx + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + movq %r8,(%rsp) + movq %r9,24(%rsp) + movq 0(%r9),%xmm2 + +.Loop_sqrx_384: + movd %r10d,%xmm1 + leaq -128(%rbx),%rsi + leaq -128(%rcx),%rcx + + mulxq %rdx,%r8,%r9 + call __mulx_mont_384 + + movd %xmm1,%r10d + decl %r10d + jnz .Loop_sqrx_384 + + movq %rdx,%r14 +.byte 102,72,15,126,210 + leaq -128(%rbx),%rsi + movq 24(%rsp),%rbx + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 40(%rsp),%r15 + + movq 48(%rsp),%r14 + + movq 56(%rsp),%r13 + + movq 64(%rsp),%r12 + + movq 72(%rsp),%rbx + + movq 80(%rsp),%rbp + + leaq 88(%rsp),%rsp + +.LSEH_epilogue_sqrx_n_mul_mont_384: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_n_mul_mont_384: + +.globl sqrx_n_mul_mont_383 + +.def sqrx_n_mul_mont_383; .scl 2; .type 32; .endef +.p2align 5 +sqrx_n_mul_mont_383: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_n_mul_mont_383: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 +sqr_n_mul_mont_383$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + leaq -40(%rsp),%rsp + +.LSEH_body_sqrx_n_mul_mont_383: + + + movq %rdx,%r10 + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq %rsi,%rbx + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + movq %r8,(%rsp) + movq %r9,24(%rsp) + movq 0(%r9),%xmm2 + leaq -128(%rcx),%rcx + +.Loop_sqrx_383: + movd %r10d,%xmm1 + leaq -128(%rbx),%rsi + + mulxq %rdx,%r8,%r9 + call __mulx_mont_383_nonred + + movd %xmm1,%r10d + decl %r10d + jnz .Loop_sqrx_383 + + movq %rdx,%r14 +.byte 102,72,15,126,210 + leaq -128(%rbx),%rsi + movq 24(%rsp),%rbx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 40(%rsp),%r15 + + movq 48(%rsp),%r14 + + movq 56(%rsp),%r13 + + movq 64(%rsp),%r12 + + movq 72(%rsp),%rbx + + movq 80(%rsp),%rbp + + leaq 88(%rsp),%rsp + +.LSEH_epilogue_sqrx_n_mul_mont_383: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_n_mul_mont_383: +.def __mulx_mont_383_nonred; .scl 3; .type 32; .endef +.p2align 5 +__mulx_mont_383_nonred: + .byte 0xf3,0x0f,0x1e,0xfa + + + mulxq %r15,%r14,%r10 + mulxq %rax,%r15,%r11 + addq %r14,%r9 + mulxq %r12,%rax,%r12 + adcq %r15,%r10 + mulxq %rdi,%rdi,%r13 + adcq %rax,%r11 + mulxq %rbp,%rbp,%r14 + movq 8(%rbx),%rdx + adcq %rdi,%r12 + adcq %rbp,%r13 + adcq $0,%r14 + movq %r8,%rax + imulq 8(%rsp),%r8 + + + xorq %r15,%r15 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r9 + adcxq %rbp,%r10 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r8,%rdx + adoxq %rdi,%r14 + adcxq %r15,%rbp + adoxq %rbp,%r15 + + + xorq %r8,%r8 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r9 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 16(%rbx),%rdx + adcxq %rdi,%r13 + adoxq %rbp,%r14 + adcxq %rax,%r14 + adoxq %rax,%r15 + adcxq %rax,%r15 + movq %r9,%r8 + imulq 8(%rsp),%r9 + + + xorq %rax,%rax + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r9,%rdx + adoxq %rdi,%r15 + adcxq %rax,%rbp + adoxq %rbp,%rax + + + xorq %r9,%r9 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r10 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 24(%rbx),%rdx + adcxq %rdi,%r14 + adoxq %rbp,%r15 + adcxq %r8,%r15 + adoxq %r8,%rax + adcxq %r8,%rax + movq %r10,%r9 + imulq 8(%rsp),%r10 + + + xorq %r8,%r8 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r10,%rdx + adoxq %rdi,%rax + adcxq %r8,%rbp + adoxq %rbp,%r8 + + + xorq %r10,%r10 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r11 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 32(%rbx),%rdx + adcxq %rdi,%r15 + adoxq %rbp,%rax + adcxq %r9,%rax + adoxq %r9,%r8 + adcxq %r9,%r8 + movq %r11,%r10 + imulq 8(%rsp),%r11 + + + xorq %r9,%r9 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r11,%rdx + adoxq %rdi,%r8 + adcxq %r9,%rbp + adoxq %rbp,%r9 + + + xorq %r11,%r11 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r12 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 40+128(%rcx),%rdi,%rbp + movq 40(%rbx),%rdx + adcxq %rdi,%rax + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcxq %r10,%r9 + movq %r12,%r11 + imulq 8(%rsp),%r12 + + + xorq %r10,%r10 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r8 + adcxq %rbp,%r9 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r12,%rdx + adoxq %rdi,%r9 + adcxq %r10,%rbp + adoxq %rbp,%r10 + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r13 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r13,%rdx + adcxq %rdi,%r8 + adoxq %rbp,%r9 + adcxq %r11,%r9 + adoxq %r11,%r10 + adcxq %r11,%r10 + imulq 8(%rsp),%rdx + movq 24(%rsp),%rbx + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r9 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r14,%rdx + adcxq %rdi,%r9 + adoxq %rbp,%r10 + adcq $0,%r10 + movq %r8,%r12 + + movq %r14,0(%rbx) + movq %r15,8(%rbx) + movq %rax,16(%rbx) + movq %r9,%rdi + movq %r8,24(%rbx) + movq %r9,32(%rbx) + movq %r10,40(%rbx) + movq %r10,%rbp + + .byte 0xf3,0xc3 + + +.globl sqrx_mont_382x + +.def sqrx_mont_382x; .scl 2; .type 32; .endef +.p2align 5 +sqrx_mont_382x: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_sqrx_mont_382x: + + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx +sqr_mont_382x$1: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $136,%rsp + +.LSEH_body_sqrx_mont_382x: + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rdi,16(%rsp) + movq %rsi,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rdx + adcq 72(%rsi),%r11 + movq %r12,%rbx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rdx + sbbq 80(%rsi),%rbx + sbbq 88(%rsi),%rbp + sbbq %rdi,%rdi + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + movq %r14,32+48(%rsp) + movq %r15,32+56(%rsp) + movq %rax,32+64(%rsp) + movq %rdx,32+72(%rsp) + movq %rbx,32+80(%rsp) + movq %rbp,32+88(%rsp) + movq %rdi,32+96(%rsp) + + + + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_383_nonred + addq %rdx,%rdx + adcq %r15,%r15 + adcq %rax,%rax + adcq %r12,%r12 + adcq %rdi,%rdi + adcq %rbp,%rbp + + movq %rdx,48(%rbx) + movq %r15,56(%rbx) + movq %rax,64(%rbx) + movq %r12,72(%rbx) + movq %rdi,80(%rbx) + movq %rbp,88(%rbx) + + leaq 32-128(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rdx + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%rax + movq 32+24(%rsp),%r12 + movq 32+32(%rsp),%rdi + movq 32+40(%rsp),%rbp + + + + mulxq %r14,%r8,%r9 + call __mulx_mont_383_nonred + movq 32+96(%rsp),%r14 + leaq 128(%rcx),%rcx + movq 32+0(%rsp),%r8 + andq %r14,%r8 + movq 32+8(%rsp),%r9 + andq %r14,%r9 + movq 32+16(%rsp),%r10 + andq %r14,%r10 + movq 32+24(%rsp),%r11 + andq %r14,%r11 + movq 32+32(%rsp),%r13 + andq %r14,%r13 + andq 32+40(%rsp),%r14 + + subq %r8,%rdx + movq 0(%rcx),%r8 + sbbq %r9,%r15 + movq 8(%rcx),%r9 + sbbq %r10,%rax + movq 16(%rcx),%r10 + sbbq %r11,%r12 + movq 24(%rcx),%r11 + sbbq %r13,%rdi + movq 32(%rcx),%r13 + sbbq %r14,%rbp + sbbq %r14,%r14 + + andq %r14,%r8 + andq %r14,%r9 + andq %r14,%r10 + andq %r14,%r11 + andq %r14,%r13 + andq 40(%rcx),%r14 + + addq %r8,%rdx + adcq %r9,%r15 + adcq %r10,%rax + adcq %r11,%r12 + adcq %r13,%rdi + adcq %r14,%rbp + + movq %rdx,0(%rbx) + movq %r15,8(%rbx) + movq %rax,16(%rbx) + movq %r12,24(%rbx) + movq %rdi,32(%rbx) + movq %rbp,40(%rbx) + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 + + movq 8(%r8),%r14 + + movq 16(%r8),%r13 + + movq 24(%r8),%r12 + + movq 32(%r8),%rbx + + movq 40(%r8),%rbp + + leaq 48(%r8),%rsp + +.LSEH_epilogue_sqrx_mont_382x: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_sqrx_mont_382x: +.section .pdata +.p2align 2 +.rva .LSEH_begin_mulx_mont_384x +.rva .LSEH_body_mulx_mont_384x +.rva .LSEH_info_mulx_mont_384x_prologue + +.rva .LSEH_body_mulx_mont_384x +.rva .LSEH_epilogue_mulx_mont_384x +.rva .LSEH_info_mulx_mont_384x_body + +.rva .LSEH_epilogue_mulx_mont_384x +.rva .LSEH_end_mulx_mont_384x +.rva .LSEH_info_mulx_mont_384x_epilogue + +.rva .LSEH_begin_sqrx_mont_384x +.rva .LSEH_body_sqrx_mont_384x +.rva .LSEH_info_sqrx_mont_384x_prologue + +.rva .LSEH_body_sqrx_mont_384x +.rva .LSEH_epilogue_sqrx_mont_384x +.rva .LSEH_info_sqrx_mont_384x_body + +.rva .LSEH_epilogue_sqrx_mont_384x +.rva .LSEH_end_sqrx_mont_384x +.rva .LSEH_info_sqrx_mont_384x_epilogue + +.rva .LSEH_begin_mulx_382x +.rva .LSEH_body_mulx_382x +.rva .LSEH_info_mulx_382x_prologue + +.rva .LSEH_body_mulx_382x +.rva .LSEH_epilogue_mulx_382x +.rva .LSEH_info_mulx_382x_body + +.rva .LSEH_epilogue_mulx_382x +.rva .LSEH_end_mulx_382x +.rva .LSEH_info_mulx_382x_epilogue + +.rva .LSEH_begin_sqrx_382x +.rva .LSEH_body_sqrx_382x +.rva .LSEH_info_sqrx_382x_prologue + +.rva .LSEH_body_sqrx_382x +.rva .LSEH_epilogue_sqrx_382x +.rva .LSEH_info_sqrx_382x_body + +.rva .LSEH_epilogue_sqrx_382x +.rva .LSEH_end_sqrx_382x +.rva .LSEH_info_sqrx_382x_epilogue + +.rva .LSEH_begin_mulx_384 +.rva .LSEH_body_mulx_384 +.rva .LSEH_info_mulx_384_prologue + +.rva .LSEH_body_mulx_384 +.rva .LSEH_epilogue_mulx_384 +.rva .LSEH_info_mulx_384_body + +.rva .LSEH_epilogue_mulx_384 +.rva .LSEH_end_mulx_384 +.rva .LSEH_info_mulx_384_epilogue + +.rva .LSEH_begin_sqrx_384 +.rva .LSEH_body_sqrx_384 +.rva .LSEH_info_sqrx_384_prologue + +.rva .LSEH_body_sqrx_384 +.rva .LSEH_epilogue_sqrx_384 +.rva .LSEH_info_sqrx_384_body + +.rva .LSEH_epilogue_sqrx_384 +.rva .LSEH_end_sqrx_384 +.rva .LSEH_info_sqrx_384_epilogue + +.rva .LSEH_begin_redcx_mont_384 +.rva .LSEH_body_redcx_mont_384 +.rva .LSEH_info_redcx_mont_384_prologue + +.rva .LSEH_body_redcx_mont_384 +.rva .LSEH_epilogue_redcx_mont_384 +.rva .LSEH_info_redcx_mont_384_body + +.rva .LSEH_epilogue_redcx_mont_384 +.rva .LSEH_end_redcx_mont_384 +.rva .LSEH_info_redcx_mont_384_epilogue + +.rva .LSEH_begin_fromx_mont_384 +.rva .LSEH_body_fromx_mont_384 +.rva .LSEH_info_fromx_mont_384_prologue + +.rva .LSEH_body_fromx_mont_384 +.rva .LSEH_epilogue_fromx_mont_384 +.rva .LSEH_info_fromx_mont_384_body + +.rva .LSEH_epilogue_fromx_mont_384 +.rva .LSEH_end_fromx_mont_384 +.rva .LSEH_info_fromx_mont_384_epilogue + +.rva .LSEH_begin_sgn0x_pty_mont_384 +.rva .LSEH_body_sgn0x_pty_mont_384 +.rva .LSEH_info_sgn0x_pty_mont_384_prologue + +.rva .LSEH_body_sgn0x_pty_mont_384 +.rva .LSEH_epilogue_sgn0x_pty_mont_384 +.rva .LSEH_info_sgn0x_pty_mont_384_body + +.rva .LSEH_epilogue_sgn0x_pty_mont_384 +.rva .LSEH_end_sgn0x_pty_mont_384 +.rva .LSEH_info_sgn0x_pty_mont_384_epilogue + +.rva .LSEH_begin_sgn0x_pty_mont_384x +.rva .LSEH_body_sgn0x_pty_mont_384x +.rva .LSEH_info_sgn0x_pty_mont_384x_prologue + +.rva .LSEH_body_sgn0x_pty_mont_384x +.rva .LSEH_epilogue_sgn0x_pty_mont_384x +.rva .LSEH_info_sgn0x_pty_mont_384x_body + +.rva .LSEH_epilogue_sgn0x_pty_mont_384x +.rva .LSEH_end_sgn0x_pty_mont_384x +.rva .LSEH_info_sgn0x_pty_mont_384x_epilogue + +.rva .LSEH_begin_mulx_mont_384 +.rva .LSEH_body_mulx_mont_384 +.rva .LSEH_info_mulx_mont_384_prologue + +.rva .LSEH_body_mulx_mont_384 +.rva .LSEH_epilogue_mulx_mont_384 +.rva .LSEH_info_mulx_mont_384_body + +.rva .LSEH_epilogue_mulx_mont_384 +.rva .LSEH_end_mulx_mont_384 +.rva .LSEH_info_mulx_mont_384_epilogue + +.rva .LSEH_begin_sqrx_mont_384 +.rva .LSEH_body_sqrx_mont_384 +.rva .LSEH_info_sqrx_mont_384_prologue + +.rva .LSEH_body_sqrx_mont_384 +.rva .LSEH_epilogue_sqrx_mont_384 +.rva .LSEH_info_sqrx_mont_384_body + +.rva .LSEH_epilogue_sqrx_mont_384 +.rva .LSEH_end_sqrx_mont_384 +.rva .LSEH_info_sqrx_mont_384_epilogue + +.rva .LSEH_begin_sqrx_n_mul_mont_384 +.rva .LSEH_body_sqrx_n_mul_mont_384 +.rva .LSEH_info_sqrx_n_mul_mont_384_prologue + +.rva .LSEH_body_sqrx_n_mul_mont_384 +.rva .LSEH_epilogue_sqrx_n_mul_mont_384 +.rva .LSEH_info_sqrx_n_mul_mont_384_body + +.rva .LSEH_epilogue_sqrx_n_mul_mont_384 +.rva .LSEH_end_sqrx_n_mul_mont_384 +.rva .LSEH_info_sqrx_n_mul_mont_384_epilogue + +.rva .LSEH_begin_sqrx_n_mul_mont_383 +.rva .LSEH_body_sqrx_n_mul_mont_383 +.rva .LSEH_info_sqrx_n_mul_mont_383_prologue + +.rva .LSEH_body_sqrx_n_mul_mont_383 +.rva .LSEH_epilogue_sqrx_n_mul_mont_383 +.rva .LSEH_info_sqrx_n_mul_mont_383_body + +.rva .LSEH_epilogue_sqrx_n_mul_mont_383 +.rva .LSEH_end_sqrx_n_mul_mont_383 +.rva .LSEH_info_sqrx_n_mul_mont_383_epilogue + +.rva .LSEH_begin_sqrx_mont_382x +.rva .LSEH_body_sqrx_mont_382x +.rva .LSEH_info_sqrx_mont_382x_prologue + +.rva .LSEH_body_sqrx_mont_382x +.rva .LSEH_epilogue_sqrx_mont_382x +.rva .LSEH_info_sqrx_mont_382x_body + +.rva .LSEH_epilogue_sqrx_mont_382x +.rva .LSEH_end_sqrx_mont_382x +.rva .LSEH_info_sqrx_mont_382x_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_mulx_mont_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mulx_mont_384x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x29,0x00 +.byte 0x00,0xe4,0x2a,0x00 +.byte 0x00,0xd4,0x2b,0x00 +.byte 0x00,0xc4,0x2c,0x00 +.byte 0x00,0x34,0x2d,0x00 +.byte 0x00,0x54,0x2e,0x00 +.byte 0x00,0x74,0x30,0x00 +.byte 0x00,0x64,0x31,0x00 +.byte 0x00,0x01,0x2f,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mulx_mont_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_mont_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_mont_384x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_mont_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mulx_382x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mulx_382x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mulx_382x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_382x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_382x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_382x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mulx_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mulx_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x00,0x00 +.byte 0x00,0xe4,0x01,0x00 +.byte 0x00,0xd4,0x02,0x00 +.byte 0x00,0xc4,0x03,0x00 +.byte 0x00,0x34,0x04,0x00 +.byte 0x00,0x54,0x05,0x00 +.byte 0x00,0x74,0x07,0x00 +.byte 0x00,0x64,0x08,0x00 +.byte 0x00,0x52 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mulx_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_redcx_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_redcx_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_redcx_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_fromx_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_fromx_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_fromx_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sgn0x_pty_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sgn0x_pty_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sgn0x_pty_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sgn0x_pty_mont_384x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sgn0x_pty_mont_384x_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x01,0x00 +.byte 0x00,0xe4,0x02,0x00 +.byte 0x00,0xd4,0x03,0x00 +.byte 0x00,0xc4,0x04,0x00 +.byte 0x00,0x34,0x05,0x00 +.byte 0x00,0x54,0x06,0x00 +.byte 0x00,0x74,0x08,0x00 +.byte 0x00,0x64,0x09,0x00 +.byte 0x00,0x62 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sgn0x_pty_mont_384x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_mulx_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_mulx_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x03,0x00 +.byte 0x00,0xe4,0x04,0x00 +.byte 0x00,0xd4,0x05,0x00 +.byte 0x00,0xc4,0x06,0x00 +.byte 0x00,0x34,0x07,0x00 +.byte 0x00,0x54,0x08,0x00 +.byte 0x00,0x74,0x0a,0x00 +.byte 0x00,0x64,0x0b,0x00 +.byte 0x00,0x82 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_mulx_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x03,0x00 +.byte 0x00,0xe4,0x04,0x00 +.byte 0x00,0xd4,0x05,0x00 +.byte 0x00,0xc4,0x06,0x00 +.byte 0x00,0x34,0x07,0x00 +.byte 0x00,0x54,0x08,0x00 +.byte 0x00,0x74,0x0a,0x00 +.byte 0x00,0x64,0x0b,0x00 +.byte 0x00,0x82 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_n_mul_mont_384_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_n_mul_mont_384_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x05,0x00 +.byte 0x00,0xe4,0x06,0x00 +.byte 0x00,0xd4,0x07,0x00 +.byte 0x00,0xc4,0x08,0x00 +.byte 0x00,0x34,0x09,0x00 +.byte 0x00,0x54,0x0a,0x00 +.byte 0x00,0x74,0x0c,0x00 +.byte 0x00,0x64,0x0d,0x00 +.byte 0x00,0xa2 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_n_mul_mont_384_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_n_mul_mont_383_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_n_mul_mont_383_body: +.byte 1,0,17,0 +.byte 0x00,0xf4,0x05,0x00 +.byte 0x00,0xe4,0x06,0x00 +.byte 0x00,0xd4,0x07,0x00 +.byte 0x00,0xc4,0x08,0x00 +.byte 0x00,0x34,0x09,0x00 +.byte 0x00,0x54,0x0a,0x00 +.byte 0x00,0x74,0x0c,0x00 +.byte 0x00,0x64,0x0d,0x00 +.byte 0x00,0xa2 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_n_mul_mont_383_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_sqrx_mont_382x_prologue: +.byte 1,0,5,0x0b +.byte 0,0x74,1,0 +.byte 0,0x64,2,0 +.byte 0,0xb3 +.byte 0,0 +.long 0,0 +.LSEH_info_sqrx_mont_382x_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x11,0x00 +.byte 0x00,0xe4,0x12,0x00 +.byte 0x00,0xd4,0x13,0x00 +.byte 0x00,0xc4,0x14,0x00 +.byte 0x00,0x34,0x15,0x00 +.byte 0x00,0x54,0x16,0x00 +.byte 0x00,0x74,0x18,0x00 +.byte 0x00,0x64,0x19,0x00 +.byte 0x00,0x01,0x17,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_sqrx_mont_382x_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/sha256-armv8.S b/crypto/blst_src/build/coff/sha256-armv8.S new file mode 100644 index 00000000000..a4cd8090896 --- /dev/null +++ b/crypto/blst_src/build/coff/sha256-armv8.S @@ -0,0 +1,1093 @@ +// +// Copyright Supranational LLC +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// ==================================================================== +// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +// project. +// ==================================================================== +// +// sha256_block procedure for ARMv8. +// +// This module is stripped of scalar code paths, with rationale that all +// known processors are NEON-capable. +// +// See original module at CRYPTOGAMS for further details. + +.comm __blst_platform_cap,4 +.text + +.p2align 6 + +.LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator + +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.align 2 +.p2align 2 +.globl blst_sha256_block_armv8 +.def blst_sha256_block_armv8; +.type 32; +.endef +.p2align 6 +blst_sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,.LK256 + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret + +.globl blst_sha256_block_data_order +.def blst_sha256_block_data_order; +.type 32; +.endef +.p2align 4 +blst_sha256_block_data_order: + adrp x16,__blst_platform_cap + ldr w16,[x16,#:lo12:__blst_platform_cap] + tst w16,#1 + b.ne .Lv8_entry + + stp x29, x30, [sp, #-16]! + mov x29, sp + sub sp,sp,#16*4 + + adr x16,.LK256 + add x2,x1,x2,lsl#6 // len to point at the end of inp + + ld1 {v0.16b},[x1], #16 + ld1 {v1.16b},[x1], #16 + ld1 {v2.16b},[x1], #16 + ld1 {v3.16b},[x1], #16 + ld1 {v4.4s},[x16], #16 + ld1 {v5.4s},[x16], #16 + ld1 {v6.4s},[x16], #16 + ld1 {v7.4s},[x16], #16 + rev32 v0.16b,v0.16b // yes, even on + rev32 v1.16b,v1.16b // big-endian + rev32 v2.16b,v2.16b + rev32 v3.16b,v3.16b + mov x17,sp + add v4.4s,v4.4s,v0.4s + add v5.4s,v5.4s,v1.4s + add v6.4s,v6.4s,v2.4s + st1 {v4.4s,v5.4s},[x17], #32 + add v7.4s,v7.4s,v3.4s + st1 {v6.4s,v7.4s},[x17] + sub x17,x17,#32 + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#8] + ldp w7,w8,[x0,#16] + ldp w9,w10,[x0,#24] + ldr w12,[sp,#0] + mov w13,wzr + eor w14,w4,w5 + mov w15,wzr + b .L_00_48 + +.p2align 4 +.L_00_48: + ext v4.16b,v0.16b,v1.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v2.16b,v3.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v3.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v0.4s,v0.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v0.4s,v0.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v0.4s,v0.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v0.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v0.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v0.4s,#15 + add w8,w8,w12 + ushr v17.4s,v0.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v0.4s,#13 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v0.4s,v0.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v0.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v1.16b,v2.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v3.16b,v0.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v0.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v1.4s,v1.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v1.4s,v1.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v1.4s,v1.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v1.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v1.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v1.4s,#15 + add w4,w4,w12 + ushr v17.4s,v1.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v1.4s,#13 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v1.4s,v1.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v1.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + ext v4.16b,v2.16b,v3.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v0.16b,v1.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v1.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v2.4s,v2.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v2.4s,v2.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v2.4s,v2.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v2.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v2.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v2.4s,#15 + add w8,w8,w12 + ushr v17.4s,v2.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v2.4s,#13 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v2.4s,v2.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v2.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v3.16b,v0.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v1.16b,v2.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v2.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v3.4s,v3.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v3.4s,v3.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v3.4s,v3.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v3.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v3.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v3.4s,#15 + add w4,w4,w12 + ushr v17.4s,v3.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v3.4s,#13 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v3.4s,v3.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v3.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[x16] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + cmp w12,#0 // check for K256 terminator + ldr w12,[sp,#0] + sub x17,x17,#64 + bne .L_00_48 + + sub x16,x16,#256 // rewind x16 + cmp x1,x2 + mov x17, #64 + csel x17, x17, xzr, eq + sub x1,x1,x17 // avoid SEGV + mov x17,sp + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v0.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v0.16b,v0.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v0.4s + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v1.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v1.16b,v1.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v1.4s + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v2.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v2.16b,v2.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v2.4s + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v3.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v3.16b,v3.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v3.4s + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w3,w3,w15 // h+=Sigma0(a) from the past + ldp w11,w12,[x0,#0] + add w3,w3,w13 // h+=Maj(a,b,c) from the past + ldp w13,w14,[x0,#8] + add w3,w3,w11 // accumulate + add w4,w4,w12 + ldp w11,w12,[x0,#16] + add w5,w5,w13 + add w6,w6,w14 + ldp w13,w14,[x0,#24] + add w7,w7,w11 + add w8,w8,w12 + ldr w12,[sp,#0] + stp w3,w4,[x0,#0] + add w9,w9,w13 + mov w13,wzr + stp w5,w6,[x0,#8] + add w10,w10,w14 + stp w7,w8,[x0,#16] + eor w14,w4,w5 + stp w9,w10,[x0,#24] + mov w15,wzr + mov x17,sp + b.ne .L_00_48 + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret + +.globl blst_sha256_emit + +.def blst_sha256_emit; +.type 32; +.endef +.p2align 4 +blst_sha256_emit: + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] +#ifndef __AARCH64EB__ + rev x4,x4 + rev x5,x5 + rev x6,x6 + rev x7,x7 +#endif + str w4,[x0,#4] + lsr x4,x4,#32 + str w5,[x0,#12] + lsr x5,x5,#32 + str w6,[x0,#20] + lsr x6,x6,#32 + str w7,[x0,#28] + lsr x7,x7,#32 + str w4,[x0,#0] + str w5,[x0,#8] + str w6,[x0,#16] + str w7,[x0,#24] + ret + + +.globl blst_sha256_bcopy + +.def blst_sha256_bcopy; +.type 32; +.endef +.p2align 4 +blst_sha256_bcopy: +.Loop_bcopy: + ldrb w3,[x1],#1 + sub x2,x2,#1 + strb w3,[x0],#1 + cbnz x2,.Loop_bcopy + ret + + +.globl blst_sha256_hcopy + +.def blst_sha256_hcopy; +.type 32; +.endef +.p2align 4 +blst_sha256_hcopy: + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + ret + diff --git a/crypto/blst_src/build/coff/sha256-portable-x86_64.s b/crypto/blst_src/build/coff/sha256-portable-x86_64.s new file mode 100644 index 00000000000..603e46c53d7 --- /dev/null +++ b/crypto/blst_src/build/coff/sha256-portable-x86_64.s @@ -0,0 +1,1792 @@ +.comm __blst_platform_cap,4 +.text + +.globl blst_sha256_block_data_order +.def blst_sha256_block_data_order; .scl 2; .type 32; .endef +.p2align 4 +blst_sha256_block_data_order: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_blst_sha256_block_data_order: + + + pushq %rbp + + movq %rsp,%rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +#ifdef __BLST_PORTABLE__ + testl $2,__blst_platform_cap(%rip) + jnz .Lblst_sha256_block_data_order$2 +#endif + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $64+24,%rsp + + +.LSEH_body_blst_sha256_block_data_order: + + leaq (%rsi,%rdx,4),%rdx + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.p2align 4 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 0(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 4(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 8(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 12(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 16(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 20(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 24(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 28(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 32(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 36(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 40(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 44(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 48(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 52(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 56(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 60(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + jmp .Lrounds_16_xx +.p2align 4 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 64(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 68(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 72(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 76(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 80(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 84(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 88(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 92(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 96(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 100(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 104(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 108(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 112(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 116(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 120(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 124(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + leaq 64(%rbp),%rbp + cmpb $0x19,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + leaq 64+24+48(%rsp),%r11 + + movq 64+24(%rsp),%r15 + movq -40(%r11),%r14 + movq -32(%r11),%r13 + movq -24(%r11),%r12 + movq -16(%r11),%rbx + movq -8(%r11),%rbp +.LSEH_epilogue_blst_sha256_block_data_order: + mov 8(%r11),%rdi + mov 16(%r11),%rsi + + leaq (%r11),%rsp + .byte 0xf3,0xc3 + +.LSEH_end_blst_sha256_block_data_order: + +#ifndef __BLST_PORTABLE__ +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.globl blst_sha256_emit + +.def blst_sha256_emit; .scl 2; .type 32; .endef +.p2align 4 +blst_sha256_emit: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + bswapq %r8 + movq 24(%rdx),%r11 + bswapq %r9 + movl %r8d,4(%rcx) + bswapq %r10 + movl %r9d,12(%rcx) + bswapq %r11 + movl %r10d,20(%rcx) + shrq $32,%r8 + movl %r11d,28(%rcx) + shrq $32,%r9 + movl %r8d,0(%rcx) + shrq $32,%r10 + movl %r9d,8(%rcx) + shrq $32,%r11 + movl %r10d,16(%rcx) + movl %r11d,24(%rcx) + .byte 0xf3,0xc3 + + +.globl blst_sha256_bcopy + +.def blst_sha256_bcopy; .scl 2; .type 32; .endef +.p2align 4 +blst_sha256_bcopy: + .byte 0xf3,0x0f,0x1e,0xfa + + subq %rdx,%rcx +.Loop_bcopy: + movzbl (%rdx),%eax + leaq 1(%rdx),%rdx + movb %al,-1(%rcx,%rdx,1) + decq %r8 + jnz .Loop_bcopy + .byte 0xf3,0xc3 + + +.globl blst_sha256_hcopy + +.def blst_sha256_hcopy; .scl 2; .type 32; .endef +.p2align 4 +blst_sha256_hcopy: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq %r8,0(%rcx) + movq %r9,8(%rcx) + movq %r10,16(%rcx) + movq %r11,24(%rcx) + .byte 0xf3,0xc3 + +#endif +.section .pdata +.p2align 2 +.rva .LSEH_begin_blst_sha256_block_data_order +.rva .LSEH_body_blst_sha256_block_data_order +.rva .LSEH_info_blst_sha256_block_data_order_prologue + +.rva .LSEH_body_blst_sha256_block_data_order +.rva .LSEH_epilogue_blst_sha256_block_data_order +.rva .LSEH_info_blst_sha256_block_data_order_body + +.rva .LSEH_epilogue_blst_sha256_block_data_order +.rva .LSEH_end_blst_sha256_block_data_order +.rva .LSEH_info_blst_sha256_block_data_order_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_blst_sha256_block_data_order_prologue: +.byte 1,4,6,0x05 +.byte 4,0x74,2,0 +.byte 4,0x64,3,0 +.byte 4,0x53 +.byte 1,0x50 +.long 0,0 +.LSEH_info_blst_sha256_block_data_order_body: +.byte 1,0,18,0 +.byte 0x00,0xf4,0x0b,0x00 +.byte 0x00,0xe4,0x0c,0x00 +.byte 0x00,0xd4,0x0d,0x00 +.byte 0x00,0xc4,0x0e,0x00 +.byte 0x00,0x34,0x0f,0x00 +.byte 0x00,0x54,0x10,0x00 +.byte 0x00,0x74,0x12,0x00 +.byte 0x00,0x64,0x13,0x00 +.byte 0x00,0x01,0x11,0x00 +.byte 0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_blst_sha256_block_data_order_epilogue: +.byte 1,0,5,11 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0xb3 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/coff/sha256-x86_64.s b/crypto/blst_src/build/coff/sha256-x86_64.s new file mode 100644 index 00000000000..d65df5d0d4d --- /dev/null +++ b/crypto/blst_src/build/coff/sha256-x86_64.s @@ -0,0 +1,1562 @@ +.comm __blst_platform_cap,4 +.text + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.globl blst_sha256_block_data_order_shaext + +.def blst_sha256_block_data_order_shaext; .scl 2; .type 32; .endef +.p2align 6 +blst_sha256_block_data_order_shaext: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_blst_sha256_block_data_order_shaext: + + + pushq %rbp + + movq %rsp,%rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx +.Lblst_sha256_block_data_order$2: + subq $0x50,%rsp + + movaps %xmm6,-80(%rbp) + movaps %xmm7,-64(%rbp) + movaps %xmm8,-48(%rbp) + movaps %xmm9,-32(%rbp) + movaps %xmm10,-16(%rbp) + +.LSEH_body_blst_sha256_block_data_order_shaext: + + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 256-128(%rcx),%xmm7 + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .Loop_shaext + +.p2align 4 +.Loop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 16-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 48-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 96-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 112-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 144-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 176-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 208-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 224-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 240-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz .Loop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm7 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + movaps -80(%rbp),%xmm6 + movaps -64(%rbp),%xmm7 + movaps -48(%rbp),%xmm8 + movaps -32(%rbp),%xmm9 + movaps -16(%rbp),%xmm10 + movq %rbp,%rsp + + popq %rbp + +.LSEH_epilogue_blst_sha256_block_data_order_shaext: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_blst_sha256_block_data_order_shaext: +.globl blst_sha256_block_data_order + +.def blst_sha256_block_data_order; .scl 2; .type 32; .endef +.p2align 6 +blst_sha256_block_data_order: + .byte 0xf3,0x0f,0x1e,0xfa + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%r11 +.LSEH_begin_blst_sha256_block_data_order: + + + pushq %rbp + + movq %rsp,%rbp + + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + testl $2,__blst_platform_cap(%rip) + jnz .Lblst_sha256_block_data_order$2 + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $88,%rsp + + leaq (%rsi,%rdx,4),%rdx + movq %rdi,-64(%rbp) + + movq %rdx,-48(%rbp) + movaps %xmm6,-128(%rbp) + movaps %xmm7,-112(%rbp) + movaps %xmm8,-96(%rbp) + movaps %xmm9,-80(%rbp) + +.LSEH_body_blst_sha256_block_data_order: + + + leaq -64(%rsp),%rsp + movl 0(%rdi),%eax + andq $-64,%rsp + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.p2align 4 +.Lloop_ssse3: + movdqa K256+256(%rip),%xmm7 + movq %rsi,-56(%rbp) + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rsi +.byte 102,15,56,0,207 + movdqa 0(%rsi),%xmm4 + movdqa 16(%rsi),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 32(%rsi),%xmm6 +.byte 102,15,56,0,223 + movdqa 48(%rsi),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.p2align 4 +.Lssse3_00_47: + subq $-64,%rsi + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 16(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 32(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 48(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,67(%rsi) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq -64(%rbp),%rdi + movl %r14d,%eax + movq -56(%rbp),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + leaq 64(%rsi),%rsi + cmpq -48(%rbp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + xorps %xmm0,%xmm0 + movaps %xmm0,0(%rsp) + movaps %xmm0,16(%rsp) + movaps %xmm0,32(%rsp) + movaps %xmm0,48(%rsp) + movaps -128(%rbp),%xmm6 + movaps -112(%rbp),%xmm7 + movaps -96(%rbp),%xmm8 + movaps -80(%rbp),%xmm9 + movq -40(%rbp),%r15 + movq -32(%rbp),%r14 + movq -24(%rbp),%r13 + movq -16(%rbp),%r12 + movq -8(%rbp),%rbx + movq %rbp,%rsp + + popq %rbp + +.LSEH_epilogue_blst_sha256_block_data_order: + mov 8(%rsp),%rdi + mov 16(%rsp),%rsi + + .byte 0xf3,0xc3 + +.LSEH_end_blst_sha256_block_data_order: +.globl blst_sha256_emit + +.def blst_sha256_emit; .scl 2; .type 32; .endef +.p2align 4 +blst_sha256_emit: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + bswapq %r8 + movq 24(%rdx),%r11 + bswapq %r9 + movl %r8d,4(%rcx) + bswapq %r10 + movl %r9d,12(%rcx) + bswapq %r11 + movl %r10d,20(%rcx) + shrq $32,%r8 + movl %r11d,28(%rcx) + shrq $32,%r9 + movl %r8d,0(%rcx) + shrq $32,%r10 + movl %r9d,8(%rcx) + shrq $32,%r11 + movl %r10d,16(%rcx) + movl %r11d,24(%rcx) + .byte 0xf3,0xc3 + + +.globl blst_sha256_bcopy + +.def blst_sha256_bcopy; .scl 2; .type 32; .endef +.p2align 4 +blst_sha256_bcopy: + .byte 0xf3,0x0f,0x1e,0xfa + + subq %rdx,%rcx +.Loop_bcopy: + movzbl (%rdx),%eax + leaq 1(%rdx),%rdx + movb %al,-1(%rcx,%rdx,1) + decq %r8 + jnz .Loop_bcopy + .byte 0xf3,0xc3 + + +.globl blst_sha256_hcopy + +.def blst_sha256_hcopy; .scl 2; .type 32; .endef +.p2align 4 +blst_sha256_hcopy: + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq %r8,0(%rcx) + movq %r9,8(%rcx) + movq %r10,16(%rcx) + movq %r11,24(%rcx) + .byte 0xf3,0xc3 + +.section .pdata +.p2align 2 +.rva .LSEH_begin_blst_sha256_block_data_order_shaext +.rva .LSEH_body_blst_sha256_block_data_order_shaext +.rva .LSEH_info_blst_sha256_block_data_order_shaext_prologue + +.rva .LSEH_body_blst_sha256_block_data_order_shaext +.rva .LSEH_epilogue_blst_sha256_block_data_order_shaext +.rva .LSEH_info_blst_sha256_block_data_order_shaext_body + +.rva .LSEH_epilogue_blst_sha256_block_data_order_shaext +.rva .LSEH_end_blst_sha256_block_data_order_shaext +.rva .LSEH_info_blst_sha256_block_data_order_shaext_epilogue + +.rva .LSEH_begin_blst_sha256_block_data_order +.rva .LSEH_body_blst_sha256_block_data_order +.rva .LSEH_info_blst_sha256_block_data_order_prologue + +.rva .LSEH_body_blst_sha256_block_data_order +.rva .LSEH_epilogue_blst_sha256_block_data_order +.rva .LSEH_info_blst_sha256_block_data_order_body + +.rva .LSEH_epilogue_blst_sha256_block_data_order +.rva .LSEH_end_blst_sha256_block_data_order +.rva .LSEH_info_blst_sha256_block_data_order_epilogue + +.section .xdata +.p2align 3 +.LSEH_info_blst_sha256_block_data_order_shaext_prologue: +.byte 1,4,6,0x05 +.byte 4,0x74,2,0 +.byte 4,0x64,3,0 +.byte 4,0x53 +.byte 1,0x50 +.long 0,0 +.LSEH_info_blst_sha256_block_data_order_shaext_body: +.byte 1,0,17,85 +.byte 0x00,0x68,0x00,0x00 +.byte 0x00,0x78,0x01,0x00 +.byte 0x00,0x88,0x02,0x00 +.byte 0x00,0x98,0x03,0x00 +.byte 0x00,0xa8,0x04,0x00 +.byte 0x00,0x74,0x0c,0x00 +.byte 0x00,0x64,0x0d,0x00 +.byte 0x00,0x53 +.byte 0x00,0x92 +.byte 0x00,0x50 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_blst_sha256_block_data_order_shaext_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + +.LSEH_info_blst_sha256_block_data_order_prologue: +.byte 1,4,6,0x05 +.byte 4,0x74,2,0 +.byte 4,0x64,3,0 +.byte 4,0x53 +.byte 1,0x50 +.long 0,0 +.LSEH_info_blst_sha256_block_data_order_body: +.byte 1,0,25,133 +.byte 0x00,0x68,0x00,0x00 +.byte 0x00,0x78,0x01,0x00 +.byte 0x00,0x88,0x02,0x00 +.byte 0x00,0x98,0x03,0x00 +.byte 0x00,0xf4,0x0b,0x00 +.byte 0x00,0xe4,0x0c,0x00 +.byte 0x00,0xd4,0x0d,0x00 +.byte 0x00,0xc4,0x0e,0x00 +.byte 0x00,0x34,0x0f,0x00 +.byte 0x00,0x74,0x12,0x00 +.byte 0x00,0x64,0x13,0x00 +.byte 0x00,0x53 +.byte 0x00,0xf2 +.byte 0x00,0x50 +.byte 0x00,0x00,0x00,0x00,0x00,0x00 +.byte 0x00,0x00,0x00,0x00 +.LSEH_info_blst_sha256_block_data_order_epilogue: +.byte 1,0,4,0 +.byte 0x00,0x74,0x01,0x00 +.byte 0x00,0x64,0x02,0x00 +.byte 0x00,0x00,0x00,0x00 + diff --git a/crypto/blst_src/build/elf/add_mod_256-armv8.S b/crypto/blst_src/build/elf/add_mod_256-armv8.S new file mode 100644 index 00000000000..57476aaa1da --- /dev/null +++ b/crypto/blst_src/build/elf/add_mod_256-armv8.S @@ -0,0 +1,379 @@ +.text + +.globl add_mod_256 +.hidden add_mod_256 +.type add_mod_256,%function +.align 5 +add_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + adds x8,x8,x12 + ldp x14,x15,[x2,#16] + adcs x9,x9,x13 + ldp x4,x5,[x3] + adcs x10,x10,x14 + ldp x6,x7,[x3,#16] + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + stp x8,x9,[x0] + csel x11,x11,x2,lo + stp x10,x11,[x0,#16] + + ret +.size add_mod_256,.-add_mod_256 + +.globl mul_by_3_mod_256 +.hidden mul_by_3_mod_256 +.type mul_by_3_mod_256,%function +.align 5 +mul_by_3_mod_256: + ldp x12,x13,[x1] + ldp x14,x15,[x1,#16] + + adds x8,x12,x12 + ldp x4,x5,[x2] + adcs x9,x13,x13 + ldp x6,x7,[x2,#16] + adcs x10,x14,x14 + adcs x11,x15,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + csel x11,x11,x2,lo + + adds x8,x8,x12 + adcs x9,x9,x13 + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + stp x8,x9,[x0] + csel x11,x11,x2,lo + stp x10,x11,[x0,#16] + + ret +.size mul_by_3_mod_256,.-mul_by_3_mod_256 + +.globl lshift_mod_256 +.hidden lshift_mod_256 +.type lshift_mod_256,%function +.align 5 +lshift_mod_256: + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +.Loop_lshift_mod_256: + adds x8,x8,x8 + sub x2,x2,#1 + adcs x9,x9,x9 + adcs x10,x10,x10 + adcs x11,x11,x11 + adc x3,xzr,xzr + + subs x12,x8,x4 + sbcs x13,x9,x5 + sbcs x14,x10,x6 + sbcs x15,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x12,lo + csel x9,x9,x13,lo + csel x10,x10,x14,lo + csel x11,x11,x15,lo + + cbnz x2,.Loop_lshift_mod_256 + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret +.size lshift_mod_256,.-lshift_mod_256 + +.globl rshift_mod_256 +.hidden rshift_mod_256 +.type rshift_mod_256,%function +.align 5 +rshift_mod_256: + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +.Loop_rshift: + adds x12,x8,x4 + sub x2,x2,#1 + adcs x13,x9,x5 + adcs x14,x10,x6 + adcs x15,x11,x7 + adc x3,xzr,xzr + tst x8,#1 + + csel x12,x12,x8,ne + csel x13,x13,x9,ne + csel x14,x14,x10,ne + csel x15,x15,x11,ne + csel x3,x3,xzr,ne + + extr x8,x13,x12,#1 + extr x9,x14,x13,#1 + extr x10,x15,x14,#1 + extr x11,x3,x15,#1 + + cbnz x2,.Loop_rshift + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret +.size rshift_mod_256,.-rshift_mod_256 + +.globl cneg_mod_256 +.hidden cneg_mod_256 +.type cneg_mod_256,%function +.align 5 +cneg_mod_256: + ldp x8,x9,[x1] + ldp x4,x5,[x3] + + ldp x10,x11,[x1,#16] + subs x12,x4,x8 + ldp x6,x7,[x3,#16] + orr x4,x8,x9 + sbcs x13,x5,x9 + orr x5,x10,x11 + sbcs x14,x6,x10 + orr x3,x4,x5 + sbc x15,x7,x11 + + cmp x3,#0 + csetm x3,ne + ands x2,x2,x3 + + csel x8,x8,x12,eq + csel x9,x9,x13,eq + csel x10,x10,x14,eq + stp x8,x9,[x0] + csel x11,x11,x15,eq + stp x10,x11,[x0,#16] + + ret +.size cneg_mod_256,.-cneg_mod_256 + +.globl sub_mod_256 +.hidden sub_mod_256 +.type sub_mod_256,%function +.align 5 +sub_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + subs x8,x8,x12 + ldp x14,x15,[x2,#16] + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + stp x8,x9,[x0] + adc x11,x11,x7 + stp x10,x11,[x0,#16] + + ret +.size sub_mod_256,.-sub_mod_256 + +.globl check_mod_256 +.hidden check_mod_256 +.type check_mod_256,%function +.align 5 +check_mod_256: + ldp x8,x9,[x0] + ldp x10,x11,[x0,#16] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + subs xzr,x8,x4 + sbcs xzr,x9,x5 + orr x8,x8,x9 + sbcs xzr,x10,x6 + orr x8,x8,x10 + sbcs xzr,x11,x7 + orr x8,x8,x11 + sbc x1,xzr,xzr + + cmp x8,#0 + mov x0,#1 + csel x0,x0,xzr,ne + and x0,x0,x1 + + ret +.size check_mod_256,.-check_mod_256 + +.globl add_n_check_mod_256 +.hidden add_n_check_mod_256 +.type add_n_check_mod_256,%function +.align 5 +add_n_check_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + adds x8,x8,x12 + ldp x4,x5,[x3] + adcs x9,x9,x13 + ldp x6,x7,[x3,#16] + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + csel x11,x11,x2,lo + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + csel x0, x17, xzr, ne + + ret +.size add_n_check_mod_256,.-add_n_check_mod_256 + +.globl sub_n_check_mod_256 +.hidden sub_n_check_mod_256 +.type sub_n_check_mod_256,%function +.align 5 +sub_n_check_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + subs x8,x8,x12 + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + adc x11,x11,x7 + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + csel x0, x17, xzr, ne + + ret +.size sub_n_check_mod_256,.-sub_n_check_mod_256 diff --git a/crypto/blst_src/build/elf/add_mod_256-x86_64.s b/crypto/blst_src/build/elf/add_mod_256-x86_64.s new file mode 100644 index 00000000000..2f41781959c --- /dev/null +++ b/crypto/blst_src/build/elf/add_mod_256-x86_64.s @@ -0,0 +1,572 @@ +.text + +.globl add_mod_256 +.hidden add_mod_256 +.type add_mod_256,@function +.align 32 +add_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +.Loaded_a_add_mod_256: + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + movq %r9,%rsi + adcq 24(%rdx),%r11 + sbbq %rdx,%rdx + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + movq %r8,0(%rdi) + cmovcq %rbx,%r10 + movq %r9,8(%rdi) + cmovcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size add_mod_256,.-add_mod_256 + + +.globl mul_by_3_mod_256 +.hidden mul_by_3_mod_256 +.type mul_by_3_mod_256,@function +.align 32 +mul_by_3_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq %rdx,%rcx + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %rsi,%rdx + movq 24(%rsi),%r11 + + call __lshift_mod_256 + movq 0(%rsp),%r12 +.cfi_restore %r12 + jmp .Loaded_a_add_mod_256 + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_by_3_mod_256,.-mul_by_3_mod_256 + +.type __lshift_mod_256,@function +.align 32 +__lshift_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq %r8,%r8 + adcq %r9,%r9 + movq %r8,%rax + adcq %r10,%r10 + movq %r9,%rsi + adcq %r11,%r11 + sbbq %r12,%r12 + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + cmovcq %rbx,%r10 + cmovcq %rbp,%r11 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __lshift_mod_256,.-__lshift_mod_256 + + +.globl lshift_mod_256 +.hidden lshift_mod_256 +.type lshift_mod_256,@function +.align 32 +lshift_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +.Loop_lshift_mod_256: + call __lshift_mod_256 + decl %edx + jnz .Loop_lshift_mod_256 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r12 +.cfi_restore %r12 + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size lshift_mod_256,.-lshift_mod_256 + + +.globl rshift_mod_256 +.hidden rshift_mod_256 +.type rshift_mod_256,@function +.align 32 +rshift_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%rbp + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +.Loop_rshift_mod_256: + movq %rbp,%r8 + andq $1,%rbp + movq 0(%rcx),%rax + negq %rbp + movq 8(%rcx),%rsi + movq 16(%rcx),%rbx + + andq %rbp,%rax + andq %rbp,%rsi + andq %rbp,%rbx + andq 24(%rcx),%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + adcq %rbx,%r10 + adcq %rbp,%r11 + sbbq %rax,%rax + + shrq $1,%r8 + movq %r9,%rbp + shrq $1,%r9 + movq %r10,%rbx + shrq $1,%r10 + movq %r11,%rsi + shrq $1,%r11 + + shlq $63,%rbp + shlq $63,%rbx + orq %r8,%rbp + shlq $63,%rsi + orq %rbx,%r9 + shlq $63,%rax + orq %rsi,%r10 + orq %rax,%r11 + + decl %edx + jnz .Loop_rshift_mod_256 + + movq %rbp,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size rshift_mod_256,.-rshift_mod_256 + + +.globl cneg_mod_256 +.hidden cneg_mod_256 +.type cneg_mod_256,@function +.align 32 +cneg_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq 0(%rsi),%r12 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r12,%r8 + movq 24(%rsi),%r11 + orq %r9,%r12 + orq %r10,%r12 + orq %r11,%r12 + movq $-1,%rbp + + movq 0(%rcx),%rax + cmovnzq %rbp,%r12 + movq 8(%rcx),%rsi + movq 16(%rcx),%rbx + andq %r12,%rax + movq 24(%rcx),%rbp + andq %r12,%rsi + andq %r12,%rbx + andq %r12,%rbp + + subq %r8,%rax + sbbq %r9,%rsi + sbbq %r10,%rbx + sbbq %r11,%rbp + + orq %rdx,%rdx + + cmovzq %r8,%rax + cmovzq %r9,%rsi + movq %rax,0(%rdi) + cmovzq %r10,%rbx + movq %rsi,8(%rdi) + cmovzq %r11,%rbp + movq %rbx,16(%rdi) + movq %rbp,24(%rdi) + + movq 0(%rsp),%r12 +.cfi_restore %r12 + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size cneg_mod_256,.-cneg_mod_256 + + +.globl sub_mod_256 +.hidden sub_mod_256 +.type sub_mod_256,@function +.align 32 +sub_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + movq 0(%rcx),%rax + sbbq 8(%rdx),%r9 + movq 8(%rcx),%rsi + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rbx + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbp + sbbq %rdx,%rdx + + andq %rdx,%rax + andq %rdx,%rsi + andq %rdx,%rbx + andq %rdx,%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + movq %r8,0(%rdi) + adcq %rbx,%r10 + movq %r9,8(%rdi) + adcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sub_mod_256,.-sub_mod_256 + + +.globl check_mod_256 +.hidden check_mod_256 +.type check_mod_256,@function +.align 32 +check_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + movq 0(%rdi),%rax + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + + movq %rax,%r8 + orq %r9,%rax + orq %r10,%rax + orq %r11,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq %rsi,%rsi + + movq $1,%rdx + cmpq $0,%rax + cmovneq %rdx,%rax + andq %rsi,%rax + + .byte 0xf3,0xc3 +.cfi_endproc +.size check_mod_256,.-check_mod_256 + + +.globl add_n_check_mod_256 +.hidden add_n_check_mod_256 +.type add_n_check_mod_256,@function +.align 32 +add_n_check_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + movq %r9,%rsi + adcq 24(%rdx),%r11 + sbbq %rdx,%rdx + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + movq %r8,0(%rdi) + cmovcq %rbx,%r10 + movq %r9,8(%rdi) + cmovcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + orq %r9,%r8 + orq %r11,%r10 + orq %r10,%r8 + movq $1,%rax + cmovzq %r8,%rax + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size add_n_check_mod_256,.-add_n_check_mod_256 + + +.globl sub_n_check_mod_256 +.hidden sub_n_check_mod_256 +.type sub_n_check_mod_256,@function +.align 32 +sub_n_check_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + movq 0(%rcx),%rax + sbbq 8(%rdx),%r9 + movq 8(%rcx),%rsi + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rbx + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbp + sbbq %rdx,%rdx + + andq %rdx,%rax + andq %rdx,%rsi + andq %rdx,%rbx + andq %rdx,%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + movq %r8,0(%rdi) + adcq %rbx,%r10 + movq %r9,8(%rdi) + adcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + orq %r9,%r8 + orq %r11,%r10 + orq %r10,%r8 + movq $1,%rax + cmovzq %r8,%rax + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sub_n_check_mod_256,.-sub_n_check_mod_256 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/add_mod_384-armv8.S b/crypto/blst_src/build/elf/add_mod_384-armv8.S new file mode 100644 index 00000000000..5c18d7fe892 --- /dev/null +++ b/crypto/blst_src/build/elf/add_mod_384-armv8.S @@ -0,0 +1,1000 @@ +.text + +.globl add_mod_384 +.hidden add_mod_384 +.type add_mod_384,%function +.align 5 +add_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size add_mod_384,.-add_mod_384 + +.type __add_mod_384,%function +.align 5 +__add_mod_384: + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + +__add_mod_384_ab_are_loaded: + adds x10,x10,x16 + adcs x11,x11,x17 + adcs x12,x12,x19 + adcs x13,x13,x20 + adcs x14,x14,x21 + adcs x15,x15,x22 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csel x10,x10,x16,lo + csel x11,x11,x17,lo + csel x12,x12,x19,lo + csel x13,x13,x20,lo + csel x14,x14,x21,lo + csel x15,x15,x22,lo + + ret +.size __add_mod_384,.-__add_mod_384 + +.globl add_mod_384x +.hidden add_mod_384x +.type add_mod_384x,%function +.align 5 +add_mod_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size add_mod_384x,.-add_mod_384x + +.globl rshift_mod_384 +.hidden rshift_mod_384 +.type rshift_mod_384,%function +.align 5 +rshift_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +.Loop_rshift_mod_384: + sub x2,x2,#1 + bl __rshift_mod_384 + cbnz x2,.Loop_rshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size rshift_mod_384,.-rshift_mod_384 + +.type __rshift_mod_384,%function +.align 5 +__rshift_mod_384: + sbfx x22,x10,#0,#1 + and x16,x22,x4 + and x17,x22,x5 + adds x10,x10,x16 + and x19,x22,x6 + adcs x11,x11,x17 + and x20,x22,x7 + adcs x12,x12,x19 + and x21,x22,x8 + adcs x13,x13,x20 + and x22,x22,x9 + adcs x14,x14,x21 + extr x10,x11,x10,#1 // a[0:5] >>= 1 + adcs x15,x15,x22 + extr x11,x12,x11,#1 + adc x22,xzr,xzr + extr x12,x13,x12,#1 + extr x13,x14,x13,#1 + extr x14,x15,x14,#1 + extr x15,x22,x15,#1 + ret +.size __rshift_mod_384,.-__rshift_mod_384 + +.globl div_by_2_mod_384 +.hidden div_by_2_mod_384 +.type div_by_2_mod_384,%function +.align 5 +div_by_2_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __rshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size div_by_2_mod_384,.-div_by_2_mod_384 + +.globl lshift_mod_384 +.hidden lshift_mod_384 +.type lshift_mod_384,%function +.align 5 +lshift_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +.Loop_lshift_mod_384: + sub x2,x2,#1 + bl __lshift_mod_384 + cbnz x2,.Loop_lshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size lshift_mod_384,.-lshift_mod_384 + +.type __lshift_mod_384,%function +.align 5 +__lshift_mod_384: + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csel x10,x10,x16,lo + csel x11,x11,x17,lo + csel x12,x12,x19,lo + csel x13,x13,x20,lo + csel x14,x14,x21,lo + csel x15,x15,x22,lo + + ret +.size __lshift_mod_384,.-__lshift_mod_384 + +.globl mul_by_3_mod_384 +.hidden mul_by_3_mod_384 +.type mul_by_3_mod_384,%function +.align 5 +mul_by_3_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size mul_by_3_mod_384,.-mul_by_3_mod_384 + +.globl mul_by_8_mod_384 +.hidden mul_by_8_mod_384 +.type mul_by_8_mod_384,%function +.align 5 +mul_by_8_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size mul_by_8_mod_384,.-mul_by_8_mod_384 + +.globl mul_by_3_mod_384x +.hidden mul_by_3_mod_384x +.type mul_by_3_mod_384x,%function +.align 5 +mul_by_3_mod_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + + ldp x16,x17,[x1,#48] + ldp x19,x20,[x1,#64] + ldp x21,x22,[x1,#80] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size mul_by_3_mod_384x,.-mul_by_3_mod_384x + +.globl mul_by_8_mod_384x +.hidden mul_by_8_mod_384x +.type mul_by_8_mod_384x,%function +.align 5 +mul_by_8_mod_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size mul_by_8_mod_384x,.-mul_by_8_mod_384x + +.globl cneg_mod_384 +.hidden cneg_mod_384 +.type cneg_mod_384,%function +.align 5 +cneg_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x4,x5,[x3] + ldp x12,x13,[x1,#16] + ldp x6,x7,[x3,#16] + + subs x16,x4,x10 + ldp x14,x15,[x1,#32] + ldp x8,x9,[x3,#32] + orr x3,x10,x11 + sbcs x17,x5,x11 + orr x3,x3,x12 + sbcs x19,x6,x12 + orr x3,x3,x13 + sbcs x20,x7,x13 + orr x3,x3,x14 + sbcs x21,x8,x14 + orr x3,x3,x15 + sbc x22,x9,x15 + + cmp x3,#0 + csetm x3,ne + ands x2,x2,x3 + + csel x10,x10,x16,eq + csel x11,x11,x17,eq + csel x12,x12,x19,eq + csel x13,x13,x20,eq + stp x10,x11,[x0] + csel x14,x14,x21,eq + stp x12,x13,[x0,#16] + csel x15,x15,x22,eq + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size cneg_mod_384,.-cneg_mod_384 + +.globl sub_mod_384 +.hidden sub_mod_384 +.type sub_mod_384,%function +.align 5 +sub_mod_384: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size sub_mod_384,.-sub_mod_384 + +.type __sub_mod_384,%function +.align 5 +__sub_mod_384: + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + + subs x10,x10,x16 + sbcs x11,x11,x17 + sbcs x12,x12,x19 + sbcs x13,x13,x20 + sbcs x14,x14,x21 + sbcs x15,x15,x22 + sbc x3,xzr,xzr + + and x16,x4,x3 + and x17,x5,x3 + adds x10,x10,x16 + and x19,x6,x3 + adcs x11,x11,x17 + and x20,x7,x3 + adcs x12,x12,x19 + and x21,x8,x3 + adcs x13,x13,x20 + and x22,x9,x3 + adcs x14,x14,x21 + adc x15,x15,x22 + + ret +.size __sub_mod_384,.-__sub_mod_384 + +.globl sub_mod_384x +.hidden sub_mod_384x +.type sub_mod_384x,%function +.align 5 +sub_mod_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size sub_mod_384x,.-sub_mod_384x + +.globl mul_by_1_plus_i_mod_384x +.hidden mul_by_1_plus_i_mod_384x +.type mul_by_1_plus_i_mod_384x,%function +.align 5 +mul_by_1_plus_i_mod_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + add x2,x1,#48 + + bl __sub_mod_384 // a->re - a->im + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __add_mod_384_ab_are_loaded // a->re + a->im + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size mul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x + +.globl sgn0_pty_mod_384 +.hidden sgn0_pty_mod_384 +.type sgn0_pty_mod_384,%function +.align 5 +sgn0_pty_mod_384: + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x0,x10,#1 + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x3,x3,xzr + + mvn x3,x3 + and x3,x3,#2 + orr x0,x0,x3 + + ret +.size sgn0_pty_mod_384,.-sgn0_pty_mod_384 + +.globl sgn0_pty_mod_384x +.hidden sgn0_pty_mod_384x +.type sgn0_pty_mod_384x,%function +.align 5 +sgn0_pty_mod_384x: + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x2,x10,#1 + orr x3,x10,x11 + adds x10,x10,x10 + orr x3,x3,x12 + adcs x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + ldp x10,x11,[x0,#48] + ldp x12,x13,[x0,#64] + ldp x14,x15,[x0,#80] + + mvn x16,x16 + and x16,x16,#2 + orr x2,x2,x16 + + and x0,x10,#1 + orr x1,x10,x11 + adds x10,x10,x10 + orr x1,x1,x12 + adcs x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + mvn x16,x16 + and x16,x16,#2 + orr x0,x0,x16 + + cmp x3,#0 + csel x3,x0,x2,eq // a->re==0? prty(a->im) : prty(a->re) + + cmp x1,#0 + csel x1,x0,x2,ne // a->im!=0? sgn0(a->im) : sgn0(a->re) + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ret +.size sgn0_pty_mod_384x,.-sgn0_pty_mod_384x +.globl vec_select_32 +.hidden vec_select_32 +.type vec_select_32,%function +.align 5 +vec_select_32: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret +.size vec_select_32,.-vec_select_32 +.globl vec_select_48 +.hidden vec_select_48 +.type vec_select_48,%function +.align 5 +vec_select_48: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret +.size vec_select_48,.-vec_select_48 +.globl vec_select_96 +.hidden vec_select_96 +.type vec_select_96,%function +.align 5 +vec_select_96: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret +.size vec_select_96,.-vec_select_96 +.globl vec_select_192 +.hidden vec_select_192 +.type vec_select_192,%function +.align 5 +vec_select_192: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret +.size vec_select_192,.-vec_select_192 +.globl vec_select_144 +.hidden vec_select_144 +.type vec_select_144,%function +.align 5 +vec_select_144: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret +.size vec_select_144,.-vec_select_144 +.globl vec_select_288 +.hidden vec_select_288 +.type vec_select_288,%function +.align 5 +vec_select_288: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret +.size vec_select_288,.-vec_select_288 +.globl vec_prefetch +.hidden vec_prefetch +.type vec_prefetch,%function +.align 5 +vec_prefetch: + add x1, x1, x0 + sub x1, x1, #1 + mov x2, #64 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + prfm pldl1keep, [x0] + ret +.size vec_prefetch,.-vec_prefetch +.globl vec_is_zero_16x +.hidden vec_is_zero_16x +.type vec_is_zero_16x,%function +.align 5 +vec_is_zero_16x: + ld1 {v0.2d}, [x0], #16 + lsr x1, x1, #4 + sub x1, x1, #1 + cbz x1, .Loop_is_zero_done + +.Loop_is_zero: + ld1 {v1.2d}, [x0], #16 + orr v0.16b, v0.16b, v1.16b + sub x1, x1, #1 + cbnz x1, .Loop_is_zero + +.Loop_is_zero_done: + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + csel x0, x0, xzr, eq + ret +.size vec_is_zero_16x,.-vec_is_zero_16x +.globl vec_is_equal_16x +.hidden vec_is_equal_16x +.type vec_is_equal_16x,%function +.align 5 +vec_is_equal_16x: + ld1 {v0.2d}, [x0], #16 + ld1 {v1.2d}, [x1], #16 + lsr x2, x2, #4 + eor v0.16b, v0.16b, v1.16b + +.Loop_is_equal: + sub x2, x2, #1 + cbz x2, .Loop_is_equal_done + ld1 {v1.2d}, [x0], #16 + ld1 {v2.2d}, [x1], #16 + eor v1.16b, v1.16b, v2.16b + orr v0.16b, v0.16b, v1.16b + b .Loop_is_equal + nop + +.Loop_is_equal_done: + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + csel x0, x0, xzr, eq + ret +.size vec_is_equal_16x,.-vec_is_equal_16x diff --git a/crypto/blst_src/build/elf/add_mod_384-x86_64.s b/crypto/blst_src/build/elf/add_mod_384-x86_64.s new file mode 100644 index 00000000000..39eee6d1752 --- /dev/null +++ b/crypto/blst_src/build/elf/add_mod_384-x86_64.s @@ -0,0 +1,1907 @@ +.text + +.globl add_mod_384 +.hidden add_mod_384 +.type add_mod_384,@function +.align 32 +add_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __add_mod_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size add_mod_384,.-add_mod_384 + +.type __add_mod_384,@function +.align 32 +__add_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__add_mod_384_a_is_loaded: + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __add_mod_384,.-__add_mod_384 + +.globl add_mod_384x +.hidden add_mod_384x +.type add_mod_384x,@function +.align 32 +add_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $24,%rsp +.cfi_adjust_cfa_offset 24 + + + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + leaq 48(%rsi),%rsi + leaq 48(%rdx),%rdx + leaq 48(%rdi),%rdi + call __add_mod_384 + + movq 0(%rsp),%rsi + movq 8(%rsp),%rdx + leaq -48(%rdi),%rdi + call __add_mod_384 + + movq 24+0(%rsp),%r15 +.cfi_restore %r15 + movq 24+8(%rsp),%r14 +.cfi_restore %r14 + movq 24+16(%rsp),%r13 +.cfi_restore %r13 + movq 24+24(%rsp),%r12 +.cfi_restore %r12 + movq 24+32(%rsp),%rbx +.cfi_restore %rbx + movq 24+40(%rsp),%rbp +.cfi_restore %rbp + leaq 24+48(%rsp),%rsp +.cfi_adjust_cfa_offset -24-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size add_mod_384x,.-add_mod_384x + + +.globl rshift_mod_384 +.hidden rshift_mod_384 +.type rshift_mod_384,@function +.align 32 +rshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +.Loop_rshift_mod_384: + call __rshift_mod_384 + decl %edx + jnz .Loop_rshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size rshift_mod_384,.-rshift_mod_384 + +.type __rshift_mod_384,@function +.align 32 +__rshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rsi + movq 0(%rcx),%r14 + andq %r8,%rsi + movq 8(%rcx),%r15 + negq %rsi + movq 16(%rcx),%rax + andq %rsi,%r14 + movq 24(%rcx),%rbx + andq %rsi,%r15 + movq 32(%rcx),%rbp + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rbp + andq 40(%rcx),%rsi + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rax + adcq %r11,%rbx + adcq %r12,%rbp + adcq %r13,%rsi + sbbq %r13,%r13 + + shrq $1,%r14 + movq %r15,%r8 + shrq $1,%r15 + movq %rax,%r9 + shrq $1,%rax + movq %rbx,%r10 + shrq $1,%rbx + movq %rbp,%r11 + shrq $1,%rbp + movq %rsi,%r12 + shrq $1,%rsi + shlq $63,%r8 + shlq $63,%r9 + orq %r14,%r8 + shlq $63,%r10 + orq %r15,%r9 + shlq $63,%r11 + orq %rax,%r10 + shlq $63,%r12 + orq %rbx,%r11 + shlq $63,%r13 + orq %rbp,%r12 + orq %rsi,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __rshift_mod_384,.-__rshift_mod_384 + +.globl div_by_2_mod_384 +.hidden div_by_2_mod_384 +.type div_by_2_mod_384,@function +.align 32 +div_by_2_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq %rdx,%rcx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + call __rshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size div_by_2_mod_384,.-div_by_2_mod_384 + + +.globl lshift_mod_384 +.hidden lshift_mod_384 +.type lshift_mod_384,@function +.align 32 +lshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +.Loop_lshift_mod_384: + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + movq %r8,%r14 + adcq %r11,%r11 + movq %r9,%r15 + adcq %r12,%r12 + movq %r10,%rax + adcq %r13,%r13 + movq %r11,%rbx + sbbq %rdi,%rdi + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdi + + movq (%rsp),%rdi + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + cmovcq %rbx,%r11 + cmovcq %rbp,%r12 + cmovcq %rsi,%r13 + + decl %edx + jnz .Loop_lshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size lshift_mod_384,.-lshift_mod_384 + +.type __lshift_mod_384,@function +.align 32 +__lshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + movq %r8,%r14 + adcq %r11,%r11 + movq %r9,%r15 + adcq %r12,%r12 + movq %r10,%rax + adcq %r13,%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + cmovcq %rbx,%r11 + cmovcq %rbp,%r12 + cmovcq %rsi,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __lshift_mod_384,.-__lshift_mod_384 + + +.globl mul_by_3_mod_384 +.hidden mul_by_3_mod_384 +.type mul_by_3_mod_384,@function +.align 32 +mul_by_3_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + + movq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_by_3_mod_384,.-mul_by_3_mod_384 + +.globl mul_by_8_mod_384 +.hidden mul_by_8_mod_384 +.type mul_by_8_mod_384,@function +.align 32 +mul_by_8_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_by_8_mod_384,.-mul_by_8_mod_384 + + +.globl mul_by_3_mod_384x +.hidden mul_by_3_mod_384x +.type mul_by_3_mod_384x,@function +.align 32 +mul_by_3_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + + movq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq (%rsp),%rsi + leaq 48(%rdi),%rdi + + movq 48(%rsi),%r8 + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + movq 72(%rsi),%r11 + movq 80(%rsi),%r12 + movq 88(%rsi),%r13 + + call __lshift_mod_384 + + movq $48,%rdx + addq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_by_3_mod_384x,.-mul_by_3_mod_384x + +.globl mul_by_8_mod_384x +.hidden mul_by_8_mod_384x +.type mul_by_8_mod_384x,@function +.align 32 +mul_by_8_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq (%rsp),%rsi + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq %r8,48+0(%rdi) + movq %r9,48+8(%rdi) + movq %r10,48+16(%rdi) + movq %r11,48+24(%rdi) + movq %r12,48+32(%rdi) + movq %r13,48+40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_by_8_mod_384x,.-mul_by_8_mod_384x + + +.globl cneg_mod_384 +.hidden cneg_mod_384 +.type cneg_mod_384,@function +.align 32 +cneg_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdx +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%rdx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %rdx,%r8 + movq 24(%rsi),%r11 + orq %r9,%rdx + movq 32(%rsi),%r12 + orq %r10,%rdx + movq 40(%rsi),%r13 + orq %r11,%rdx + movq $-1,%rsi + orq %r12,%rdx + orq %r13,%rdx + + movq 0(%rcx),%r14 + cmovnzq %rsi,%rdx + movq 8(%rcx),%r15 + movq 16(%rcx),%rax + andq %rdx,%r14 + movq 24(%rcx),%rbx + andq %rdx,%r15 + movq 32(%rcx),%rbp + andq %rdx,%rax + movq 40(%rcx),%rsi + andq %rdx,%rbx + movq 0(%rsp),%rcx + andq %rdx,%rbp + andq %rdx,%rsi + + subq %r8,%r14 + sbbq %r9,%r15 + sbbq %r10,%rax + sbbq %r11,%rbx + sbbq %r12,%rbp + sbbq %r13,%rsi + + orq %rcx,%rcx + + cmovzq %r8,%r14 + cmovzq %r9,%r15 + cmovzq %r10,%rax + movq %r14,0(%rdi) + cmovzq %r11,%rbx + movq %r15,8(%rdi) + cmovzq %r12,%rbp + movq %rax,16(%rdi) + cmovzq %r13,%rsi + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rsi,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size cneg_mod_384,.-cneg_mod_384 + + +.globl sub_mod_384 +.hidden sub_mod_384 +.type sub_mod_384,@function +.align 32 +sub_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __sub_mod_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sub_mod_384,.-sub_mod_384 + +.type __sub_mod_384,@function +.align 32 +__sub_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __sub_mod_384,.-__sub_mod_384 + +.globl sub_mod_384x +.hidden sub_mod_384x +.type sub_mod_384x,@function +.align 32 +sub_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $24,%rsp +.cfi_adjust_cfa_offset 24 + + + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + leaq 48(%rsi),%rsi + leaq 48(%rdx),%rdx + leaq 48(%rdi),%rdi + call __sub_mod_384 + + movq 0(%rsp),%rsi + movq 8(%rsp),%rdx + leaq -48(%rdi),%rdi + call __sub_mod_384 + + movq 24+0(%rsp),%r15 +.cfi_restore %r15 + movq 24+8(%rsp),%r14 +.cfi_restore %r14 + movq 24+16(%rsp),%r13 +.cfi_restore %r13 + movq 24+24(%rsp),%r12 +.cfi_restore %r12 + movq 24+32(%rsp),%rbx +.cfi_restore %rbx + movq 24+40(%rsp),%rbp +.cfi_restore %rbp + leaq 24+48(%rsp),%rsp +.cfi_adjust_cfa_offset -24-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sub_mod_384x,.-sub_mod_384x +.globl mul_by_1_plus_i_mod_384x +.hidden mul_by_1_plus_i_mod_384x +.type mul_by_1_plus_i_mod_384x,@function +.align 32 +mul_by_1_plus_i_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $56,%rsp +.cfi_adjust_cfa_offset 56 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rbx + adcq 72(%rsi),%r11 + movq %r12,%rcx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + movq %rdi,48(%rsp) + sbbq %rdi,%rdi + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rbx + sbbq 80(%rsi),%rcx + sbbq 88(%rsi),%rbp + sbbq %rsi,%rsi + + movq %r8,0(%rsp) + movq 0(%rdx),%r8 + movq %r9,8(%rsp) + movq 8(%rdx),%r9 + movq %r10,16(%rsp) + movq 16(%rdx),%r10 + movq %r11,24(%rsp) + movq 24(%rdx),%r11 + movq %r12,32(%rsp) + andq %rsi,%r8 + movq 32(%rdx),%r12 + movq %r13,40(%rsp) + andq %rsi,%r9 + movq 40(%rdx),%r13 + andq %rsi,%r10 + andq %rsi,%r11 + andq %rsi,%r12 + andq %rsi,%r13 + movq 48(%rsp),%rsi + + addq %r8,%r14 + movq 0(%rsp),%r8 + adcq %r9,%r15 + movq 8(%rsp),%r9 + adcq %r10,%rax + movq 16(%rsp),%r10 + adcq %r11,%rbx + movq 24(%rsp),%r11 + adcq %r12,%rcx + movq 32(%rsp),%r12 + adcq %r13,%rbp + movq 40(%rsp),%r13 + + movq %r14,0(%rsi) + movq %r8,%r14 + movq %r15,8(%rsi) + movq %rax,16(%rsi) + movq %r9,%r15 + movq %rbx,24(%rsi) + movq %rcx,32(%rsi) + movq %r10,%rax + movq %rbp,40(%rsi) + + subq 0(%rdx),%r8 + movq %r11,%rbx + sbbq 8(%rdx),%r9 + sbbq 16(%rdx),%r10 + movq %r12,%rcx + sbbq 24(%rdx),%r11 + sbbq 32(%rdx),%r12 + movq %r13,%rbp + sbbq 40(%rdx),%r13 + sbbq $0,%rdi + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,48(%rsi) + cmovcq %rbx,%r11 + movq %r9,56(%rsi) + cmovcq %rcx,%r12 + movq %r10,64(%rsi) + cmovcq %rbp,%r13 + movq %r11,72(%rsi) + movq %r12,80(%rsi) + movq %r13,88(%rsi) + + movq 56+0(%rsp),%r15 +.cfi_restore %r15 + movq 56+8(%rsp),%r14 +.cfi_restore %r14 + movq 56+16(%rsp),%r13 +.cfi_restore %r13 + movq 56+24(%rsp),%r12 +.cfi_restore %r12 + movq 56+32(%rsp),%rbx +.cfi_restore %rbx + movq 56+40(%rsp),%rbp +.cfi_restore %rbp + leaq 56+48(%rsp),%rsp +.cfi_adjust_cfa_offset -56-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x +.globl sgn0_pty_mod_384 +.hidden sgn0_pty_mod_384 +.type sgn0_pty_mod_384,@function +.align 32 +sgn0_pty_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%rcx + movq 40(%rdi),%rdx + + xorq %rax,%rax + movq %r8,%rdi + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rax + + notq %rax + andq $1,%rdi + andq $2,%rax + orq %rdi,%rax + + + .byte 0xf3,0xc3 +.cfi_endproc +.size sgn0_pty_mod_384,.-sgn0_pty_mod_384 + +.globl sgn0_pty_mod_384x +.hidden sgn0_pty_mod_384x +.type sgn0_pty_mod_384x,@function +.align 32 +sgn0_pty_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 48(%rdi),%r8 + movq 56(%rdi),%r9 + movq 64(%rdi),%r10 + movq 72(%rdi),%r11 + movq 80(%rdi),%rcx + movq 88(%rdi),%rdx + + movq %r8,%rbx + orq %r9,%r8 + orq %r10,%r8 + orq %r11,%r8 + orq %rcx,%r8 + orq %rdx,%r8 + + leaq 0(%rdi),%rax + xorq %rdi,%rdi + movq %rbx,%rbp + addq %rbx,%rbx + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rdi + + subq 0(%rsi),%rbx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rdi + + movq %r8,0(%rsp) + notq %rdi + andq $1,%rbp + andq $2,%rdi + orq %rbp,%rdi + + movq 0(%rax),%r8 + movq 8(%rax),%r9 + movq 16(%rax),%r10 + movq 24(%rax),%r11 + movq 32(%rax),%rcx + movq 40(%rax),%rdx + + movq %r8,%rbx + orq %r9,%r8 + orq %r10,%r8 + orq %r11,%r8 + orq %rcx,%r8 + orq %rdx,%r8 + + xorq %rax,%rax + movq %rbx,%rbp + addq %rbx,%rbx + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rax + + subq 0(%rsi),%rbx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rax + + movq 0(%rsp),%rbx + + notq %rax + + testq %r8,%r8 + cmovzq %rdi,%rbp + + testq %rbx,%rbx + cmovnzq %rdi,%rax + + andq $1,%rbp + andq $2,%rax + orq %rbp,%rax + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sgn0_pty_mod_384x,.-sgn0_pty_mod_384x +.globl vec_select_32 +.hidden vec_select_32 +.type vec_select_32,@function +.align 32 +vec_select_32: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 16(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 16(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 16(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-16(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-16(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-16(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,16-16(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_select_32,.-vec_select_32 +.globl vec_select_48 +.hidden vec_select_48 +.type vec_select_48,@function +.align 32 +vec_select_48: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 24(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 24(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 24(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-24(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-24(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-24(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-24(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-24(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-24(%rdi) + pand %xmm4,%xmm0 + pand %xmm5,%xmm1 + por %xmm1,%xmm0 + movdqu %xmm0,32-24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_select_48,.-vec_select_48 +.globl vec_select_96 +.hidden vec_select_96 +.type vec_select_96,@function +.align 32 +vec_select_96: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 48(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 48(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 48(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-48(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-48(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-48(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-48(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-48(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-48(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-48(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-48(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-48(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-48(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-48(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-48(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-48(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-48(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-48(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,80-48(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_select_96,.-vec_select_96 +.globl vec_select_192 +.hidden vec_select_192 +.type vec_select_192,@function +.align 32 +vec_select_192: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 96(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 96(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 96(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-96(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-96(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-96(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-96(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-96(%rdi) + pand %xmm4,%xmm2 + movdqu 80+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-96(%rdi) + pand %xmm4,%xmm0 + movdqu 96+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-96(%rdi) + pand %xmm4,%xmm2 + movdqu 112+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-96(%rdi) + pand %xmm4,%xmm0 + movdqu 128+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 128+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,128-96(%rdi) + pand %xmm4,%xmm2 + movdqu 144+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 144+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,144-96(%rdi) + pand %xmm4,%xmm0 + movdqu 160+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 160+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,160-96(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,176-96(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_select_192,.-vec_select_192 +.globl vec_select_144 +.hidden vec_select_144 +.type vec_select_144,@function +.align 32 +vec_select_144: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 72(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 72(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 72(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-72(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-72(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-72(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-72(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-72(%rdi) + pand %xmm4,%xmm2 + movdqu 80+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-72(%rdi) + pand %xmm4,%xmm0 + movdqu 96+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-72(%rdi) + pand %xmm4,%xmm2 + movdqu 112+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-72(%rdi) + pand %xmm4,%xmm0 + pand %xmm5,%xmm1 + por %xmm1,%xmm0 + movdqu %xmm0,128-72(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_select_144,.-vec_select_144 +.globl vec_select_288 +.hidden vec_select_288 +.type vec_select_288,@function +.align 32 +vec_select_288: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 144(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 144(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 144(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-144(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-144(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-144(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-144(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-144(%rdi) + pand %xmm4,%xmm2 + movdqu 80+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-144(%rdi) + pand %xmm4,%xmm0 + movdqu 96+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-144(%rdi) + pand %xmm4,%xmm2 + movdqu 112+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-144(%rdi) + pand %xmm4,%xmm0 + movdqu 128+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 128+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,128-144(%rdi) + pand %xmm4,%xmm2 + movdqu 144+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 144+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,144-144(%rdi) + pand %xmm4,%xmm0 + movdqu 160+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 160+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,160-144(%rdi) + pand %xmm4,%xmm2 + movdqu 176+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 176+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,176-144(%rdi) + pand %xmm4,%xmm0 + movdqu 192+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 192+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,192-144(%rdi) + pand %xmm4,%xmm2 + movdqu 208+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 208+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,208-144(%rdi) + pand %xmm4,%xmm0 + movdqu 224+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 224+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,224-144(%rdi) + pand %xmm4,%xmm2 + movdqu 240+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 240+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,240-144(%rdi) + pand %xmm4,%xmm0 + movdqu 256+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 256+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,256-144(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,272-144(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_select_288,.-vec_select_288 +.globl vec_prefetch +.hidden vec_prefetch +.type vec_prefetch,@function +.align 32 +vec_prefetch: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + leaq -1(%rdi,%rsi,1),%rsi + movq $64,%rax + xorq %r8,%r8 + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + prefetchnta (%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_prefetch,.-vec_prefetch +.globl vec_is_zero_16x +.hidden vec_is_zero_16x +.type vec_is_zero_16x,@function +.align 32 +vec_is_zero_16x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + shrl $4,%esi + movdqu (%rdi),%xmm0 + leaq 16(%rdi),%rdi + +.Loop_is_zero: + decl %esi + jz .Loop_is_zero_done + movdqu (%rdi),%xmm1 + leaq 16(%rdi),%rdi + por %xmm1,%xmm0 + jmp .Loop_is_zero + +.Loop_is_zero_done: + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 +.byte 102,72,15,126,192 + incl %esi + testq %rax,%rax + cmovnzl %esi,%eax + xorl $1,%eax + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_is_zero_16x,.-vec_is_zero_16x +.globl vec_is_equal_16x +.hidden vec_is_equal_16x +.type vec_is_equal_16x,@function +.align 32 +vec_is_equal_16x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + shrl $4,%edx + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm1 + subq %rdi,%rsi + leaq 16(%rdi),%rdi + pxor %xmm1,%xmm0 + +.Loop_is_equal: + decl %edx + jz .Loop_is_equal_done + movdqu (%rdi),%xmm1 + movdqu (%rdi,%rsi,1),%xmm2 + leaq 16(%rdi),%rdi + pxor %xmm2,%xmm1 + por %xmm1,%xmm0 + jmp .Loop_is_equal + +.Loop_is_equal_done: + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 +.byte 102,72,15,126,192 + incl %edx + testq %rax,%rax + cmovnzl %edx,%eax + xorl $1,%eax + .byte 0xf3,0xc3 +.cfi_endproc +.size vec_is_equal_16x,.-vec_is_equal_16x + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/add_mod_384x384-x86_64.s b/crypto/blst_src/build/elf/add_mod_384x384-x86_64.s new file mode 100644 index 00000000000..084f3d8262d --- /dev/null +++ b/crypto/blst_src/build/elf/add_mod_384x384-x86_64.s @@ -0,0 +1,252 @@ +.text + +.type __add_mod_384x384,@function +.align 32 +__add_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + addq 0(%rdx),%r8 + movq 56(%rsi),%r15 + adcq 8(%rdx),%r9 + movq 64(%rsi),%rax + adcq 16(%rdx),%r10 + movq 72(%rsi),%rbx + adcq 24(%rdx),%r11 + movq 80(%rsi),%rbp + adcq 32(%rdx),%r12 + movq 88(%rsi),%rsi + adcq 40(%rdx),%r13 + movq %r8,0(%rdi) + adcq 48(%rdx),%r14 + movq %r9,8(%rdi) + adcq 56(%rdx),%r15 + movq %r10,16(%rdi) + adcq 64(%rdx),%rax + movq %r12,32(%rdi) + movq %r14,%r8 + adcq 72(%rdx),%rbx + movq %r11,24(%rdi) + movq %r15,%r9 + adcq 80(%rdx),%rbp + movq %r13,40(%rdi) + movq %rax,%r10 + adcq 88(%rdx),%rsi + movq %rbx,%r11 + sbbq %rdx,%rdx + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %rbp,%r12 + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%rbx + sbbq 32(%rcx),%rbp + movq %rsi,%r13 + sbbq 40(%rcx),%rsi + sbbq $0,%rdx + + cmovcq %r8,%r14 + cmovcq %r9,%r15 + cmovcq %r10,%rax + movq %r14,48(%rdi) + cmovcq %r11,%rbx + movq %r15,56(%rdi) + cmovcq %r12,%rbp + movq %rax,64(%rdi) + cmovcq %r13,%rsi + movq %rbx,72(%rdi) + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __add_mod_384x384,.-__add_mod_384x384 + +.type __sub_mod_384x384,@function +.align 32 +__sub_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __sub_mod_384x384,.-__sub_mod_384x384 + +.globl add_mod_384x384 +.hidden add_mod_384x384 +.type add_mod_384x384,@function +.align 32 +add_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __add_mod_384x384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size add_mod_384x384,.-add_mod_384x384 + +.globl sub_mod_384x384 +.hidden sub_mod_384x384 +.type sub_mod_384x384,@function +.align 32 +sub_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __sub_mod_384x384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sub_mod_384x384,.-sub_mod_384x384 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/ct_inverse_mod_256-armv8.S b/crypto/blst_src/build/elf/ct_inverse_mod_256-armv8.S new file mode 100644 index 00000000000..0c5ac5b882d --- /dev/null +++ b/crypto/blst_src/build/elf/ct_inverse_mod_256-armv8.S @@ -0,0 +1,785 @@ +.text + +.globl ct_inverse_mod_256 +.hidden ct_inverse_mod_256 +.type ct_inverse_mod_256, %function +.align 5 +ct_inverse_mod_256: + .inst 0xd503233f + stp x29, x30, [sp,#-80]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + sub sp, sp, #1040 + + ldp x4, x5, [x1,#8*0] + ldp x6, x7, [x1,#8*2] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + str x0, [sp] + + ldp x8, x9, [x2,#8*0] + ldp x10, x11, [x2,#8*2] + + stp x4, x5, [x1,#8*0] // copy input to |a| + stp x6, x7, [x1,#8*2] + stp x8, x9, [x1,#8*4] // copy modulus to |b| + stp x10, x11, [x1,#8*6] + + ////////////////////////////////////////// first iteration + bl .Lab_approximation_31_256_loaded + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + str x12,[x0,#8*8] // initialize |u| with |f0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to dst |b| + bl __smul_256_n_shift_by_31 + str x12, [x0,#8*9] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + ldr x8, [x1,#8*8] // |u| + ldr x9, [x1,#8*13] // |v| + madd x4, x16, x8, xzr // |u|*|f0| + madd x4, x17, x9, x4 // |v|*|g0| + str x4, [x0,#8*4] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*5] + stp x5, x5, [x0,#8*7] + + madd x4, x12, x8, xzr // |u|*|f1| + madd x4, x13, x9, x4 // |v|*|g1| + str x4, [x0,#8*9] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*10] + stp x5, x5, [x0,#8*12] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + ////////////////////////////////////////// two[!] last iterations + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #47 // 31 + 512 % 31 + //bl __ab_approximation_62_256 // |a| and |b| are exact, + ldr x7, [x1,#8*0] // just load + ldr x11, [x1,#8*4] + bl __inner_loop_62_256 + + mov x16, x14 + mov x17, x15 + ldr x0, [sp] // original out_ptr + bl __smul_256x63 + bl __smul_512x63_tail + ldr x30, [x29,#8] + + smulh x20, x7, x17 // figure out top-most limb + ldp x8, x9, [x3,#8*0] + adc x23, x23, x25 + ldp x10, x11, [x3,#8*2] + + add x20, x20, x23 // x20 is 1, 0 or -1 + asr x19, x20, #63 // sign as mask + + and x23, x8, x19 // add mod<<256 conditionally + and x24, x9, x19 + adds x4, x4, x23 + and x25, x10, x19 + adcs x5, x5, x24 + and x26, x11, x19 + adcs x6, x6, x25 + adcs x7, x22, x26 + adc x20, x20, xzr // x20 is 1, 0 or -1 + + neg x19, x20 + orr x20, x20, x19 // excess bit or sign as mask + asr x19, x19, #63 // excess bit as mask + + and x8, x8, x20 // mask |mod| + and x9, x9, x20 + and x10, x10, x20 + and x11, x11, x20 + + eor x8, x8, x19 // conditionally negate |mod| + eor x9, x9, x19 + adds x8, x8, x19, lsr#63 + eor x10, x10, x19 + adcs x9, x9, xzr + eor x11, x11, x19 + adcs x10, x10, xzr + adc x11, x11, xzr + + adds x4, x4, x8 // final adjustment for |mod|<<256 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*4] + adc x7, x7, x11 + stp x6, x7, [x0,#8*6] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldr x29, [sp],#80 + .inst 0xd50323bf + ret +.size ct_inverse_mod_256,.-ct_inverse_mod_256 + +//////////////////////////////////////////////////////////////////////// +.type __smul_256x63, %function +.align 5 +__smul_256x63: + ldp x4, x5, [x1,#8*0+64] // load |u| (or |v|) + asr x14, x16, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x6, x7, [x1,#8*2+64] + eor x16, x16, x14 // conditionally negate |f_| (or |g_|) + ldr x22, [x1,#8*4+64] + + eor x4, x4, x14 // conditionally negate |u| (or |v|) + sub x16, x16, x14 + eor x5, x5, x14 + adds x4, x4, x14, lsr#63 + eor x6, x6, x14 + adcs x5, x5, xzr + eor x7, x7, x14 + adcs x6, x6, xzr + eor x22, x22, x14 + umulh x19, x4, x16 + adcs x7, x7, xzr + umulh x20, x5, x16 + adcs x22, x22, xzr + umulh x21, x6, x16 + mul x4, x4, x16 + cmp x16, #0 + mul x5, x5, x16 + csel x22, x22, xzr, ne + mul x6, x6, x16 + adds x5, x5, x19 + mul x24, x7, x16 + adcs x6, x6, x20 + adcs x24, x24, x21 + adc x26, xzr, xzr + ldp x8, x9, [x1,#8*0+104] // load |u| (or |v|) + asr x14, x17, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x10, x11, [x1,#8*2+104] + eor x17, x17, x14 // conditionally negate |f_| (or |g_|) + ldr x23, [x1,#8*4+104] + + eor x8, x8, x14 // conditionally negate |u| (or |v|) + sub x17, x17, x14 + eor x9, x9, x14 + adds x8, x8, x14, lsr#63 + eor x10, x10, x14 + adcs x9, x9, xzr + eor x11, x11, x14 + adcs x10, x10, xzr + eor x23, x23, x14 + umulh x19, x8, x17 + adcs x11, x11, xzr + umulh x20, x9, x17 + adcs x23, x23, xzr + umulh x21, x10, x17 + adc x15, xzr, xzr // used in __smul_512x63_tail + mul x8, x8, x17 + cmp x17, #0 + mul x9, x9, x17 + csel x23, x23, xzr, ne + mul x10, x10, x17 + adds x9, x9, x19 + mul x25, x11, x17 + adcs x10, x10, x20 + adcs x25, x25, x21 + adc x26, x26, xzr + + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*0] + adcs x24, x24, x25 + stp x6, x24, [x0,#8*2] + + ret +.size __smul_256x63,.-__smul_256x63 + +.type __smul_512x63_tail, %function +.align 5 +__smul_512x63_tail: + umulh x24, x7, x16 + ldp x5, x6, [x1,#8*18] // load rest of |v| + adc x26, x26, xzr + ldr x7, [x1,#8*20] + and x22, x22, x16 + + umulh x11, x11, x17 // resume |v|*|g1| chain + + sub x24, x24, x22 // tie up |u|*|f1| chain + asr x25, x24, #63 + + eor x5, x5, x14 // conditionally negate rest of |v| + eor x6, x6, x14 + adds x5, x5, x15 + eor x7, x7, x14 + adcs x6, x6, xzr + umulh x19, x23, x17 + adc x7, x7, xzr + umulh x20, x5, x17 + add x11, x11, x26 + umulh x21, x6, x17 + + mul x4, x23, x17 + mul x5, x5, x17 + adds x4, x4, x11 + mul x6, x6, x17 + adcs x5, x5, x19 + mul x22, x7, x17 + adcs x6, x6, x20 + adcs x22, x22, x21 + adc x23, xzr, xzr // used in the final step + + adds x4, x4, x24 + adcs x5, x5, x25 + adcs x6, x6, x25 + stp x4, x5, [x0,#8*4] + adcs x22, x22, x25 // carry is used in the final step + stp x6, x22, [x0,#8*6] + + ret +.size __smul_512x63_tail,.-__smul_512x63_tail + +.type __smul_256_n_shift_by_31, %function +.align 5 +__smul_256_n_shift_by_31: + ldp x4, x5, [x1,#8*0+0] // load |a| (or |b|) + asr x24, x12, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x6, x7, [x1,#8*2+0] + eor x25, x12, x24 // conditionally negate |f0| (or |g0|) + + eor x4, x4, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x5, x5, x24 + adds x4, x4, x24, lsr#63 + eor x6, x6, x24 + adcs x5, x5, xzr + eor x7, x7, x24 + umulh x19, x4, x25 + adcs x6, x6, xzr + umulh x20, x5, x25 + adc x7, x7, xzr + umulh x21, x6, x25 + and x24, x24, x25 + umulh x22, x7, x25 + neg x24, x24 + + mul x4, x4, x25 + mul x5, x5, x25 + mul x6, x6, x25 + adds x5, x5, x19 + mul x7, x7, x25 + adcs x6, x6, x20 + adcs x7, x7, x21 + adc x22, x22, x24 + ldp x8, x9, [x1,#8*0+32] // load |a| (or |b|) + asr x24, x13, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x10, x11, [x1,#8*2+32] + eor x25, x13, x24 // conditionally negate |f0| (or |g0|) + + eor x8, x8, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x9, x9, x24 + adds x8, x8, x24, lsr#63 + eor x10, x10, x24 + adcs x9, x9, xzr + eor x11, x11, x24 + umulh x19, x8, x25 + adcs x10, x10, xzr + umulh x20, x9, x25 + adc x11, x11, xzr + umulh x21, x10, x25 + and x24, x24, x25 + umulh x23, x11, x25 + neg x24, x24 + + mul x8, x8, x25 + mul x9, x9, x25 + mul x10, x10, x25 + adds x9, x9, x19 + mul x11, x11, x25 + adcs x10, x10, x20 + adcs x11, x11, x21 + adc x23, x23, x24 + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + adcs x7, x7, x11 + adc x8, x22, x23 + + extr x4, x5, x4, #31 + extr x5, x6, x5, #31 + extr x6, x7, x6, #31 + asr x23, x8, #63 // result's sign as mask + extr x7, x8, x7, #31 + + eor x4, x4, x23 // ensure the result is positive + eor x5, x5, x23 + adds x4, x4, x23, lsr#63 + eor x6, x6, x23 + adcs x5, x5, xzr + eor x7, x7, x23 + adcs x6, x6, xzr + stp x4, x5, [x0,#8*0] + adc x7, x7, xzr + stp x6, x7, [x0,#8*2] + + eor x12, x12, x23 // adjust |f/g| accordingly + eor x13, x13, x23 + sub x12, x12, x23 + sub x13, x13, x23 + + ret +.size __smul_256_n_shift_by_31,.-__smul_256_n_shift_by_31 +.type __ab_approximation_31_256, %function +.align 4 +__ab_approximation_31_256: + ldp x6, x7, [x1,#8*2] + ldp x10, x11, [x1,#8*6] + ldp x4, x5, [x1,#8*0] + ldp x8, x9, [x1,#8*4] + +.Lab_approximation_31_256_loaded: + orr x19, x7, x11 // check top-most limbs, ... + cmp x19, #0 + csel x7, x7, x6, ne + csel x11, x11, x10, ne + csel x6, x6, x5, ne + orr x19, x7, x11 // and ones before top-most, ... + csel x10, x10, x9, ne + + cmp x19, #0 + csel x7, x7, x6, ne + csel x11, x11, x10, ne + csel x6, x6, x4, ne + orr x19, x7, x11 // and one more, ... + csel x10, x10, x8, ne + + clz x19, x19 + cmp x19, #64 + csel x19, x19, xzr, ne + csel x7, x7, x6, ne + csel x11, x11, x10, ne + neg x20, x19 + + lslv x7, x7, x19 // align high limbs to the left + lslv x11, x11, x19 + lsrv x6, x6, x20 + lsrv x10, x10, x20 + and x6, x6, x20, asr#6 + and x10, x10, x20, asr#6 + orr x7, x7, x6 + orr x11, x11, x10 + + bfxil x7, x4, #0, #31 + bfxil x11, x8, #0, #31 + + b __inner_loop_31_256 + ret +.size __ab_approximation_31_256,.-__ab_approximation_31_256 + +.type __inner_loop_31_256, %function +.align 4 +__inner_loop_31_256: + mov x2, #31 + mov x13, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x15, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x23,#0x7FFFFFFF7FFFFFFF + +.Loop_31_256: + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x15 + csel x11, x11, x7, hs // |b_| = |a_| + csel x7, x21, x20, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x15, x15, x13, hs // exchange |fg0| and |fg1| + csel x13, x13, x19, hs + lsr x7, x7, #1 + and x19, x15, x22 + and x20, x23, x22 + sub x13, x13, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x15, x15, x15 // |f1|<<=1 + add x13, x13, x20 + sub x15, x15, x23 + cbnz x2, .Loop_31_256 + + mov x23, #0x7FFFFFFF + ubfx x12, x13, #0, #32 + ubfx x13, x13, #32, #32 + ubfx x14, x15, #0, #32 + ubfx x15, x15, #32, #32 + sub x12, x12, x23 // remove bias + sub x13, x13, x23 + sub x14, x14, x23 + sub x15, x15, x23 + + ret +.size __inner_loop_31_256,.-__inner_loop_31_256 + +.type __inner_loop_62_256, %function +.align 4 +__inner_loop_62_256: + mov x12, #1 // |f0|=1 + mov x13, #0 // |g0|=0 + mov x14, #0 // |f1|=0 + mov x15, #1 // |g1|=1 + +.Loop_62_256: + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x12 + csel x11, x11, x7, hs // |b_| = |a_| + csel x7, x21, x20, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + mov x20, x13 + csel x12, x12, x14, hs // exchange |f0| and |f1| + csel x14, x14, x19, hs + csel x13, x13, x15, hs // exchange |g0| and |g1| + csel x15, x15, x20, hs + lsr x7, x7, #1 + and x19, x14, x22 + and x20, x15, x22 + add x14, x14, x14 // |f1|<<=1 + add x15, x15, x15 // |g1|<<=1 + sub x12, x12, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x13, x13, x20 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, .Loop_62_256 + + ret +.size __inner_loop_62_256,.-__inner_loop_62_256 diff --git a/crypto/blst_src/build/elf/ct_inverse_mod_256-x86_64.s b/crypto/blst_src/build/elf/ct_inverse_mod_256-x86_64.s new file mode 100644 index 00000000000..0f0ca4923d7 --- /dev/null +++ b/crypto/blst_src/build/elf/ct_inverse_mod_256-x86_64.s @@ -0,0 +1,1186 @@ +.text + +.globl ct_inverse_mod_256 +.hidden ct_inverse_mod_256 +.type ct_inverse_mod_256,@function +.align 32 +ct_inverse_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $1072,%rsp +.cfi_adjust_cfa_offset 1072 + + + leaq 48+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + movq 0(%rdx),%r12 + movq 8(%rdx),%r13 + movq 16(%rdx),%r14 + movq 24(%rdx),%r15 + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + + movq %r12,32(%rax) + movq %r13,40(%rax) + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rax,%rsi + + + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + + + movq %rdx,64(%rdi) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + + + movq %rdx,72(%rdi) + + + xorq $256,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + + + + movq 64(%rsi),%r8 + movq 104(%rsi),%r12 + movq %r8,%r9 + imulq 0(%rsp),%r8 + movq %r12,%r13 + imulq 8(%rsp),%r12 + addq %r12,%r8 + movq %r8,32(%rdi) + sarq $63,%r8 + movq %r8,40(%rdi) + movq %r8,48(%rdi) + movq %r8,56(%rdi) + movq %r8,64(%rdi) + leaq 64(%rsi),%rsi + + imulq %rdx,%r9 + imulq %rcx,%r13 + addq %r13,%r9 + movq %r9,72(%rdi) + sarq $63,%r9 + movq %r9,80(%rdi) + movq %r9,88(%rdi) + movq %r9,96(%rdi) + movq %r9,104(%rdi) + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + sarq $63,%rbp + movq %rbp,40(%rdi) + movq %rbp,48(%rdi) + movq %rbp,56(%rdi) + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + + xorq $256+64,%rsi + movl $47,%edx + + movq 0(%rsi),%r8 + + movq 32(%rsi),%r10 + + call __inner_loop_62_256 + + + + + + + + leaq 64(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulq_512x63 + adcq %rbp,%rdx + + movq 40(%rsp),%rsi + movq %rdx,%rax + sarq $63,%rdx + + movq %rdx,%r8 + movq %rdx,%r9 + andq 0(%rsi),%r8 + movq %rdx,%r10 + andq 8(%rsi),%r9 + andq 16(%rsi),%r10 + andq 24(%rsi),%rdx + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %rdx,%r15 + adcq $0,%rax + + movq %rax,%rdx + negq %rax + orq %rax,%rdx + sarq $63,%rax + + movq %rdx,%r8 + movq %rdx,%r9 + andq 0(%rsi),%r8 + movq %rdx,%r10 + andq 8(%rsi),%r9 + andq 16(%rsi),%r10 + andq 24(%rsi),%rdx + + xorq %rax,%r8 + xorq %rcx,%rcx + xorq %rax,%r9 + subq %rax,%rcx + xorq %rax,%r10 + xorq %rax,%rdx + addq %rcx,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%rdx + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %rdx,%r15 + + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 1072(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -1072-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size ct_inverse_mod_256,.-ct_inverse_mod_256 +.type __smulq_512x63,@function +.align 32 +__smulq_512x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%rbp + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%rbp + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%rbp + + mulq %rbx + movq %rax,0(%rdi) + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %r9,8(%rdi) + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %r10,16(%rdi) + movq %rdx,%r11 + andq %rbx,%rbp + negq %rbp + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq %r11,24(%rdi) + + movq 40(%rsi),%r8 + movq 48(%rsi),%r9 + movq 56(%rsi),%r10 + movq 64(%rsi),%r11 + movq 72(%rsi),%r12 + movq 80(%rsi),%r13 + movq 88(%rsi),%r14 + movq 96(%rsi),%r15 + + movq %rcx,%rdx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rcx + addq %rax,%rcx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + + mulq %rcx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rcx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rcx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rcx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rcx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + mulq %rcx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rcx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + imulq %rcx + addq %rax,%r15 + adcq $0,%rdx + + movq %rbp,%rbx + sarq $63,%rbp + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq %rbx,%r12 + adcq %rbp,%r13 + adcq %rbp,%r14 + adcq %rbp,%r15 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulq_512x63,.-__smulq_512x63 + +.type __smulq_256x63,@function +.align 32 +__smulq_256x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%rbp + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%rbp + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%rbp + + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + andq %rbx,%rbp + negq %rbp + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq %rcx,%rdx + movq 40+0(%rsi),%r12 + movq 40+8(%rsi),%r13 + movq 40+16(%rsi),%r14 + movq 40+24(%rsi),%r15 + movq 40+32(%rsi),%rcx + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + xorq %rdx,%rcx + addq %r12,%rax + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rcx + + mulq %rbx + movq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + mulq %rbx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rbx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + andq %rbx,%rcx + negq %rcx + mulq %rbx + addq %rax,%r15 + adcq %rdx,%rcx + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rcx,%rbp + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %rbp,32(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulq_256x63,.-__smulq_256x63 +.type __smulq_256_n_shift_by_31,@function +.align 32 +__smulq_256_n_shift_by_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,0(%rdi) + movq %rcx,8(%rdi) + movq %rdx,%rbp + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + + movq %rbp,%rbx + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rbx + addq %rax,%rbx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + andq %rbx,%rbp + negq %rbp + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r14 + movq 32+24(%rsi),%r15 + + movq %rcx,%rbx + sarq $63,%rcx + xorq %rax,%rax + subq %rcx,%rax + + xorq %rcx,%rbx + addq %rax,%rbx + + xorq %rcx,%r12 + xorq %rcx,%r13 + xorq %rcx,%r14 + xorq %rcx,%r15 + addq %r12,%rax + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + + mulq %rbx + movq %rax,%r12 + movq %r13,%rax + andq %rbx,%rcx + negq %rcx + movq %rdx,%r13 + mulq %rbx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rbx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + mulq %rbx + addq %rax,%r15 + adcq %rdx,%rcx + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rcx,%rbp + + movq 0(%rdi),%rdx + movq 8(%rdi),%rcx + + shrdq $31,%r9,%r8 + shrdq $31,%r10,%r9 + shrdq $31,%r11,%r10 + shrdq $31,%rbp,%r11 + + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + xorq %rbp,%rdx + xorq %rbp,%rcx + addq %rax,%rdx + addq %rax,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulq_256_n_shift_by_31,.-__smulq_256_n_shift_by_31 +.type __ab_approximation_31_256,@function +.align 32 +__ab_approximation_31_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 24(%rsi),%r9 + movq 56(%rsi),%r11 + movq 16(%rsi),%rbx + movq 48(%rsi),%rbp + movq 8(%rsi),%r8 + movq 40(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 0(%rsi),%r8 + cmovzq %r10,%rbp + movq 32(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r9 + cmovzq %r10,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + movl $0x7FFFFFFF,%eax + andq %rax,%r8 + andq %rax,%r10 + notq %rax + andq %rax,%r9 + andq %rax,%r11 + orq %r9,%r8 + orq %r11,%r10 + + jmp __inner_loop_31_256 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ab_approximation_31_256,.-__ab_approximation_31_256 +.type __inner_loop_31_256,@function +.align 32 +__inner_loop_31_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rcx + movq $0x800000007FFFFFFF,%r13 + movq $0x7FFFFFFF7FFFFFFF,%r15 + +.Loop_31_256: + cmpq %r10,%r8 + movq %r8,%rax + movq %r10,%rbx + movq %rcx,%rbp + movq %r13,%r14 + cmovbq %r10,%r8 + cmovbq %rax,%r10 + cmovbq %r13,%rcx + cmovbq %rbp,%r13 + + subq %r10,%r8 + subq %r13,%rcx + addq %r15,%rcx + + testq $1,%rax + cmovzq %rax,%r8 + cmovzq %rbx,%r10 + cmovzq %rbp,%rcx + cmovzq %r14,%r13 + + shrq $1,%r8 + addq %r13,%r13 + subq %r15,%r13 + subl $1,%edx + jnz .Loop_31_256 + + shrq $32,%r15 + movl %ecx,%edx + movl %r13d,%r12d + shrq $32,%rcx + shrq $32,%r13 + subq %r15,%rdx + subq %r15,%rcx + subq %r15,%r12 + subq %r15,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __inner_loop_31_256,.-__inner_loop_31_256 + +.type __inner_loop_62_256,@function +.align 32 +__inner_loop_62_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movl %edx,%r15d + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq %rdx,%r13 + movq %rdx,%r14 + +.Loop_62_256: + xorq %rax,%rax + testq %r14,%r8 + movq %r10,%rbx + cmovnzq %r10,%rax + subq %r8,%rbx + movq %r8,%rbp + subq %rax,%r8 + cmovcq %rbx,%r8 + cmovcq %rbp,%r10 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrq $1,%r8 + testq %r14,%rbp + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%r15d + jnz .Loop_62_256 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __inner_loop_62_256,.-__inner_loop_62_256 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/ct_inverse_mod_384-armv8.S b/crypto/blst_src/build/elf/ct_inverse_mod_384-armv8.S new file mode 100644 index 00000000000..99bb9def767 --- /dev/null +++ b/crypto/blst_src/build/elf/ct_inverse_mod_384-armv8.S @@ -0,0 +1,718 @@ +.text + +.globl ct_inverse_mod_383 +.hidden ct_inverse_mod_383 +.type ct_inverse_mod_383, %function +.align 5 +ct_inverse_mod_383: + .inst 0xd503233f + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #1040 + + ldp x22, x4, [x1,#8*0] + ldp x5, x6, [x1,#8*2] + ldp x7, x8, [x1,#8*4] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + stp x0, x3, [sp] + + ldp x9, x10, [x2,#8*0] + ldp x11, x12, [x2,#8*2] + ldp x13, x14, [x2,#8*4] + + stp x22, x4, [x1,#8*0] // copy input to |a| + stp x5, x6, [x1,#8*2] + stp x7, x8, [x1,#8*4] + stp x9, x10, [x1,#8*6] // copy modulus to |b| + stp x11, x12, [x1,#8*8] + stp x13, x14, [x1,#8*10] + + ////////////////////////////////////////// first iteration + mov x2, #62 + bl .Lab_approximation_62_loaded + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + str x15,[x0,#8*12] // initialize |u| with |f0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to dst |b| + bl __smul_383_n_shift_by_62 + str x15, [x0,#8*12] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + ldr x7, [x1,#8*12] // |u| + ldr x8, [x1,#8*18] // |v| + mul x3, x20, x7 // |u|*|f0| + smulh x4, x20, x7 + mul x5, x21, x8 // |v|*|g0| + smulh x6, x21, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*6] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*8] + stp x5, x5, [x0,#8*10] + + mul x3, x15, x7 // |u|*|f1| + smulh x4, x15, x7 + mul x5, x16, x8 // |v|*|g1| + smulh x6, x16, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*12] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*14] + stp x5, x5, [x0,#8*16] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + asr x27, x27, #63 // sign extension + stp x27, x27, [x0,#8*6] + stp x27, x27, [x0,#8*8] + stp x27, x27, [x0,#8*10] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + ////////////////////////////////////////// iteration before last + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldp x3, x8, [x1,#8*0] // just load + ldp x9, x14, [x1,#8*6] + bl __inner_loop_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + str x3, [x0,#8*0] + str x9, [x0,#8*6] + + mov x20, x15 // exact |f0| + mov x21, x16 // exact |g0| + mov x15, x17 + mov x16, x19 + add x0, x0, #8*12 // pointer to dst |u| + bl __smul_383x63 + + mov x20, x15 // exact |f1| + mov x21, x16 // exact |g1| + add x0, x0, #8*6 // pointer to dst |v| + bl __smul_383x63 + bl __smul_767x63_tail + + ////////////////////////////////////////// last iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #22 // 766 % 62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldr x3, [x1,#8*0] // just load + eor x8, x8, x8 + ldr x9, [x1,#8*6] + eor x14, x14, x14 + bl __inner_loop_62 + + mov x20, x17 + mov x21, x19 + ldp x0, x15, [sp] // original out_ptr and n_ptr + bl __smul_383x63 + bl __smul_767x63_tail + ldr x30, [x29,#8] + + asr x22, x8, #63 // sign as mask + ldp x9, x10, [x15,#8*0] + ldp x11, x12, [x15,#8*2] + ldp x13, x14, [x15,#8*4] + + and x9, x9, x22 // add mod<<384 conditionally + and x10, x10, x22 + adds x3, x3, x9 + and x11, x11, x22 + adcs x4, x4, x10 + and x12, x12, x22 + adcs x5, x5, x11 + and x13, x13, x22 + adcs x6, x6, x12 + and x14, x14, x22 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*8] + adc x8, x8, x14 + stp x7, x8, [x0,#8*10] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 + .inst 0xd50323bf + ret +.size ct_inverse_mod_383,.-ct_inverse_mod_383 + +//////////////////////////////////////////////////////////////////////// +// see corresponding commentary in ctx_inverse_mod_384-x86_64... +.type __smul_383x63, %function +.align 5 +__smul_383x63: + ldp x3, x4, [x1,#8*0+96] // load |u| (or |v|) + asr x17, x20, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x5, x6, [x1,#8*2+96] + eor x20, x20, x17 // conditionally negate |f_| (or |g_|) + ldp x7, x8, [x1,#8*4+96] + + eor x3, x3, x17 // conditionally negate |u| (or |v|) + sub x20, x20, x17 + eor x4, x4, x17 + adds x3, x3, x17, lsr#63 + eor x5, x5, x17 + adcs x4, x4, xzr + eor x6, x6, x17 + adcs x5, x5, xzr + eor x7, x7, x17 + adcs x6, x6, xzr + umulh x22, x3, x20 + eor x8, x8, x17 + umulh x23, x4, x20 + adcs x7, x7, xzr + umulh x24, x5, x20 + adcs x8, x8, xzr + umulh x25, x6, x20 + umulh x26, x7, x20 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x22 + mul x6, x6, x20 + adcs x5, x5, x23 + mul x7, x7, x20 + adcs x6, x6, x24 + mul x27,x8, x20 + adcs x7, x7, x25 + adcs x27,x27,x26 + adc x2, xzr, xzr + ldp x9, x10, [x1,#8*0+144] // load |u| (or |v|) + asr x17, x21, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x11, x12, [x1,#8*2+144] + eor x21, x21, x17 // conditionally negate |f_| (or |g_|) + ldp x13, x14, [x1,#8*4+144] + + eor x9, x9, x17 // conditionally negate |u| (or |v|) + sub x21, x21, x17 + eor x10, x10, x17 + adds x9, x9, x17, lsr#63 + eor x11, x11, x17 + adcs x10, x10, xzr + eor x12, x12, x17 + adcs x11, x11, xzr + eor x13, x13, x17 + adcs x12, x12, xzr + umulh x22, x9, x21 + eor x14, x14, x17 + umulh x23, x10, x21 + adcs x13, x13, xzr + umulh x24, x11, x21 + adcs x14, x14, xzr + umulh x25, x12, x21 + adc x19, xzr, xzr // used in __smul_767x63_tail + umulh x26, x13, x21 + mul x9, x9, x21 + mul x10, x10, x21 + mul x11, x11, x21 + adds x10, x10, x22 + mul x12, x12, x21 + adcs x11, x11, x23 + mul x13, x13, x21 + adcs x12, x12, x24 + mul x28,x14, x21 + adcs x13, x13, x25 + adcs x28,x28,x26 + adc x2, x2, xzr + + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + stp x3, x4, [x0,#8*0] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*2] + adcs x27, x27, x28 + stp x7, x27, [x0,#8*4] + adc x28, x2, xzr // used in __smul_767x63_tail + + ret +.size __smul_383x63,.-__smul_383x63 + +.type __smul_767x63_tail, %function +.align 5 +__smul_767x63_tail: + smulh x27, x8, x20 + ldp x3, x4, [x1,#8*24] // load rest of |v| + umulh x14,x14, x21 + ldp x5, x6, [x1,#8*26] + ldp x7, x8, [x1,#8*28] + + eor x3, x3, x17 // conditionally negate rest of |v| + eor x4, x4, x17 + eor x5, x5, x17 + adds x3, x3, x19 + eor x6, x6, x17 + adcs x4, x4, xzr + eor x7, x7, x17 + adcs x5, x5, xzr + eor x8, x8, x17 + adcs x6, x6, xzr + umulh x22, x3, x21 + adcs x7, x7, xzr + umulh x23, x4, x21 + adc x8, x8, xzr + + umulh x24, x5, x21 + add x14, x14, x28 + umulh x25, x6, x21 + asr x28, x27, #63 + umulh x26, x7, x21 + mul x3, x3, x21 + mul x4, x4, x21 + mul x5, x5, x21 + adds x3, x3, x14 + mul x6, x6, x21 + adcs x4, x4, x22 + mul x7, x7, x21 + adcs x5, x5, x23 + mul x8, x8, x21 + adcs x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, x26 + + adds x3, x3, x27 + adcs x4, x4, x28 + adcs x5, x5, x28 + adcs x6, x6, x28 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x28 + stp x5, x6, [x0,#8*8] + adc x8, x8, x28 + stp x7, x8, [x0,#8*10] + + ret +.size __smul_767x63_tail,.-__smul_767x63_tail + +.type __smul_383_n_shift_by_62, %function +.align 5 +__smul_383_n_shift_by_62: + ldp x3, x4, [x1,#8*0+0] // load |a| (or |b|) + asr x28, x15, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x5, x6, [x1,#8*2+0] + eor x2, x15, x28 // conditionally negate |f0| (or |g0|) + ldp x7, x8, [x1,#8*4+0] + + eor x3, x3, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + umulh x22, x3, x2 + adcs x6, x6, xzr + umulh x23, x4, x2 + eor x8, x8, x28 + umulh x24, x5, x2 + adcs x7, x7, xzr + umulh x25, x6, x2 + adc x8, x8, xzr + + umulh x26, x7, x2 + smulh x27, x8, x2 + mul x3, x3, x2 + mul x4, x4, x2 + mul x5, x5, x2 + adds x4, x4, x22 + mul x6, x6, x2 + adcs x5, x5, x23 + mul x7, x7, x2 + adcs x6, x6, x24 + mul x8, x8, x2 + adcs x7, x7, x25 + adcs x8, x8 ,x26 + adc x27, x27, xzr + ldp x9, x10, [x1,#8*0+48] // load |a| (or |b|) + asr x28, x16, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x11, x12, [x1,#8*2+48] + eor x2, x16, x28 // conditionally negate |f0| (or |g0|) + ldp x13, x14, [x1,#8*4+48] + + eor x9, x9, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x10, x10, x28 + adds x9, x9, x28, lsr#63 + eor x11, x11, x28 + adcs x10, x10, xzr + eor x12, x12, x28 + adcs x11, x11, xzr + eor x13, x13, x28 + umulh x22, x9, x2 + adcs x12, x12, xzr + umulh x23, x10, x2 + eor x14, x14, x28 + umulh x24, x11, x2 + adcs x13, x13, xzr + umulh x25, x12, x2 + adc x14, x14, xzr + + umulh x26, x13, x2 + smulh x28, x14, x2 + mul x9, x9, x2 + mul x10, x10, x2 + mul x11, x11, x2 + adds x10, x10, x22 + mul x12, x12, x2 + adcs x11, x11, x23 + mul x13, x13, x2 + adcs x12, x12, x24 + mul x14, x14, x2 + adcs x13, x13, x25 + adcs x14, x14 ,x26 + adc x28, x28, xzr + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x27, x28 + + extr x3, x4, x3, #62 + extr x4, x5, x4, #62 + extr x5, x6, x5, #62 + asr x28, x9, #63 + extr x6, x7, x6, #62 + extr x7, x8, x7, #62 + extr x8, x9, x8, #62 + + eor x3, x3, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + adcs x6, x6, xzr + eor x8, x8, x28 + stp x3, x4, [x0,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x0,#8*2] + adc x8, x8, xzr + stp x7, x8, [x0,#8*4] + + eor x15, x15, x28 + eor x16, x16, x28 + sub x15, x15, x28 + sub x16, x16, x28 + + ret +.size __smul_383_n_shift_by_62,.-__smul_383_n_shift_by_62 +.type __ab_approximation_62, %function +.align 4 +__ab_approximation_62: + ldp x7, x8, [x1,#8*4] + ldp x13, x14, [x1,#8*10] + ldp x5, x6, [x1,#8*2] + ldp x11, x12, [x1,#8*8] + +.Lab_approximation_62_loaded: + orr x22, x8, x14 // check top-most limbs, ... + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x6, ne + orr x22, x8, x14 // ... ones before top-most, ... + csel x13, x13, x12, ne + + ldp x3, x4, [x1,#8*0] + ldp x9, x10, [x1,#8*6] + + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x5, ne + orr x22, x8, x14 // ... and ones before that ... + csel x13, x13, x11, ne + + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x4, ne + orr x22, x8, x14 + csel x13, x13, x10, ne + + clz x22, x22 + cmp x22, #64 + csel x22, x22, xzr, ne + csel x8, x8, x7, ne + csel x14, x14, x13, ne + neg x23, x22 + + lslv x8, x8, x22 // align high limbs to the left + lslv x14, x14, x22 + lsrv x7, x7, x23 + lsrv x13, x13, x23 + and x7, x7, x23, asr#6 + and x13, x13, x23, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + b __inner_loop_62 + ret +.size __ab_approximation_62,.-__ab_approximation_62 +.type __inner_loop_62, %function +.align 4 +__inner_loop_62: + mov x15, #1 // |f0|=1 + mov x16, #0 // |g0|=0 + mov x17, #0 // |f1|=0 + mov x19, #1 // |g1|=1 + +.Loop_62: + sbfx x28, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + subs x24, x9, x3 // |b_|-|a_| + and x22, x9, x28 + sbc x25, x14, x8 + and x23, x14, x28 + subs x26, x3, x22 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x22, x15 + sbcs x27, x8, x23 + mov x23, x16 + csel x9, x9, x3, hs // |b_| = |a_| + csel x14, x14, x8, hs + csel x3, x26, x24, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x8, x27, x25, hs + csel x15, x15, x17, hs // exchange |f0| and |f1| + csel x17, x17, x22, hs + csel x16, x16, x19, hs // exchange |g0| and |g1| + csel x19, x19, x23, hs + extr x3, x8, x3, #1 + lsr x8, x8, #1 + and x22, x17, x28 + and x23, x19, x28 + add x17, x17, x17 // |f1|<<=1 + add x19, x19, x19 // |g1|<<=1 + sub x15, x15, x22 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x16, x16, x23 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, .Loop_62 + + ret +.size __inner_loop_62,.-__inner_loop_62 diff --git a/crypto/blst_src/build/elf/ct_is_square_mod_384-armv8.S b/crypto/blst_src/build/elf/ct_is_square_mod_384-armv8.S new file mode 100644 index 00000000000..07dd99a8af3 --- /dev/null +++ b/crypto/blst_src/build/elf/ct_is_square_mod_384-armv8.S @@ -0,0 +1,325 @@ +.text + +.globl ct_is_square_mod_384 +.hidden ct_is_square_mod_384 +.type ct_is_square_mod_384, %function +.align 5 +ct_is_square_mod_384: + .inst 0xd503233f + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #512 + + ldp x3, x4, [x0,#8*0] // load input + ldp x5, x6, [x0,#8*2] + ldp x7, x8, [x0,#8*4] + + add x0, sp, #255 // find closest 256-byte-aligned spot + and x0, x0, #-256 // in the frame... + + ldp x9, x10, [x1,#8*0] // load modulus + ldp x11, x12, [x1,#8*2] + ldp x13, x14, [x1,#8*4] + + stp x3, x4, [x0,#8*6] // copy input to |a| + stp x5, x6, [x0,#8*8] + stp x7, x8, [x0,#8*10] + stp x9, x10, [x0,#8*0] // copy modulus to |b| + stp x11, x12, [x0,#8*2] + stp x13, x14, [x0,#8*4] + + eor x2, x2, x2 // init the .Legendre symbol + mov x15, #24 // 24 is 768/30-1 + b .Loop_is_square + +.align 4 +.Loop_is_square: + bl __ab_approximation_30 + sub x15, x15, #1 + + eor x1, x0, #128 // pointer to dst |b| + bl __smul_384_n_shift_by_30 + + mov x19, x16 // |f0| + mov x20, x17 // |g0| + add x1, x1, #8*6 // pointer to dst |a| + bl __smul_384_n_shift_by_30 + + ldp x9, x10, [x1,#-8*6] + eor x0, x0, #128 // flip-flop src |a|b| + and x27, x27, x9 // if |a| was negative, + add x2, x2, x27, lsr#1 // adjust |L| + + cbnz x15, .Loop_is_square + + ////////////////////////////////////////// last iteration + //bl __ab_approximation_30 // |a| and |b| are exact, + //ldr x8, [x0,#8*6] // and loaded + //ldr x14, [x0,#8*0] + mov x15, #48 // 48 is 768%30 + 30 + bl __inner_loop_48 + ldr x30, [x29,#8] + + and x0, x2, #1 + eor x0, x0, #1 + + add sp, sp, #512 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 + .inst 0xd50323bf + ret +.size ct_is_square_mod_384,.-ct_is_square_mod_384 + +.type __smul_384_n_shift_by_30, %function +.align 5 +__smul_384_n_shift_by_30: + ldp x3, x4, [x0,#8*0+0] // load |b| (or |a|) + asr x27, x20, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x5, x6, [x0,#8*2+0] + eor x20, x20, x27 // conditionally negate |g1| (or |f1|) + ldp x7, x8, [x0,#8*4+0] + + eor x3, x3, x27 // conditionally negate |b| (or |a|) + sub x20, x20, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + umulh x21, x3, x20 + adcs x6, x6, xzr + umulh x22, x4, x20 + eor x8, x8, x27 + umulh x23, x5, x20 + adcs x7, x7, xzr + umulh x24, x6, x20 + adc x8, x8, xzr + + umulh x25, x7, x20 + and x28, x20, x27 + umulh x26, x8, x20 + neg x28, x28 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x21 + mul x6, x6, x20 + adcs x5, x5, x22 + mul x7, x7, x20 + adcs x6, x6, x23 + mul x8, x8, x20 + adcs x7, x7, x24 + adcs x8, x8 ,x25 + adc x26, x26, x28 + ldp x9, x10, [x0,#8*0+48] // load |b| (or |a|) + asr x27, x19, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x11, x12, [x0,#8*2+48] + eor x19, x19, x27 // conditionally negate |g1| (or |f1|) + ldp x13, x14, [x0,#8*4+48] + + eor x9, x9, x27 // conditionally negate |b| (or |a|) + sub x19, x19, x27 + eor x10, x10, x27 + adds x9, x9, x27, lsr#63 + eor x11, x11, x27 + adcs x10, x10, xzr + eor x12, x12, x27 + adcs x11, x11, xzr + eor x13, x13, x27 + umulh x21, x9, x19 + adcs x12, x12, xzr + umulh x22, x10, x19 + eor x14, x14, x27 + umulh x23, x11, x19 + adcs x13, x13, xzr + umulh x24, x12, x19 + adc x14, x14, xzr + + umulh x25, x13, x19 + and x28, x19, x27 + umulh x27, x14, x19 + neg x28, x28 + mul x9, x9, x19 + mul x10, x10, x19 + mul x11, x11, x19 + adds x10, x10, x21 + mul x12, x12, x19 + adcs x11, x11, x22 + mul x13, x13, x19 + adcs x12, x12, x23 + mul x14, x14, x19 + adcs x13, x13, x24 + adcs x14, x14 ,x25 + adc x27, x27, x28 + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x26, x27 + + extr x3, x4, x3, #30 + extr x4, x5, x4, #30 + extr x5, x6, x5, #30 + asr x27, x9, #63 + extr x6, x7, x6, #30 + extr x7, x8, x7, #30 + extr x8, x9, x8, #30 + + eor x3, x3, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + adcs x6, x6, xzr + eor x8, x8, x27 + stp x3, x4, [x1,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x1,#8*2] + adc x8, x8, xzr + stp x7, x8, [x1,#8*4] + + ret +.size __smul_384_n_shift_by_30,.-__smul_384_n_shift_by_30 +.type __ab_approximation_30, %function +.align 4 +__ab_approximation_30: + ldp x13, x14, [x0,#8*4] // |a| is still in registers + ldp x11, x12, [x0,#8*2] + + orr x21, x8, x14 // check top-most limbs, ... + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x6, ne + orr x21, x8, x14 // ... ones before top-most, ... + csel x13, x13, x12, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x5, ne + orr x21, x8, x14 // ... and ones before that ... + csel x13, x13, x11, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x4, ne + orr x21, x8, x14 // and one more, ... + csel x13, x13, x10, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x3, ne + orr x21, x8, x14 + csel x13, x13, x9, ne + + clz x21, x21 + cmp x21, #64 + csel x21, x21, xzr, ne + csel x8, x8, x7, ne + csel x14, x14, x13, ne + neg x22, x21 + + lslv x8, x8, x21 // align high limbs to the left + lslv x14, x14, x21 + lsrv x7, x7, x22 + lsrv x13, x13, x22 + and x7, x7, x22, asr#6 + and x13, x13, x22, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + bfxil x8, x3, #0, #32 + bfxil x14, x9, #0, #32 + + b __inner_loop_30 + ret +.size __ab_approximation_30,.-__ab_approximation_30 + +.type __inner_loop_30, %function +.align 4 +__inner_loop_30: + mov x28, #30 + mov x17, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x20, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x27,#0x7FFFFFFF7FFFFFFF + +.Loop_30: + sbfx x24, x8, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x8, x14 + sub x28, x28, #1 + and x21, x14, x24 + + sub x22, x14, x8 // |b_|-|a_| + subs x23, x8, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 // L + (a_ & b_) >> 1 + mov x21, x20 + csel x14, x14, x8, hs // |b_| = |a_| + csel x8, x23, x22, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x20, x20, x17, hs // exchange |fg0| and |fg1| + csel x17, x17, x21, hs + csel x2, x2, x25, hs + lsr x8, x8, #1 + and x21, x20, x24 + and x22, x27, x24 + add x23, x14, #2 + sub x17, x17, x21 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x20, x20, x20 // |f1|<<=1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + add x17, x17, x22 + sub x20, x20, x27 + + cbnz x28, .Loop_30 + + mov x27, #0x7FFFFFFF + ubfx x16, x17, #0, #32 + ubfx x17, x17, #32, #32 + ubfx x19, x20, #0, #32 + ubfx x20, x20, #32, #32 + sub x16, x16, x27 // remove the bias + sub x17, x17, x27 + sub x19, x19, x27 + sub x20, x20, x27 + + ret +.size __inner_loop_30,.-__inner_loop_30 +.type __inner_loop_48, %function +.align 4 +__inner_loop_48: +.Loop_48: + sbfx x24, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x3, x9 + sub x15, x15, #1 + and x21, x9, x24 + sub x22, x9, x3 // |b_|-|a_| + subs x23, x3, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 + csel x9, x9, x3, hs // |b_| = |a_| + csel x3, x23, x22, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x2, x2, x25, hs + add x23, x9, #2 + lsr x3, x3, #1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + + cbnz x15, .Loop_48 + + ret +.size __inner_loop_48,.-__inner_loop_48 diff --git a/crypto/blst_src/build/elf/ct_is_square_mod_384-x86_64.s b/crypto/blst_src/build/elf/ct_is_square_mod_384-x86_64.s new file mode 100644 index 00000000000..bf610fa7440 --- /dev/null +++ b/crypto/blst_src/build/elf/ct_is_square_mod_384-x86_64.s @@ -0,0 +1,480 @@ +.text + +.globl ct_is_square_mod_384 +.hidden ct_is_square_mod_384 +.type ct_is_square_mod_384,@function +.align 32 +ct_is_square_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $536,%rsp +.cfi_adjust_cfa_offset 536 + + + leaq 24+255(%rsp),%rax + andq $-256,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rbx + movq 24(%rsi),%rcx + movq 32(%rsi),%rdx + movq 40(%rsi),%rdi + movq %rax,%rsi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rcx,72(%rax) + movq %rdx,80(%rax) + movq %rdi,88(%rax) + + xorq %rbp,%rbp + movl $24,%ecx + jmp .Loop_is_square + +.align 32 +.Loop_is_square: + movl %ecx,16(%rsp) + + call __ab_approximation_30 + movq %rax,0(%rsp) + movq %rbx,8(%rsp) + + movq $128+48,%rdi + xorq %rsi,%rdi + call __smulq_384_n_shift_by_30 + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq -48(%rdi),%rdi + call __smulq_384_n_shift_by_30 + + movl 16(%rsp),%ecx + xorq $128,%rsi + + andq 48(%rdi),%r14 + shrq $1,%r14 + addq %r14,%rbp + + subl $1,%ecx + jnz .Loop_is_square + + + + + movq 48(%rsi),%r9 + call __inner_loop_48 + + movq $1,%rax + andq %rbp,%rax + xorq $1,%rax + + leaq 536(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -536-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size ct_is_square_mod_384,.-ct_is_square_mod_384 + +.type __smulq_384_n_shift_by_30,@function +.align 32 +__smulq_384_n_shift_by_30: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %rdx,%r14 + andq %rbx,%r14 + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + negq %r14 + mulq %rbx + addq %rax,%r13 + adcq %rdx,%r14 + leaq 48(%rsi),%rsi + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %rdx,%r15 + andq %rbx,%r15 + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + negq %r15 + mulq %rbx + addq %rax,%r13 + adcq %rdx,%r15 + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq %r15,%r14 + + shrdq $30,%r9,%r8 + shrdq $30,%r10,%r9 + shrdq $30,%r11,%r10 + shrdq $30,%r12,%r11 + shrdq $30,%r13,%r12 + shrdq $30,%r14,%r13 + + sarq $63,%r14 + xorq %rbx,%rbx + subq %r14,%rbx + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbx,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulq_384_n_shift_by_30,.-__smulq_384_n_shift_by_30 +.type __ab_approximation_30,@function +.align 32 +__ab_approximation_30: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 88(%rsi),%rbx + movq 80(%rsi),%r15 + movq 72(%rsi),%r14 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r11,%r12 + movq 64(%rsi),%r11 + cmovzq %r14,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r10,%r12 + movq 56(%rsi),%r10 + cmovzq %r11,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r9,%r12 + movq 48(%rsi),%r9 + cmovzq %r10,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r8,%r12 + cmovzq %r9,%r15 + + movq %r13,%rax + orq %rbx,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r13 + cmovzq %r9,%rbx + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%r12,%r13 + shldq %cl,%r15,%rbx + + movq $0xFFFFFFFF00000000,%rax + movl %r8d,%r8d + movl %r9d,%r9d + andq %rax,%r13 + andq %rax,%rbx + orq %r13,%r8 + orq %rbx,%r9 + + jmp __inner_loop_30 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ab_approximation_30,.-__ab_approximation_30 +.type __inner_loop_30,@function +.align 32 +__inner_loop_30: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rbx + movq $0x800000007FFFFFFF,%rcx + leaq -1(%rbx),%r15 + movl $30,%edi + +.Loop_30: + movq %r8,%rax + andq %r9,%rax + shrq $1,%rax + + cmpq %r9,%r8 + movq %r8,%r10 + movq %r9,%r11 + leaq (%rax,%rbp,1),%rax + movq %rbx,%r12 + movq %rcx,%r13 + movq %rbp,%r14 + cmovbq %r9,%r8 + cmovbq %r10,%r9 + cmovbq %rcx,%rbx + cmovbq %r12,%rcx + cmovbq %rax,%rbp + + subq %r9,%r8 + subq %rcx,%rbx + addq %r15,%rbx + + testq $1,%r10 + cmovzq %r10,%r8 + cmovzq %r11,%r9 + cmovzq %r12,%rbx + cmovzq %r13,%rcx + cmovzq %r14,%rbp + + leaq 2(%r9),%rax + shrq $1,%r8 + shrq $2,%rax + addq %rcx,%rcx + leaq (%rax,%rbp,1),%rbp + subq %r15,%rcx + + subl $1,%edi + jnz .Loop_30 + + shrq $32,%r15 + movl %ebx,%eax + shrq $32,%rbx + movl %ecx,%edx + shrq $32,%rcx + subq %r15,%rax + subq %r15,%rbx + subq %r15,%rdx + subq %r15,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc +.size __inner_loop_30,.-__inner_loop_30 + +.type __inner_loop_48,@function +.align 32 +__inner_loop_48: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movl $48,%edi + +.Loop_48: + movq %r8,%rax + andq %r9,%rax + shrq $1,%rax + + cmpq %r9,%r8 + movq %r8,%r10 + movq %r9,%r11 + leaq (%rax,%rbp,1),%rax + movq %rbp,%r12 + cmovbq %r9,%r8 + cmovbq %r10,%r9 + cmovbq %rax,%rbp + + subq %r9,%r8 + + testq $1,%r10 + cmovzq %r10,%r8 + cmovzq %r11,%r9 + cmovzq %r12,%rbp + + leaq 2(%r9),%rax + shrq $1,%r8 + shrq $2,%rax + addq %rax,%rbp + + subl $1,%edi + jnz .Loop_48 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __inner_loop_48,.-__inner_loop_48 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/ctq_inverse_mod_384-x86_64.s b/crypto/blst_src/build/elf/ctq_inverse_mod_384-x86_64.s new file mode 100644 index 00000000000..9cca518721f --- /dev/null +++ b/crypto/blst_src/build/elf/ctq_inverse_mod_384-x86_64.s @@ -0,0 +1,1201 @@ +.comm __blst_platform_cap,4 +.text + +.globl ct_inverse_mod_383 +.hidden ct_inverse_mod_383 +.type ct_inverse_mod_383,@function +.align 32 +ct_inverse_mod_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz ct_inverse_mod_383$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $1112,%rsp +.cfi_adjust_cfa_offset 1112 + + + leaq 88+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq 0(%rdx),%r14 + movq 8(%rdx),%r15 + movq 16(%rdx),%rbx + movq 24(%rdx),%rbp + movq 32(%rdx),%rsi + movq 40(%rdx),%rdi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rbp,72(%rax) + movq %rsi,80(%rax) + movq %rax,%rsi + movq %rdi,88(%rax) + + + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + + + movq %rdx,96(%rdi) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + + + movq %rdx,96(%rdi) + + + xorq $256,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + + + + movq 96(%rsi),%rax + movq 144(%rsi),%r11 + movq %rdx,%rbx + movq %rax,%r10 + imulq 56(%rsp) + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + sarq $63,%r9 + movq %r9,64(%rdi) + movq %r9,72(%rdi) + movq %r9,80(%rdi) + movq %r9,88(%rdi) + leaq 96(%rsi),%rsi + + movq %r10,%rax + imulq %rbx + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,96(%rdi) + movq %r9,104(%rdi) + sarq $63,%r9 + movq %r9,112(%rdi) + movq %r9,120(%rdi) + movq %r9,128(%rdi) + movq %r9,136(%rdi) + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + sarq $63,%r13 + movq %r13,48(%rdi) + movq %r13,56(%rdi) + movq %r13,64(%rdi) + movq %r13,72(%rdi) + movq %r13,80(%rdi) + movq %r13,88(%rdi) + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + + xorq $256+96,%rsi + movl $62,%edi + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 48(%rsi),%r10 + movq 56(%rsi),%r11 + call __inner_loop_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + movq %r8,0(%rdi) + movq %r10,48(%rdi) + + + + leaq 96(%rsi),%rsi + leaq 96(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + + + xorq $256+96,%rsi + movl $22,%edi + + movq 0(%rsi),%r8 + xorq %r9,%r9 + movq 48(%rsi),%r10 + xorq %r11,%r11 + call __inner_loop_62 + + + + + + + + leaq 96(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulq_767x63 + + movq 40(%rsp),%rsi + movq %rax,%rdx + sarq $63,%rax + + movq %rax,%r8 + movq %rax,%r9 + movq %rax,%r10 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + movq %rax,%r11 + andq 16(%rsi),%r10 + andq 24(%rsi),%r11 + movq %rax,%r12 + andq 32(%rsi),%r12 + andq 40(%rsi),%rax + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rbx + adcq %r11,%rbp + adcq %r12,%rcx + adcq %rax,%rdx + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %rbx,64(%rdi) + movq %rbp,72(%rdi) + movq %rcx,80(%rdi) + movq %rdx,88(%rdi) + + leaq 1112(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -1112-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size ct_inverse_mod_383,.-ct_inverse_mod_383 +.type __smulq_767x63,@function +.align 32 +__smulq_767x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + leaq 48(%rsi),%rsi + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,0(%rdi) + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + movq %r9,8(%rdi) + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + movq %r10,16(%rdi) + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + movq %r11,24(%rdi) + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + movq %r12,32(%rdi) + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + movq %r13,40(%rdi) + movq %rdx,48(%rdi) + sarq $63,%rdx + movq %rdx,56(%rdi) + movq %rcx,%rdx + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + movq 64(%rsi),%rbx + movq 72(%rsi),%rbp + movq 80(%rsi),%rcx + movq 88(%rsi),%rdi + + movq %rdx,%rsi + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rsi + addq %rax,%rsi + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + xorq %rdx,%rbx + xorq %rdx,%rbp + xorq %rdx,%rcx + xorq %rdx,%rdi + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rbx + adcq $0,%rbp + adcq $0,%rcx + adcq $0,%rdi + + mulq %rsi + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rsi + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rsi + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rsi + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rsi + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + mulq %rsi + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rsi + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + mulq %rsi + addq %rax,%r15 + movq %rbx,%rax + adcq $0,%rdx + movq %rdx,%rbx + mulq %rsi + addq %rax,%rbx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rbp + mulq %rsi + addq %rax,%rbp + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rcx + mulq %rsi + addq %rax,%rcx + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%rdi + movq 8(%rsp),%rdx + imulq %rsi,%rax + movq 16(%rsp),%rsi + addq %rdi,%rax + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq 32(%rdx),%r12 + adcq 40(%rdx),%r13 + adcq 48(%rdx),%r14 + movq 56(%rdx),%rdi + adcq %rdi,%r15 + adcq %rdi,%rbx + adcq %rdi,%rbp + adcq %rdi,%rcx + adcq %rdi,%rax + + movq %rdx,%rdi + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + movq %rbx,64(%rdx) + movq %rbp,72(%rdx) + movq %rcx,80(%rdx) + movq %rax,88(%rdx) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulq_767x63,.-__smulq_767x63 +.type __smulq_383x63,@function +.align 32 +__smulq_383x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp,%rax + addq %rax,%r13 + + leaq 48(%rsi),%rsi + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp,%rax + addq %rax,%r13 + + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulq_383x63,.-__smulq_383x63 +.type __smulq_383_n_shift_by_62,@function +.align 32 +__smulq_383_n_shift_by_62: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + leaq 48(%rsi),%rsi + movq %rdx,%r14 + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $62,%r9,%r8 + shrdq $62,%r10,%r9 + shrdq $62,%r11,%r10 + shrdq $62,%r12,%r11 + shrdq $62,%r13,%r12 + shrdq $62,%r14,%r13 + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulq_383_n_shift_by_62,.-__smulq_383_n_shift_by_62 +.type __ab_approximation_62,@function +.align 32 +__ab_approximation_62: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 40(%rsi),%r9 + movq 88(%rsi),%r11 + movq 32(%rsi),%rbx + movq 80(%rsi),%rbp + movq 24(%rsi),%r8 + movq 72(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 16(%rsi),%r8 + movq 64(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 8(%rsi),%r8 + movq 56(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 0(%rsi),%r8 + movq 48(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + jmp __inner_loop_62 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ab_approximation_62,.-__ab_approximation_62 +.type __inner_loop_62,@function +.align 8 +.long 0 +__inner_loop_62: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq $1,%r13 + movq %rsi,8(%rsp) + +.Loop_62: + xorq %rax,%rax + xorq %rbx,%rbx + testq $1,%r8 + movq %r10,%rbp + movq %r11,%r14 + cmovnzq %r10,%rax + cmovnzq %r11,%rbx + subq %r8,%rbp + sbbq %r9,%r14 + movq %r8,%r15 + movq %r9,%rsi + subq %rax,%r8 + sbbq %rbx,%r9 + cmovcq %rbp,%r8 + cmovcq %r14,%r9 + cmovcq %r15,%r10 + cmovcq %rsi,%r11 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrdq $1,%r9,%r8 + shrq $1,%r9 + testq $1,%r15 + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%edi + jnz .Loop_62 + + movq 8(%rsp),%rsi + .byte 0xf3,0xc3 +.cfi_endproc +.size __inner_loop_62,.-__inner_loop_62 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/ctx_inverse_mod_384-x86_64.s b/crypto/blst_src/build/elf/ctx_inverse_mod_384-x86_64.s new file mode 100644 index 00000000000..9f4d12babd4 --- /dev/null +++ b/crypto/blst_src/build/elf/ctx_inverse_mod_384-x86_64.s @@ -0,0 +1,1576 @@ +.text + +.globl ctx_inverse_mod_383 +.hidden ctx_inverse_mod_383 +.type ctx_inverse_mod_383,@function +.align 32 +ctx_inverse_mod_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +ct_inverse_mod_383$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $1112,%rsp +.cfi_adjust_cfa_offset 1112 + + + leaq 88+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq 0(%rdx),%r14 + movq 8(%rdx),%r15 + movq 16(%rdx),%rbx + movq 24(%rdx),%rbp + movq 32(%rdx),%rsi + movq 40(%rdx),%rdi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rbp,72(%rax) + movq %rsi,80(%rax) + movq %rax,%rsi + movq %rdi,88(%rax) + + + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + + + movq %rdx,96(%rdi) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + + + movq %rdx,96(%rdi) + + + xorq $256,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + + + + movq 96(%rsi),%rax + movq 144(%rsi),%r11 + movq %rdx,%rbx + movq %rax,%r10 + imulq 56(%rsp) + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + sarq $63,%r9 + movq %r9,64(%rdi) + movq %r9,72(%rdi) + movq %r9,80(%rdi) + movq %r9,88(%rdi) + leaq 96(%rsi),%rsi + + movq %r10,%rax + imulq %rbx + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,96(%rdi) + movq %r9,104(%rdi) + sarq $63,%r9 + movq %r9,112(%rdi) + movq %r9,120(%rdi) + movq %r9,128(%rdi) + movq %r9,136(%rdi) + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + sarq $63,%r13 + movq %r13,48(%rdi) + movq %r13,56(%rdi) + movq %r13,64(%rdi) + movq %r13,72(%rdi) + movq %r13,80(%rdi) + movq %r13,88(%rdi) + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + + xorq $256+96,%rsi + movl $53,%edi + + movq 0(%rsi),%r8 + + movq 48(%rsi),%r10 + + call __tail_loop_53 + + + + + + + + leaq 96(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulx_767x63 + + movq 40(%rsp),%rsi + movq %rax,%rdx + sarq $63,%rax + + movq %rax,%r8 + movq %rax,%r9 + movq %rax,%r10 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + movq %rax,%r11 + andq 16(%rsi),%r10 + andq 24(%rsi),%r11 + movq %rax,%r12 + andq 32(%rsi),%r12 + andq 40(%rsi),%rax + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rbx + adcq %r11,%rbp + adcq %r12,%rcx + adcq %rax,%rdx + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %rbx,64(%rdi) + movq %rbp,72(%rdi) + movq %rcx,80(%rdi) + movq %rdx,88(%rdi) + + leaq 1112(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -1112-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size ctx_inverse_mod_383,.-ctx_inverse_mod_383 +.type __smulx_767x63,@function +.align 32 +__smulx_767x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + leaq 48(%rsi),%rsi + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq $0,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + movq %rdx,48(%rdi) + sarq $63,%rdx + movq %rdx,56(%rdi) + movq %rcx,%rdx + movq %rcx,%rax + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + movq 64(%rsi),%rbx + movq 72(%rsi),%rbp + movq 80(%rsi),%rcx + movq 88(%rsi),%rdi + + sarq $63,%rax + xorq %rsi,%rsi + subq %rax,%rsi + + xorq %rax,%rdx + addq %rsi,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %rax,%r13 + xorq %rax,%r14 + xorq %rax,%r15 + xorq %rax,%rbx + xorq %rax,%rbp + xorq %rax,%rcx + xorq %rax,%rdi + addq %rsi,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rbx + adcq $0,%rbp + adcq $0,%rcx + adcq $0,%rdi + + mulxq %r8,%r8,%rax + mulxq %r9,%r9,%rsi + addq %rax,%r9 + mulxq %r10,%r10,%rax + adcq %rsi,%r10 + mulxq %r11,%r11,%rsi + adcq %rax,%r11 + mulxq %r12,%r12,%rax + adcq %rsi,%r12 + mulxq %r13,%r13,%rsi + adcq %rax,%r13 + mulxq %r14,%r14,%rax + adcq %rsi,%r14 + mulxq %r15,%r15,%rsi + adcq %rax,%r15 + mulxq %rbx,%rbx,%rax + adcq %rsi,%rbx + mulxq %rbp,%rbp,%rsi + adcq %rax,%rbp + mulxq %rcx,%rcx,%rax + adcq %rsi,%rcx + mulxq %rdi,%rdi,%rsi + movq 8(%rsp),%rdx + movq 16(%rsp),%rsi + adcq %rdi,%rax + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq 32(%rdx),%r12 + adcq 40(%rdx),%r13 + adcq 48(%rdx),%r14 + movq 56(%rdx),%rdi + adcq %rdi,%r15 + adcq %rdi,%rbx + adcq %rdi,%rbp + adcq %rdi,%rcx + adcq %rdi,%rax + + movq %rdx,%rdi + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + movq %rbx,64(%rdx) + movq %rbp,72(%rdx) + movq %rcx,80(%rdx) + movq %rax,88(%rdx) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulx_767x63,.-__smulx_767x63 +.type __smulx_383x63,@function +.align 32 +__smulx_383x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%r12 + movq 0+40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rdx + addq %rax,%rdx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + xorq %rbp,%r12 + xorq %rbp,%r13 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%rax + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %rax,%r10 + mulxq %r11,%r11,%rax + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %rax,%r12 + mulxq %r13,%r13,%rax + movq %rcx,%rdx + adcq %rbp,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rdx + addq %rax,%rdx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + xorq %rbp,%r12 + xorq %rbp,%r13 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%rax + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %rax,%r10 + mulxq %r11,%r11,%rax + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %rax,%r12 + mulxq %r13,%r13,%rax + adcq %rbp,%r13 + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulx_383x63,.-__smulx_383x63 +.type __smulx_383_n_shift_by_31,@function +.align 32 +__smulx_383_n_shift_by_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + xorq %r14,%r14 + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%r12 + movq 0+40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq %rdx,%r14 + + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq $0,%rdx + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%rax + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $31,%r9,%r8 + shrdq $31,%r10,%r9 + shrdq $31,%r11,%r10 + shrdq $31,%r12,%r11 + shrdq $31,%rax,%r12 + shrdq $31,%r14,%rax + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulx_383_n_shift_by_31,.-__smulx_383_n_shift_by_31 +.type __smulx_191_n_shift_by_31,@function +.align 32 +__smulx_191_n_shift_by_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %r10,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r10 + addq %rbp,%r9 + adcq $0,%r10 + imulq %rdx + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r14 + movq %rcx,%rdx + movq 48+0(%rsi),%r11 + movq 48+8(%rsi),%r12 + movq 48+16(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r11,%r11,%rbp + mulxq %r12,%r12,%r13 + addq %rbp,%r12 + adcq $0,%r13 + imulq %rdx + addq %rax,%r13 + adcq $0,%rdx + addq %r8,%r11 + adcq %r9,%r12 + adcq %r10,%r13 + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $31,%r12,%r11 + shrdq $31,%r13,%r12 + shrdq $31,%r14,%r13 + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbp,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r11,0(%rdi) + movq %r12,8(%rdi) + movq %r13,16(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc +.size __smulx_191_n_shift_by_31,.-__smulx_191_n_shift_by_31 +.type __ab_approximation_31,@function +.align 32 +__ab_approximation_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 40(%rsi),%r9 + movq 88(%rsi),%r11 + movq 32(%rsi),%rbx + movq 80(%rsi),%rbp + movq 24(%rsi),%r8 + movq 72(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 16(%rsi),%r8 + cmovzq %r10,%rbp + movq 64(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 8(%rsi),%r8 + cmovzq %r10,%rbp + movq 56(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 0(%rsi),%r8 + cmovzq %r10,%rbp + movq 48(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r9 + cmovzq %r10,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + movl $0x7FFFFFFF,%eax + andq %rax,%r8 + andq %rax,%r10 + andnq %r9,%rax,%r9 + andnq %r11,%rax,%r11 + orq %r9,%r8 + orq %r11,%r10 + + jmp __inner_loop_31 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ab_approximation_31,.-__ab_approximation_31 +.type __inner_loop_31,@function +.align 32 +__inner_loop_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rcx + movq $0x800000007FFFFFFF,%r13 + movq $0x7FFFFFFF7FFFFFFF,%r15 + +.Loop_31: + cmpq %r10,%r8 + movq %r8,%rax + movq %r10,%rbx + movq %rcx,%rbp + movq %r13,%r14 + cmovbq %r10,%r8 + cmovbq %rax,%r10 + cmovbq %r13,%rcx + cmovbq %rbp,%r13 + + subq %r10,%r8 + subq %r13,%rcx + addq %r15,%rcx + + testq $1,%rax + cmovzq %rax,%r8 + cmovzq %rbx,%r10 + cmovzq %rbp,%rcx + cmovzq %r14,%r13 + + shrq $1,%r8 + addq %r13,%r13 + subq %r15,%r13 + subl $1,%edi + jnz .Loop_31 + + shrq $32,%r15 + movl %ecx,%edx + movl %r13d,%r12d + shrq $32,%rcx + shrq $32,%r13 + subq %r15,%rdx + subq %r15,%rcx + subq %r15,%r12 + subq %r15,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __inner_loop_31,.-__inner_loop_31 + +.type __tail_loop_53,@function +.align 32 +__tail_loop_53: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq $1,%r13 + +.Loop_53: + xorq %rax,%rax + testq $1,%r8 + movq %r10,%rbx + cmovnzq %r10,%rax + subq %r8,%rbx + movq %r8,%rbp + subq %rax,%r8 + cmovcq %rbx,%r8 + cmovcq %rbp,%r10 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrq $1,%r8 + testq $1,%rbp + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%edi + jnz .Loop_53 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __tail_loop_53,.-__tail_loop_53 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/div3w-armv8.S b/crypto/blst_src/build/elf/div3w-armv8.S new file mode 100644 index 00000000000..37621bee415 --- /dev/null +++ b/crypto/blst_src/build/elf/div3w-armv8.S @@ -0,0 +1,88 @@ +.text + +.globl div_3_limbs +.type div_3_limbs,%function +.align 5 +div_3_limbs: + ldp x4,x5,[x0] // load R + eor x0,x0,x0 // Q = 0 + mov x3,#64 // loop counter + nop + +.Loop: + subs x6,x4,x1 // R - D + add x0,x0,x0 // Q <<= 1 + sbcs x7,x5,x2 + add x0,x0,#1 // Q + speculative bit + csel x4,x4,x6,lo // select between R and R - D + extr x1,x2,x1,#1 // D >>= 1 + csel x5,x5,x7,lo + lsr x2,x2,#1 + sbc x0,x0,xzr // subtract speculative bit + sub x3,x3,#1 + cbnz x3,.Loop + + asr x3,x0,#63 // top bit -> mask + add x0,x0,x0 // Q <<= 1 + subs x6,x4,x1 // R - D + add x0,x0,#1 // Q + speculative bit + sbcs x7,x5,x2 + sbc x0,x0,xzr // subtract speculative bit + + orr x0,x0,x3 // all ones if overflow + + ret +.size div_3_limbs,.-div_3_limbs +.globl quot_rem_128 +.type quot_rem_128,%function +.align 5 +quot_rem_128: + ldp x3,x4,[x1] + + mul x5,x3,x2 // divisor[0:1} * quotient + umulh x6,x3,x2 + mul x11, x4,x2 + umulh x7,x4,x2 + + ldp x8,x9,[x0] // load 3 limbs of the dividend + ldr x10,[x0,#16] + + adds x6,x6,x11 + adc x7,x7,xzr + + subs x8,x8,x5 // dividend - divisor * quotient + sbcs x9,x9,x6 + sbcs x10,x10,x7 + sbc x5,xzr,xzr // borrow -> mask + + add x2,x2,x5 // if borrowed, adjust the quotient ... + and x3,x3,x5 + and x4,x4,x5 + adds x8,x8,x3 // ... and add divisor + adc x9,x9,x4 + + stp x8,x9,[x0] // save 2 limbs of the remainder + str x2,[x0,#16] // and one limb of the quotient + + mov x0,x2 // return adjusted quotient + + ret +.size quot_rem_128,.-quot_rem_128 + +.globl quot_rem_64 +.type quot_rem_64,%function +.align 5 +quot_rem_64: + ldr x3,[x1] + ldr x8,[x0] // load 1 limb of the dividend + + mul x5,x3,x2 // divisor * quotient + + sub x8,x8,x5 // dividend - divisor * quotient + + stp x8,x2,[x0] // save remainder and quotient + + mov x0,x2 // return quotient + + ret +.size quot_rem_64,.-quot_rem_64 diff --git a/crypto/blst_src/build/elf/div3w-x86_64.s b/crypto/blst_src/build/elf/div3w-x86_64.s new file mode 100644 index 00000000000..5d9fd8a9139 --- /dev/null +++ b/crypto/blst_src/build/elf/div3w-x86_64.s @@ -0,0 +1,132 @@ +.text + +.globl div_3_limbs +.hidden div_3_limbs +.type div_3_limbs,@function +.align 32 +div_3_limbs: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq (%rdi),%r8 + movq 8(%rdi),%r9 + xorq %rax,%rax + movl $64,%ecx + +.Loop: + movq %r8,%r10 + subq %rsi,%r8 + movq %r9,%r11 + sbbq %rdx,%r9 + leaq 1(%rax,%rax,1),%rax + movq %rdx,%rdi + cmovcq %r10,%r8 + cmovcq %r11,%r9 + sbbq $0,%rax + shlq $63,%rdi + shrq $1,%rsi + shrq $1,%rdx + orq %rdi,%rsi + subl $1,%ecx + jnz .Loop + + leaq 1(%rax,%rax,1),%rcx + sarq $63,%rax + + subq %rsi,%r8 + sbbq %rdx,%r9 + sbbq $0,%rcx + + orq %rcx,%rax + + + .byte 0xf3,0xc3 +.cfi_endproc +.size div_3_limbs,.-div_3_limbs +.globl quot_rem_128 +.hidden quot_rem_128 +.type quot_rem_128,@function +.align 32 +quot_rem_128: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq %rdx,%rax + movq %rdx,%rcx + + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + adcq $0,%rdx + + movq 0(%rdi),%r10 + movq 8(%rdi),%r11 + movq 16(%rdi),%rax + + subq %r8,%r10 + sbbq %r9,%r11 + sbbq %rdx,%rax + sbbq %r8,%r8 + + addq %r8,%rcx + movq %r8,%r9 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + addq %r8,%r10 + adcq %r9,%r11 + + movq %r10,0(%rdi) + movq %r11,8(%rdi) + movq %rcx,16(%rdi) + + movq %rcx,%rax + + + .byte 0xf3,0xc3 +.cfi_endproc +.size quot_rem_128,.-quot_rem_128 + + + + + +.globl quot_rem_64 +.hidden quot_rem_64 +.type quot_rem_64,@function +.align 32 +quot_rem_64: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq %rdx,%rax + imulq 0(%rsi),%rdx + + movq 0(%rdi),%r10 + + subq %rdx,%r10 + + movq %r10,0(%rdi) + movq %rax,8(%rdi) + + + .byte 0xf3,0xc3 +.cfi_endproc +.size quot_rem_64,.-quot_rem_64 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/mul_mont_256-armv8.S b/crypto/blst_src/build/elf/mul_mont_256-armv8.S new file mode 100644 index 00000000000..8bb1197f464 --- /dev/null +++ b/crypto/blst_src/build/elf/mul_mont_256-armv8.S @@ -0,0 +1,464 @@ +.text + +.globl mul_mont_sparse_256 +.hidden mul_mont_sparse_256 +.type mul_mont_sparse_256,%function +.align 5 +mul_mont_sparse_256: + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x10,x11,[x1] + ldr x9, [x2] + ldp x12,x13,[x1,#16] + + mul x19,x10,x9 + ldp x5,x6,[x3] + mul x20,x11,x9 + ldp x7,x8,[x3,#16] + mul x21,x12,x9 + mul x22,x13,x9 + + umulh x14,x10,x9 + umulh x15,x11,x9 + mul x3,x4,x19 + umulh x16,x12,x9 + umulh x17,x13,x9 + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,xzr, x17 + mul x17,x8,x3 + ldr x9,[x2,8*1] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*2] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*3] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + adcs x20,x21,x15 + adcs x21,x22,x16 + adcs x22,x23,x17 + adc x23,xzr,xzr + + subs x14,x19,x5 + sbcs x15,x20,x6 + sbcs x16,x21,x7 + sbcs x17,x22,x8 + sbcs xzr, x23,xzr + + csel x19,x19,x14,lo + csel x20,x20,x15,lo + csel x21,x21,x16,lo + csel x22,x22,x17,lo + + stp x19,x20,[x0] + stp x21,x22,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + ret +.size mul_mont_sparse_256,.-mul_mont_sparse_256 +.globl sqr_mont_sparse_256 +.hidden sqr_mont_sparse_256 +.type sqr_mont_sparse_256,%function +.align 5 +sqr_mont_sparse_256: + .inst 0xd503233f + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + mov x4,x3 + + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10 + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x11,x6,x5 // a[1]*a[0] + umulh x15,x6,x5 + mul x12,x7,x5 // a[2]*a[0] + umulh x16,x7,x5 + mul x13,x8,x5 // a[3]*a[0] + umulh x19,x8,x5 + + adds x12,x12,x15 // accumulate high parts of multiplication + mul x14,x7,x6 // a[2]*a[1] + umulh x15,x7,x6 + adcs x13,x13,x16 + mul x16,x8,x6 // a[3]*a[1] + umulh x17,x8,x6 + adc x19,x19,xzr // can't overflow + + mul x20,x8,x7 // a[3]*a[2] + umulh x21,x8,x7 + + adds x15,x15,x16 // accumulate high parts of multiplication + mul x10,x5,x5 // a[0]*a[0] + adc x16,x17,xzr // can't overflow + + adds x13,x13,x14 // accumulate low parts of multiplication + umulh x5,x5,x5 + adcs x19,x19,x15 + mul x15,x6,x6 // a[1]*a[1] + adcs x20,x20,x16 + umulh x6,x6,x6 + adc x21,x21,xzr // can't overflow + + adds x11,x11,x11 // acc[1-6]*=2 + mul x16,x7,x7 // a[2]*a[2] + adcs x12,x12,x12 + umulh x7,x7,x7 + adcs x13,x13,x13 + mul x17,x8,x8 // a[3]*a[3] + adcs x19,x19,x19 + umulh x8,x8,x8 + adcs x20,x20,x20 + adcs x21,x21,x21 + adc x22,xzr,xzr + + adds x11,x11,x5 // +a[i]*a[i] + adcs x12,x12,x15 + adcs x13,x13,x6 + adcs x19,x19,x16 + adcs x20,x20,x7 + adcs x21,x21,x17 + adc x22,x22,x8 + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + adds x10,x10,x19 // accumulate upper half + adcs x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adc x19,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x19,xzr + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + .inst 0xd50323bf + ret +.size sqr_mont_sparse_256,.-sqr_mont_sparse_256 +.globl from_mont_256 +.hidden from_mont_256 +.type from_mont_256,%function +.align 5 +from_mont_256: + .inst 0xd503233f + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 + .inst 0xd50323bf + ret +.size from_mont_256,.-from_mont_256 + +.globl redc_mont_256 +.hidden redc_mont_256 +.type redc_mont_256,%function +.align 5 +redc_mont_256: + .inst 0xd503233f + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + ldp x14,x15,[x1,#32] + ldp x16,x17,[x1,#48] + + adds x10,x10,x14 + adcs x11,x11,x15 + adcs x12,x12,x16 + adcs x13,x13,x17 + adc x9,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x9,xzr + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 + .inst 0xd50323bf + ret +.size redc_mont_256,.-redc_mont_256 + +.type __mul_by_1_mont_256,%function +.align 5 +__mul_by_1_mont_256: + mul x3,x4,x10 + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + adc x13,x9,x17 + + ret +.size __mul_by_1_mont_256,.-__mul_by_1_mont_256 diff --git a/crypto/blst_src/build/elf/mul_mont_384-armv8.S b/crypto/blst_src/build/elf/mul_mont_384-armv8.S new file mode 100644 index 00000000000..c048e816b85 --- /dev/null +++ b/crypto/blst_src/build/elf/mul_mont_384-armv8.S @@ -0,0 +1,2372 @@ +.text + +.globl add_mod_384x384 +.type add_mod_384x384,%function +.align 5 +add_mod_384x384: + .inst 0xd503233f + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __add_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + .inst 0xd50323bf + ret +.size add_mod_384x384,.-add_mod_384x384 + +.type __add_mod_384x384,%function +.align 5 +__add_mod_384x384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + stp x11, x12, [x0] + adcs x15,x15,x23 + ldp x11, x12, [x1,#48] + adcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + adcs x11,x11,x19 + stp x15, x16, [x0,#32] + adcs x12,x12,x20 + ldp x15, x16, [x1,#80] + adcs x13,x13,x21 + ldp x23,x24,[x2,#80] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + stp x11,x12,[x0,#48] + csel x15,x15,x23,lo + stp x13,x14,[x0,#64] + csel x16,x16,x24,lo + stp x15,x16,[x0,#80] + + ret +.size __add_mod_384x384,.-__add_mod_384x384 + +.globl sub_mod_384x384 +.type sub_mod_384x384,%function +.align 5 +sub_mod_384x384: + .inst 0xd503233f + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + .inst 0xd50323bf + ret +.size sub_mod_384x384,.-sub_mod_384x384 + +.type __sub_mod_384x384,%function +.align 5 +__sub_mod_384x384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + stp x11, x12, [x0] + sbcs x15,x15,x23 + ldp x11, x12, [x1,#48] + sbcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + sbcs x11,x11,x19 + stp x15, x16, [x0,#32] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#80] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#80] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + ret +.size __sub_mod_384x384,.-__sub_mod_384x384 + +.type __add_mod_384,%function +.align 5 +__add_mod_384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + stp x11,x12,[x0] + csel x16,x16,x24,lo + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret +.size __add_mod_384,.-__add_mod_384 + +.type __sub_mod_384,%function +.align 5 +__sub_mod_384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0] + adc x16,x16,x24 + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret +.size __sub_mod_384,.-__sub_mod_384 + +.globl mul_mont_384x +.hidden mul_mont_384x +.type mul_mont_384x,%function +.align 5 +mul_mont_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#288 // space for 3 768-bit vectors + + mov x26,x0 // save r_ptr + mov x27,x1 // save b_ptr + mov x28,x2 // save b_ptr + + sub x0,sp,#0 // mul_384(t0, a->re, b->re) + bl __mul_384 + + add x1,x1,#48 // mul_384(t1, a->im, b->im) + add x2,x2,#48 + add x0,sp,#96 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + sub x2,x1,#48 + add x0,sp,#240 + bl __add_mod_384 + + add x1,x28,#0 + add x2,x28,#48 + add x0,sp,#192 // t2 + bl __add_mod_384 + + add x1,x0,#0 + add x2,x0,#48 + bl __mul_384 // mul_384(t2, a->re+a->im, b->re+b->im) + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,x0 + add x2,sp,#0 + bl __sub_mod_384x384 + + add x2,sp,#96 + bl __sub_mod_384x384 // t2 = t2-t0-t1 + + add x1,sp,#0 + add x2,sp,#96 + add x0,sp,#0 + bl __sub_mod_384x384 // t0 = t0-t1 + + add x1,sp,#0 // ret->re = redc(t0) + add x0,x26,#0 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + + add x1,sp,#192 // ret->im = redc(t2) + add x0,x0,#48 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#288 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size mul_mont_384x,.-mul_mont_384x + +.globl sqr_mont_384x +.hidden sqr_mont_384x +.type sqr_mont_384x,%function +.align 5 +sqr_mont_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 2 384-bit vectors + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + add x0,sp,#0 + bl __add_mod_384 // t0 = a->re + a->im + + add x0,sp,#48 + bl __sub_mod_384 // t1 = a->re - a->im + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __mul_mont_384 // mul_mont_384(ret->im, a->re, a->im) + + adds x11,x11,x11 // add with itself + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csel x19,x11,x19,lo + csel x20,x12,x20,lo + csel x21,x13,x21,lo + ldp x11,x12,[sp] + csel x22,x14,x22,lo + ldr x17, [sp,#48] + csel x23,x15,x23,lo + ldp x13,x14,[sp,#16] + csel x24,x16,x24,lo + ldp x15,x16,[sp,#32] + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + add x2,sp,#48 + bl __mul_mont_384 // mul_mont_384(ret->re, t0, t1) + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sqr_mont_384x,.-sqr_mont_384x + +.globl mul_mont_384 +.hidden mul_mont_384 +.type mul_mont_384,%function +.align 5 +mul_mont_384: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size mul_mont_384,.-mul_mont_384 + +.type __mul_mont_384,%function +.align 5 +__mul_mont_384: + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + mov x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*1] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*2] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*3] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*4] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*5] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + adc x17,x17,xzr + + adds x19,x20,x26 + adcs x20,x21,x27 + adcs x21,x22,x28 + adcs x22,x23,x0 + adcs x23,x24,x1 + adcs x24,x25,x3 + adc x25,x17,xzr + + subs x26,x19,x5 + sbcs x27,x20,x6 + sbcs x28,x21,x7 + sbcs x0,x22,x8 + sbcs x1,x23,x9 + sbcs x3,x24,x10 + sbcs xzr, x25,xzr + + csel x11,x19,x26,lo + csel x12,x20,x27,lo + csel x13,x21,x28,lo + csel x14,x22,x0,lo + csel x15,x23,x1,lo + csel x16,x24,x3,lo + ret +.size __mul_mont_384,.-__mul_mont_384 + +.globl sqr_mont_384 +.hidden sqr_mont_384 +.type sqr_mont_384,%function +.align 5 +sqr_mont_384: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for 768-bit vector + mov x4,x3 // adjust for missing b_ptr + + mov x3,x0 // save r_ptr + mov x0,sp + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + mov x1,sp + mov x0,x3 // restore r_ptr + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sqr_mont_384,.-sqr_mont_384 + +.globl sqr_n_mul_mont_383 +.hidden sqr_n_mul_mont_383 +.type sqr_n_mul_mont_383,%function +.align 5 +sqr_n_mul_mont_383: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 768-bit vector + mov x17,x5 // save b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + mov x0,sp +.Loop_sqr_383: + bl __sqr_384 + sub x2,x2,#1 // counter + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,sp + bl __mul_by_1_mont_384 + + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // just accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + cbnz x2,.Loop_sqr_383 + + mov x2,x17 + ldr x17,[x17] + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sqr_n_mul_mont_383,.-sqr_n_mul_mont_383 +.type __sqr_384,%function +.align 5 +__sqr_384: + mul x19,x12,x11 + mul x20,x13,x11 + mul x21,x14,x11 + mul x22,x15,x11 + mul x23,x16,x11 + + umulh x6,x12,x11 + umulh x7,x13,x11 + umulh x8,x14,x11 + umulh x9,x15,x11 + adds x20,x20,x6 + umulh x10,x16,x11 + adcs x21,x21,x7 + mul x7,x13,x12 + adcs x22,x22,x8 + mul x8,x14,x12 + adcs x23,x23,x9 + mul x9,x15,x12 + adc x24,xzr, x10 + mul x10,x16,x12 + + adds x21,x21,x7 + umulh x7,x13,x12 + adcs x22,x22,x8 + umulh x8,x14,x12 + adcs x23,x23,x9 + umulh x9,x15,x12 + adcs x24,x24,x10 + umulh x10,x16,x12 + adc x25,xzr,xzr + + mul x5,x11,x11 + adds x22,x22,x7 + umulh x11, x11,x11 + adcs x23,x23,x8 + mul x8,x14,x13 + adcs x24,x24,x9 + mul x9,x15,x13 + adc x25,x25,x10 + mul x10,x16,x13 + + adds x23,x23,x8 + umulh x8,x14,x13 + adcs x24,x24,x9 + umulh x9,x15,x13 + adcs x25,x25,x10 + umulh x10,x16,x13 + adc x26,xzr,xzr + + mul x6,x12,x12 + adds x24,x24,x8 + umulh x12, x12,x12 + adcs x25,x25,x9 + mul x9,x15,x14 + adc x26,x26,x10 + mul x10,x16,x14 + + adds x25,x25,x9 + umulh x9,x15,x14 + adcs x26,x26,x10 + umulh x10,x16,x14 + adc x27,xzr,xzr + mul x7,x13,x13 + adds x26,x26,x9 + umulh x13, x13,x13 + adc x27,x27,x10 + mul x8,x14,x14 + + mul x10,x16,x15 + umulh x14, x14,x14 + adds x27,x27,x10 + umulh x10,x16,x15 + mul x9,x15,x15 + adc x28,x10,xzr + + adds x19,x19,x19 + adcs x20,x20,x20 + adcs x21,x21,x21 + adcs x22,x22,x22 + adcs x23,x23,x23 + adcs x24,x24,x24 + adcs x25,x25,x25 + adcs x26,x26,x26 + umulh x15, x15,x15 + adcs x27,x27,x27 + mul x10,x16,x16 + adcs x28,x28,x28 + umulh x16, x16,x16 + adc x1,xzr,xzr + + adds x19,x19,x11 + adcs x20,x20,x6 + adcs x21,x21,x12 + adcs x22,x22,x7 + adcs x23,x23,x13 + adcs x24,x24,x8 + adcs x25,x25,x14 + stp x5,x19,[x0] + adcs x26,x26,x9 + stp x20,x21,[x0,#16] + adcs x27,x27,x15 + stp x22,x23,[x0,#32] + adcs x28,x28,x10 + stp x24,x25,[x0,#48] + adc x16,x16,x1 + stp x26,x27,[x0,#64] + stp x28,x16,[x0,#80] + + ret +.size __sqr_384,.-__sqr_384 +.globl sqr_384 +.hidden sqr_384 +.type sqr_384,%function +.align 5 +sqr_384: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sqr_384,.-sqr_384 + +.globl redc_mont_384 +.hidden redc_mont_384 +.type redc_mont_384,%function +.align 5 +redc_mont_384: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size redc_mont_384,.-redc_mont_384 + +.globl from_mont_384 +.hidden from_mont_384 +.type from_mont_384,%function +.align 5 +from_mont_384: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + csel x16,x16,x24,lo + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size from_mont_384,.-from_mont_384 + +.type __mul_by_1_mont_384,%function +.align 5 +__mul_by_1_mont_384: + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + mul x26,x4,x11 + ldp x15,x16,[x1,#32] + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + ret +.size __mul_by_1_mont_384,.-__mul_by_1_mont_384 + +.type __redc_tail_mont_384,%function +.align 5 +__redc_tail_mont_384: + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + csel x16,x16,x24,lo + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret +.size __redc_tail_mont_384,.-__redc_tail_mont_384 + +.globl mul_384 +.hidden mul_384 +.type mul_384,%function +.align 5 +mul_384: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + bl __mul_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size mul_384,.-mul_384 + +.type __mul_384,%function +.align 5 +__mul_384: + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + + umulh x5,x11,x17 + umulh x6,x12,x17 + umulh x7,x13,x17 + umulh x8,x14,x17 + umulh x9,x15,x17 + umulh x10,x16,x17 + ldr x17,[x2,8*1] + + str x19,[x0] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,xzr, x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(1+1)] + adc x25,xzr,xzr + + str x19,[x0,8*1] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(2+1)] + adc x25,xzr,xzr + + str x19,[x0,8*2] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(3+1)] + adc x25,xzr,xzr + + str x19,[x0,8*3] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(4+1)] + adc x25,xzr,xzr + + str x19,[x0,8*4] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + adc x25,xzr,xzr + + str x19,[x0,8*5] + adds x19,x20,x5 + adcs x20,x21,x6 + adcs x21,x22,x7 + adcs x22,x23,x8 + adcs x23,x24,x9 + adc x24,x25,x10 + + stp x19,x20,[x0,#48] + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ret +.size __mul_384,.-__mul_384 + +.globl mul_382x +.hidden mul_382x +.type mul_382x,%function +.align 5 +mul_382x: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for two 384-bit vectors + + ldp x11,x12,[x1] + mov x26,x0 // save r_ptr + ldp x19,x20,[x1,#48] + mov x27,x1 // save a_ptr + ldp x13,x14,[x1,#16] + mov x28,x2 // save b_ptr + ldp x21,x22,[x1,#64] + ldp x15,x16,[x1,#32] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x23,x24,[x1,#80] + adcs x6,x12,x20 + ldp x11,x12,[x2] + adcs x7,x13,x21 + ldp x19,x20,[x2,#48] + adcs x8,x14,x22 + ldp x13,x14,[x2,#16] + adcs x9,x15,x23 + ldp x21,x22,[x2,#64] + adc x10,x16,x24 + ldp x15,x16,[x2,#32] + + stp x5,x6,[sp] + adds x5,x11,x19 // t1 = b->re + b->im + ldp x23,x24,[x2,#80] + adcs x6,x12,x20 + stp x7,x8,[sp,#16] + adcs x7,x13,x21 + adcs x8,x14,x22 + stp x9,x10,[sp,#32] + adcs x9,x15,x23 + stp x5,x6,[sp,#48] + adc x10,x16,x24 + stp x7,x8,[sp,#64] + stp x9,x10,[sp,#80] + + bl __mul_384 // mul_384(ret->re, a->re, b->re) + + add x1,sp,#0 // mul_384(ret->im, t0, t1) + add x2,sp,#48 + add x0,x26,#96 + bl __mul_384 + + add x1,x27,#48 // mul_384(tx, a->im, b->im) + add x2,x28,#48 + add x0,sp,#0 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + add x1,x26,#96 // ret->im -= tx + add x2,sp,#0 + add x0,x26,#96 + bl __sub_mod_384x384 + + add x2,x26,#0 // ret->im -= ret->re + bl __sub_mod_384x384 + + add x1,x26,#0 // ret->re -= tx + add x2,sp,#0 + add x0,x26,#0 + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size mul_382x,.-mul_382x + +.globl sqr_382x +.hidden sqr_382x +.type sqr_382x,%function +.align 5 +sqr_382x: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x19,x20,[x1,#48] + ldp x13,x14,[x1,#16] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x21,x22,[x1,#64] + adcs x6,x12,x20 + ldp x15,x16,[x1,#32] + adcs x7,x13,x21 + ldp x23,x24,[x1,#80] + adcs x8,x14,x22 + stp x5,x6,[x0] + adcs x9,x15,x23 + ldp x5,x6,[x2] + adc x10,x16,x24 + stp x7,x8,[x0,#16] + + subs x11,x11,x19 // t1 = a->re - a->im + ldp x7,x8,[x2,#16] + sbcs x12,x12,x20 + stp x9,x10,[x0,#32] + sbcs x13,x13,x21 + ldp x9,x10,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + adds x11,x11,x19 + and x21,x7,x25 + adcs x12,x12,x20 + and x22,x8,x25 + adcs x13,x13,x21 + and x23,x9,x25 + adcs x14,x14,x22 + and x24,x10,x25 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + mov x4,x1 // save a_ptr + add x1,x0,#0 // mul_384(ret->re, t0, t1) + add x2,x0,#48 + bl __mul_384 + + add x1,x4,#0 // mul_384(ret->im, a->re, a->im) + add x2,x4,#48 + add x0,x0,#96 + bl __mul_384 + ldr x30,[x29,#8] + + ldp x11,x12,[x0] + ldp x13,x14,[x0,#16] + adds x11,x11,x11 // add with itself + ldp x15,x16,[x0,#32] + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adcs x19,x19,x19 + adcs x20,x20,x20 + stp x11,x12,[x0] + adcs x21,x21,x21 + stp x13,x14,[x0,#16] + adcs x22,x22,x22 + stp x15,x16,[x0,#32] + adcs x23,x23,x23 + stp x19,x20,[x0,#48] + adc x24,x24,x24 + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sqr_382x,.-sqr_382x + +.globl sqr_mont_382x +.hidden sqr_mont_382x +.type sqr_mont_382x,%function +.align 5 +sqr_mont_382x: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#112 // space for two 384-bit vectors + word + mov x4,x3 // adjust for missing b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x17,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x5,x11,x17 // t0 = a->re + a->im + adcs x6,x12,x20 + adcs x7,x13,x21 + adcs x8,x14,x22 + adcs x9,x15,x23 + adc x10,x16,x24 + + subs x19,x11,x17 // t1 = a->re - a->im + sbcs x20,x12,x20 + sbcs x21,x13,x21 + sbcs x22,x14,x22 + sbcs x23,x15,x23 + sbcs x24,x16,x24 + sbc x25,xzr,xzr // borrow flag as mask + + stp x5,x6,[sp] + stp x7,x8,[sp,#16] + stp x9,x10,[sp,#32] + stp x19,x20,[sp,#48] + stp x21,x22,[sp,#64] + stp x23,x24,[sp,#80] + str x25,[sp,#96] + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + bl __mul_mont_383_nonred // mul_mont_384(ret->im, a->re, a->im) + + adds x19,x11,x11 // add with itself + adcs x20,x12,x12 + adcs x21,x13,x13 + adcs x22,x14,x14 + adcs x23,x15,x15 + adc x24,x16,x16 + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + ldp x11,x12,[sp] + ldr x17,[sp,#48] + ldp x13,x14,[sp,#16] + ldp x15,x16,[sp,#32] + + add x2,sp,#48 + bl __mul_mont_383_nonred // mul_mont_384(ret->im, t0, t1) + ldr x30,[x29,#8] + + ldr x25,[sp,#96] // account for sign from a->re - a->im + ldp x19,x20,[sp] + ldp x21,x22,[sp,#16] + ldp x23,x24,[sp,#32] + + and x19,x19,x25 + and x20,x20,x25 + and x21,x21,x25 + and x22,x22,x25 + and x23,x23,x25 + and x24,x24,x25 + + subs x11,x11,x19 + sbcs x12,x12,x20 + sbcs x13,x13,x21 + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + and x21,x7,x25 + and x22,x8,x25 + and x23,x9,x25 + and x24,x10,x25 + + adds x11,x11,x19 + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#112 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sqr_mont_382x,.-sqr_mont_382x + +.type __mul_mont_383_nonred,%function +.align 5 +__mul_mont_383_nonred: + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + ldr x17,[x2,8*1] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*2] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*3] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*4] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*5] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + + adds x11,x20,x26 + adcs x12,x21,x27 + adcs x13,x22,x28 + adcs x14,x23,x0 + adcs x15,x24,x1 + adcs x16,x25,x3 + + ret +.size __mul_mont_383_nonred,.-__mul_mont_383_nonred + +.globl sgn0_pty_mont_384 +.hidden sgn0_pty_mont_384 +.type sgn0_pty_mont_384,%function +.align 5 +sgn0_pty_mont_384: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + adds x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sgn0_pty_mont_384,.-sgn0_pty_mont_384 + +.globl sgn0_pty_mont_384x +.hidden sgn0_pty_mont_384x +.type sgn0_pty_mont_384x,%function +.align 5 +sgn0_pty_mont_384x: + .inst 0xd503233f + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + add x1,x1,#48 + + and x2,x11,#1 + orr x3,x11,x12 + adds x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + orr x3,x3,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x2,x2,x17 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + orr x1,x11,x12 + adds x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + orr x1,x1,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + cmp x3,#0 + csel x3,x0,x2,eq // a->re==0? prty(a->im) : prty(a->re) + + cmp x1,#0 + csel x1,x0,x2,ne // a->im!=0? sgn0(a->im) : sgn0(a->re) + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + .inst 0xd50323bf + ret +.size sgn0_pty_mont_384x,.-sgn0_pty_mont_384x diff --git a/crypto/blst_src/build/elf/mulq_mont_256-x86_64.s b/crypto/blst_src/build/elf/mulq_mont_256-x86_64.s new file mode 100644 index 00000000000..10b1b56cb50 --- /dev/null +++ b/crypto/blst_src/build/elf/mulq_mont_256-x86_64.s @@ -0,0 +1,731 @@ +.comm __blst_platform_cap,4 +.text + +.globl mul_mont_sparse_256 +.hidden mul_mont_sparse_256 +.type mul_mont_sparse_256,@function +.align 32 +mul_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_mont_sparse_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rdx),%rax + movq 0(%rsi),%r13 + movq 8(%rsi),%r14 + movq 16(%rsi),%r12 + movq 24(%rsi),%rbp + movq %rdx,%rbx + + movq %rax,%r15 + mulq %r13 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + call __mulq_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_mont_sparse_256,.-mul_mont_sparse_256 + +.globl sqr_mont_sparse_256 +.hidden sqr_mont_sparse_256 +.type sqr_mont_sparse_256,@function +.align 32 +sqr_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_sparse_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%rax + movq %rcx,%r8 + movq 8(%rsi),%r14 + movq %rdx,%rcx + movq 16(%rsi),%r12 + leaq (%rsi),%rbx + movq 24(%rsi),%rbp + + movq %rax,%r15 + mulq %rax + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + call __mulq_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_mont_sparse_256,.-sqr_mont_sparse_256 +.type __mulq_mont_sparse_256,@function +.align 32 +__mulq_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + mulq %r14 + addq %rax,%r10 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq %rbp + addq %rax,%r12 + movq 8(%rbx),%rax + adcq $0,%rdx + xorq %r14,%r14 + movq %rdx,%r13 + + movq %r9,%rdi + imulq %r8,%r9 + + + movq %rax,%r15 + mulq 0(%rsi) + addq %rax,%r10 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r12 + movq %r15,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq %rdx,%r14 + xorq %r15,%r15 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r9,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %rdi,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r12 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + addq %rdx,%r13 + adcq $0,%r14 + adcq $0,%r15 + movq %r10,%rdi + imulq %r8,%r10 + + + movq %rax,%r9 + mulq 0(%rsi) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq %rdx,%r15 + xorq %r9,%r9 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r10,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %rdi,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r13 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + addq %rdx,%r14 + adcq $0,%r15 + adcq $0,%r9 + movq %r11,%rdi + imulq %r8,%r11 + + + movq %rax,%r10 + mulq 0(%rsi) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq %rdx,%r9 + xorq %r10,%r10 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r11,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %rdi,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + addq %rdx,%r15 + adcq $0,%r9 + adcq $0,%r10 + imulq %r8,%rax + movq 8(%rsp),%rsi + + + movq %rax,%r11 + mulq 0(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq %rdx,%r12 + + mulq 8(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r12,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + movq %r14,%rbx + addq %rbp,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %rdx,%r9 + adcq $0,%r10 + + + + + movq %r15,%r12 + subq 0(%rcx),%r13 + sbbq 8(%rcx),%r14 + sbbq 16(%rcx),%r15 + movq %r9,%rbp + sbbq 24(%rcx),%r9 + sbbq $0,%r10 + + cmovcq %rax,%r13 + cmovcq %rbx,%r14 + cmovcq %r12,%r15 + movq %r13,0(%rsi) + cmovcq %rbp,%r9 + movq %r14,8(%rsi) + movq %r15,16(%rsi) + movq %r9,24(%rsi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulq_mont_sparse_256,.-__mulq_mont_sparse_256 +.globl from_mont_256 +.hidden from_mont_256 +.type from_mont_256,@function +.align 32 +from_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz from_mont_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_256 + + + + + + movq %r14,%r10 + movq %r15,%r11 + movq %r9,%r12 + + subq 0(%rbx),%r13 + sbbq 8(%rbx),%r14 + sbbq 16(%rbx),%r15 + sbbq 24(%rbx),%r9 + + cmovncq %r13,%rax + cmovncq %r14,%r10 + cmovncq %r15,%r11 + movq %rax,0(%rdi) + cmovncq %r9,%r12 + movq %r10,8(%rdi) + movq %r11,16(%rdi) + movq %r12,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size from_mont_256,.-from_mont_256 + +.globl redc_mont_256 +.hidden redc_mont_256 +.type redc_mont_256,@function +.align 32 +redc_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz redc_mont_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_256 + + addq 32(%rsi),%r13 + adcq 40(%rsi),%r14 + movq %r13,%rax + adcq 48(%rsi),%r15 + movq %r14,%r10 + adcq 56(%rsi),%r9 + sbbq %rsi,%rsi + + + + + movq %r15,%r11 + subq 0(%rbx),%r13 + sbbq 8(%rbx),%r14 + sbbq 16(%rbx),%r15 + movq %r9,%r12 + sbbq 24(%rbx),%r9 + sbbq $0,%rsi + + cmovncq %r13,%rax + cmovncq %r14,%r10 + cmovncq %r15,%r11 + movq %rax,0(%rdi) + cmovncq %r9,%r12 + movq %r10,8(%rdi) + movq %r11,16(%rdi) + movq %r12,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size redc_mont_256,.-redc_mont_256 +.type __mulq_by_1_mont_256,@function +.align 32 +__mulq_by_1_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + movq %rax,%r13 + imulq %rcx,%rax + movq %rax,%r9 + + mulq 0(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r13 + + mulq 8(%rbx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r13,%r10 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 16(%rbx) + movq %r10,%r14 + imulq %rcx,%r10 + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r13,%r11 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 24(%rbx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r13,%r12 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + movq %r11,%r15 + imulq %rcx,%r11 + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + movq %r12,%r9 + imulq %rcx,%r12 + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r9 + movq %r12,%rax + adcq %rdx,%r9 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulq_by_1_mont_256,.-__mulq_by_1_mont_256 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/mulq_mont_384-x86_64.s b/crypto/blst_src/build/elf/mulq_mont_384-x86_64.s new file mode 100644 index 00000000000..903ba23b12c --- /dev/null +++ b/crypto/blst_src/build/elf/mulq_mont_384-x86_64.s @@ -0,0 +1,3681 @@ +.comm __blst_platform_cap,4 +.text + + + + + + + +.type __subq_mod_384x384,@function +.align 32 +__subq_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __subq_mod_384x384,.-__subq_mod_384x384 + +.type __addq_mod_384,@function +.align 32 +__addq_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __addq_mod_384,.-__addq_mod_384 + +.type __subq_mod_384,@function +.align 32 +__subq_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__subq_mod_384_a_is_loaded: + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __subq_mod_384,.-__subq_mod_384 +.globl mul_mont_384x +.hidden mul_mont_384x +.type mul_mont_384x,@function +.align 32 +mul_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_mont_384x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $328,%rsp +.cfi_adjust_cfa_offset 328 + + + movq %rdx,%rbx + movq %rdi,32(%rsp) + movq %rsi,24(%rsp) + movq %rdx,16(%rsp) + movq %rcx,8(%rsp) + movq %r8,0(%rsp) + + + + + leaq 40(%rsp),%rdi + call __mulq_384 + + + leaq 48(%rbx),%rbx + leaq 48(%rsi),%rsi + leaq 40+96(%rsp),%rdi + call __mulq_384 + + + movq 8(%rsp),%rcx + leaq -48(%rsi),%rdx + leaq 40+192+48(%rsp),%rdi + call __addq_mod_384 + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq -48(%rdi),%rdi + call __addq_mod_384 + + leaq (%rdi),%rbx + leaq 48(%rdi),%rsi + call __mulq_384 + + + leaq (%rdi),%rsi + leaq 40(%rsp),%rdx + movq 8(%rsp),%rcx + call __subq_mod_384x384 + + leaq (%rdi),%rsi + leaq -96(%rdi),%rdx + call __subq_mod_384x384 + + + leaq 40(%rsp),%rsi + leaq 40+96(%rsp),%rdx + leaq 40(%rsp),%rdi + call __subq_mod_384x384 + + movq %rcx,%rbx + + + leaq 40(%rsp),%rsi + movq 0(%rsp),%rcx + movq 32(%rsp),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + + leaq 40+192(%rsp),%rsi + movq 0(%rsp),%rcx + leaq 48(%rdi),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + leaq 328(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -328-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_mont_384x,.-mul_mont_384x +.globl sqr_mont_384x +.hidden sqr_mont_384x +.type sqr_mont_384x,@function +.align 32 +sqr_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_384x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + + + leaq 48(%rsi),%rdx + leaq 32(%rsp),%rdi + call __addq_mod_384 + + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq 32+48(%rsp),%rdi + call __subq_mod_384 + + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + call __mulq_mont_384 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + movq %r14,%r12 + adcq %r9,%r9 + movq %r15,%r13 + adcq %r10,%r10 + movq %r8,%rax + adcq %r11,%r11 + movq %r9,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %r10,%rbp + sbbq 16(%rcx),%r8 + sbbq 24(%rcx),%r9 + sbbq 32(%rcx),%r10 + movq %r11,%rsi + sbbq 40(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r14 + cmovcq %r13,%r15 + cmovcq %rax,%r8 + movq %r14,48(%rdi) + cmovcq %rbx,%r9 + movq %r15,56(%rdi) + cmovcq %rbp,%r10 + movq %r8,64(%rdi) + cmovcq %rsi,%r11 + movq %r9,72(%rdi) + movq %r10,80(%rdi) + movq %r11,88(%rdi) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rax + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%r12 + movq 32+24(%rsp),%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_mont_384x,.-sqr_mont_384x + +.globl mul_382x +.hidden mul_382x +.type mul_382x,@function +.align 32 +mul_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_382x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + leaq 96(%rdi),%rdi + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + movq %rdi,16(%rsp) + movq %rcx,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 48(%rsi),%r8 + adcq 56(%rsi),%r9 + adcq 64(%rsi),%r10 + adcq 72(%rsi),%r11 + adcq 80(%rsi),%r12 + adcq 88(%rsi),%r13 + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + + addq 48(%rdx),%r8 + adcq 56(%rdx),%r9 + adcq 64(%rdx),%r10 + adcq 72(%rdx),%r11 + adcq 80(%rdx),%r12 + adcq 88(%rdx),%r13 + + movq %r8,32+48(%rsp) + movq %r9,32+56(%rsp) + movq %r10,32+64(%rsp) + movq %r11,32+72(%rsp) + movq %r12,32+80(%rsp) + movq %r13,32+88(%rsp) + + + leaq 32+0(%rsp),%rsi + leaq 32+48(%rsp),%rbx + call __mulq_384 + + + movq 0(%rsp),%rsi + movq 8(%rsp),%rbx + leaq -96(%rdi),%rdi + call __mulq_384 + + + leaq 48(%rsi),%rsi + leaq 48(%rbx),%rbx + leaq 32(%rsp),%rdi + call __mulq_384 + + + movq 16(%rsp),%rsi + leaq 32(%rsp),%rdx + movq 24(%rsp),%rcx + movq %rsi,%rdi + call __subq_mod_384x384 + + + leaq 0(%rdi),%rsi + leaq -96(%rdi),%rdx + call __subq_mod_384x384 + + + leaq -96(%rdi),%rsi + leaq 32(%rsp),%rdx + leaq -96(%rdi),%rdi + call __subq_mod_384x384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_382x,.-mul_382x +.globl sqr_382x +.hidden sqr_382x +.type sqr_382x,@function +.align 32 +sqr_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_382x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rcx + + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%rbx + movq 32(%rsi),%rbp + movq 40(%rsi),%rdx + + movq %r14,%r8 + addq 48(%rsi),%r14 + movq %r15,%r9 + adcq 56(%rsi),%r15 + movq %rax,%r10 + adcq 64(%rsi),%rax + movq %rbx,%r11 + adcq 72(%rsi),%rbx + movq %rbp,%r12 + adcq 80(%rsi),%rbp + movq %rdx,%r13 + adcq 88(%rsi),%rdx + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rdx,40(%rdi) + + + leaq 48(%rsi),%rdx + leaq 48(%rdi),%rdi + call __subq_mod_384_a_is_loaded + + + leaq (%rdi),%rsi + leaq -48(%rdi),%rbx + leaq -48(%rdi),%rdi + call __mulq_384 + + + movq (%rsp),%rsi + leaq 48(%rsi),%rbx + leaq 96(%rdi),%rdi + call __mulq_384 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq 64(%rdi),%rax + movq 72(%rdi),%rbx + movq 80(%rdi),%rbp + addq %r8,%r8 + movq 88(%rdi),%rdx + adcq %r9,%r9 + movq %r8,0(%rdi) + adcq %r10,%r10 + movq %r9,8(%rdi) + adcq %r11,%r11 + movq %r10,16(%rdi) + adcq %r12,%r12 + movq %r11,24(%rdi) + adcq %r13,%r13 + movq %r12,32(%rdi) + adcq %r14,%r14 + movq %r13,40(%rdi) + adcq %r15,%r15 + movq %r14,48(%rdi) + adcq %rax,%rax + movq %r15,56(%rdi) + adcq %rbx,%rbx + movq %rax,64(%rdi) + adcq %rbp,%rbp + movq %rbx,72(%rdi) + adcq %rdx,%rdx + movq %rbp,80(%rdi) + movq %rdx,88(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -8*7 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_382x,.-sqr_382x +.globl mul_384 +.hidden mul_384 +.type mul_384,@function +.align 32 +mul_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq %rdx,%rbx + call __mulq_384 + + movq 0(%rsp),%r12 +.cfi_restore %r12 + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_384,.-mul_384 + +.type __mulq_384,@function +.align 32 +__mulq_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rbx),%rax + + movq %rax,%rbp + mulq 0(%rsi) + movq %rax,0(%rdi) + movq %rbp,%rax + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r11 + movq 8(%rbx),%rax + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,8(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,16(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,24(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,32(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,40(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq %rax,%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rcx,48(%rdi) + movq %r8,56(%rdi) + movq %r9,64(%rdi) + movq %r10,72(%rdi) + movq %r11,80(%rdi) + movq %r12,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulq_384,.-__mulq_384 +.globl sqr_384 +.hidden sqr_384 +.type sqr_384,@function +.align 32 +sqr_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __sqrq_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_384,.-sqr_384 + +.type __sqrq_384,@function +.align 32 +__sqrq_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r15 + movq 16(%rsi),%rcx + movq 24(%rsi),%rbx + + + movq %rax,%r14 + mulq %r15 + movq %rax,%r9 + movq %r14,%rax + movq 32(%rsi),%rbp + movq %rdx,%r10 + + mulq %rcx + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + movq 40(%rsi),%rsi + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r11 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq %rbp + addq %rax,%r12 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq %rsi + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + + mulq %rax + xorq %r8,%r8 + movq %rax,0(%rdi) + movq %r15,%rax + addq %r9,%r9 + adcq $0,%r8 + addq %rdx,%r9 + adcq $0,%r8 + movq %r9,8(%rdi) + + mulq %rcx + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r12 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rbp + addq %rax,%r13 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rsi + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rax + xorq %r9,%r9 + addq %rax,%r8 + movq %rcx,%rax + addq %r10,%r10 + adcq %r11,%r11 + adcq $0,%r9 + addq %r8,%r10 + adcq %rdx,%r11 + adcq $0,%r9 + movq %r10,16(%rdi) + + mulq %rbx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + movq %r11,24(%rdi) + movq %rdx,%r8 + + mulq %rbp + addq %rax,%r14 + movq %rcx,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq %rsi + addq %rax,%r15 + movq %rcx,%rax + adcq $0,%rdx + addq %r8,%r15 + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + xorq %r11,%r11 + addq %rax,%r9 + movq %rbx,%rax + addq %r12,%r12 + adcq %r13,%r13 + adcq $0,%r11 + addq %r9,%r12 + adcq %rdx,%r13 + adcq $0,%r11 + movq %r12,32(%rdi) + + + mulq %rbp + addq %rax,%r15 + movq %rbx,%rax + adcq $0,%rdx + movq %r13,40(%rdi) + movq %rdx,%r8 + + mulq %rsi + addq %rax,%rcx + movq %rbx,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%rbx + + mulq %rax + xorq %r12,%r12 + addq %rax,%r11 + movq %rbp,%rax + addq %r14,%r14 + adcq %r15,%r15 + adcq $0,%r12 + addq %r11,%r14 + adcq %rdx,%r15 + movq %r14,48(%rdi) + adcq $0,%r12 + movq %r15,56(%rdi) + + + mulq %rsi + addq %rax,%rbx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + xorq %r13,%r13 + addq %rax,%r12 + movq %rsi,%rax + addq %rcx,%rcx + adcq %rbx,%rbx + adcq $0,%r13 + addq %r12,%rcx + adcq %rdx,%rbx + movq %rcx,64(%rdi) + adcq $0,%r13 + movq %rbx,72(%rdi) + + + mulq %rax + addq %r13,%rax + addq %rbp,%rbp + adcq $0,%rdx + addq %rbp,%rax + adcq $0,%rdx + movq %rax,80(%rdi) + movq %rdx,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __sqrq_384,.-__sqrq_384 + +.globl sqr_mont_384 +.hidden sqr_mont_384 +.type sqr_mont_384,@function +.align 32 +sqr_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $120,%rsp +.cfi_adjust_cfa_offset 8*15 + + + movq %rcx,96(%rsp) + movq %rdx,104(%rsp) + movq %rdi,112(%rsp) + + movq %rsp,%rdi + call __sqrq_384 + + leaq 0(%rsp),%rsi + movq 96(%rsp),%rcx + movq 104(%rsp),%rbx + movq 112(%rsp),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + leaq 120(%rsp),%r8 + movq 120(%rsp),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -8*21 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_mont_384,.-sqr_mont_384 + + + +.globl redc_mont_384 +.hidden redc_mont_384 +.type redc_mont_384,@function +.align 32 +redc_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz redc_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size redc_mont_384,.-redc_mont_384 + + + + +.globl from_mont_384 +.hidden from_mont_384 +.type from_mont_384,@function +.align 32 +from_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz from_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_384 + + + + + + movq %r15,%rcx + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size from_mont_384,.-from_mont_384 +.type __mulq_by_1_mont_384,@function +.align 32 +__mulq_by_1_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rax,%r14 + imulq %rcx,%rax + movq %rax,%r8 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r9 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r10 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %r9,%r15 + imulq %rcx,%r9 + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 32(%rbx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 40(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r9,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r11 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %r10,%r8 + imulq %rcx,%r10 + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r8 + movq %r10,%rax + adcq %rdx,%r8 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rbx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %r11,%r9 + imulq %rcx,%r11 + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r8,%r15 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 0(%rbx) + addq %rax,%r9 + movq %r11,%rax + adcq %rdx,%r9 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %r12,%r10 + imulq %rcx,%r12 + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rbx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 0(%rbx) + addq %rax,%r10 + movq %r12,%rax + adcq %rdx,%r10 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %r13,%r11 + imulq %rcx,%r13 + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rbx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r8 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rbx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rbx) + addq %rax,%r11 + movq %r13,%rax + adcq %rdx,%r11 + + mulq 8(%rbx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rbx) + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rbx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r9 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rbx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulq_by_1_mont_384,.-__mulq_by_1_mont_384 + +.type __redq_tail_mont_384,@function +.align 32 +__redq_tail_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq 48(%rsi),%r14 + movq %r14,%rax + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + movq %r15,%rcx + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + sbbq %r12,%r12 + + + + + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __redq_tail_mont_384,.-__redq_tail_mont_384 + +.globl sgn0_pty_mont_384 +.hidden sgn0_pty_mont_384 +.type sgn0_pty_mont_384,@function +.align 32 +sgn0_pty_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sgn0_pty_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 0(%rdi),%rsi + movq %rdx,%rcx + call __mulq_by_1_mont_384 + + xorq %rax,%rax + movq %r14,%r13 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + notq %rax + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sgn0_pty_mont_384,.-sgn0_pty_mont_384 + +.globl sgn0_pty_mont_384x +.hidden sgn0_pty_mont_384x +.type sgn0_pty_mont_384x,@function +.align 32 +sgn0_pty_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sgn0_pty_mont_384x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 48(%rdi),%rsi + movq %rdx,%rcx + call __mulq_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + leaq 0(%rdi),%rsi + xorq %rdi,%rdi + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rdi + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rdi + + movq %r14,0(%rsp) + notq %rdi + andq $1,%r13 + andq $2,%rdi + orq %r13,%rdi + + call __mulq_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + xorq %rax,%rax + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + movq 0(%rsp),%r12 + + notq %rax + + testq %r14,%r14 + cmovzq %rdi,%r13 + + testq %r12,%r12 + cmovnzq %rdi,%rax + + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sgn0_pty_mont_384x,.-sgn0_pty_mont_384x +.globl mul_mont_384 +.hidden mul_mont_384 +.type mul_mont_384,@function +.align 32 +mul_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz mul_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $24,%rsp +.cfi_adjust_cfa_offset 8*3 + + + movq 0(%rdx),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq %rdx,%rbx + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + + call __mulq_mont_384 + + movq 24(%rsp),%r15 +.cfi_restore %r15 + movq 32(%rsp),%r14 +.cfi_restore %r14 + movq 40(%rsp),%r13 +.cfi_restore %r13 + movq 48(%rsp),%r12 +.cfi_restore %r12 + movq 56(%rsp),%rbx +.cfi_restore %rbx + movq 64(%rsp),%rbp +.cfi_restore %rbp + leaq 72(%rsp),%rsp +.cfi_adjust_cfa_offset -72 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mul_mont_384,.-mul_mont_384 +.type __mulq_mont_384,@function +.align 32 +__mulq_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rax,%rdi + mulq %r14 + movq %rax,%r8 + movq %rdi,%rax + movq %rdx,%r9 + + mulq %r15 + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r12 + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r11 + + movq %r8,%rbp + imulq 8(%rsp),%r8 + + mulq %r13 + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq 40(%rsi) + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + xorq %r15,%r15 + movq %rdx,%r14 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r8,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r13 + movq 8(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 8(%rsi) + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r10 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + movq %r9,%rbp + imulq 8(%rsp),%r9 + + mulq 24(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rsi) + addq %r8,%r14 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r14 + movq %r9,%rax + adcq %rdx,%r15 + adcq $0,%r8 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r9,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r14 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq %rdx,%r15 + adcq $0,%r8 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r11 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + movq %r10,%rbp + imulq 8(%rsp),%r10 + + mulq 24(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rsi) + addq %r9,%r15 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r15 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r10,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r15 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 8(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + movq %r11,%rbp + imulq 8(%rsp),%r11 + + mulq 24(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rsi) + addq %r10,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r11,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r8 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 8(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + movq %r12,%rbp + imulq 8(%rsp),%r12 + + mulq 24(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rsi) + addq %rax,%r8 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %r11,%r9 + adcq $0,%rdx + xorq %r11,%r11 + addq %rax,%r9 + movq %r12,%rax + adcq %rdx,%r10 + adcq $0,%r11 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r12,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r8 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r9 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 8(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r14 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 16(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r15 + adcq $0,%rdx + movq %rdx,%r12 + + movq %r13,%rbp + imulq 8(%rsp),%r13 + + mulq 24(%rsi) + addq %rax,%r8 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r8 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r9 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 40(%rsi) + addq %r12,%r10 + adcq $0,%rdx + xorq %r12,%r12 + addq %rax,%r10 + movq %r13,%rax + adcq %rdx,%r11 + adcq $0,%r12 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r13,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq %rdx,%r11 + adcq $0,%r12 + + + + + movq 16(%rsp),%rdi + subq 0(%rcx),%r14 + movq %r15,%rdx + sbbq 8(%rcx),%r15 + movq %r8,%rbx + sbbq 16(%rcx),%r8 + movq %r9,%rsi + sbbq 24(%rcx),%r9 + movq %r10,%rbp + sbbq 32(%rcx),%r10 + movq %r11,%r13 + sbbq 40(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rdx,%r15 + cmovcq %rbx,%r8 + movq %r14,0(%rdi) + cmovcq %rsi,%r9 + movq %r15,8(%rdi) + cmovcq %rbp,%r10 + movq %r8,16(%rdi) + cmovcq %r13,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulq_mont_384,.-__mulq_mont_384 +.globl sqr_n_mul_mont_384 +.hidden sqr_n_mul_mont_384 +.type sqr_n_mul_mont_384,@function +.align 32 +sqr_n_mul_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_n_mul_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 8*17 + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rcx,16(%rsp) + leaq 32(%rsp),%rdi + movq %r9,24(%rsp) + movq (%r9),%xmm2 + +.Loop_sqr_384: + movd %edx,%xmm1 + + call __sqrq_384 + + leaq 0(%rdi),%rsi + movq 0(%rsp),%rcx + movq 16(%rsp),%rbx + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + movd %xmm1,%edx + leaq 0(%rdi),%rsi + decl %edx + jnz .Loop_sqr_384 + +.byte 102,72,15,126,208 + movq %rbx,%rcx + movq 24(%rsp),%rbx + + + + + + + movq %r8,%r12 + movq %r9,%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 136(%rsp),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -8*23 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_n_mul_mont_384,.-sqr_n_mul_mont_384 + +.globl sqr_n_mul_mont_383 +.hidden sqr_n_mul_mont_383 +.type sqr_n_mul_mont_383,@function +.align 32 +sqr_n_mul_mont_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_n_mul_mont_383$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 8*17 + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rcx,16(%rsp) + leaq 32(%rsp),%rdi + movq %r9,24(%rsp) + movq (%r9),%xmm2 + +.Loop_sqr_383: + movd %edx,%xmm1 + + call __sqrq_384 + + leaq 0(%rdi),%rsi + movq 0(%rsp),%rcx + movq 16(%rsp),%rbx + call __mulq_by_1_mont_384 + + movd %xmm1,%edx + addq 48(%rsi),%r14 + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + leaq 0(%rdi),%rsi + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + decl %edx + jnz .Loop_sqr_383 + +.byte 102,72,15,126,208 + movq %rbx,%rcx + movq 24(%rsp),%rbx + + + + + + + movq %r8,%r12 + movq %r9,%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 136(%rsp),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -8*23 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_n_mul_mont_383,.-sqr_n_mul_mont_383 +.type __mulq_mont_383_nonred,@function +.align 32 +__mulq_mont_383_nonred: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rax,%rbp + mulq %r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r12 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + movq %r8,%r15 + imulq 8(%rsp),%r8 + + mulq %r13 + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq 40(%rsi) + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rcx) + addq %rax,%r15 + movq %r8,%rax + adcq %rdx,%r15 + + mulq 8(%rcx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r9 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rcx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rcx) + addq %r15,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rcx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rcx) + addq %rax,%r13 + movq 8(%rbx),%rax + adcq $0,%rdx + addq %r15,%r13 + adcq %rdx,%r14 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 8(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r11 + adcq $0,%rdx + movq %rdx,%r15 + + movq %r9,%r8 + imulq 8(%rsp),%r9 + + mulq 24(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rsi) + addq %r15,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rcx) + addq %rax,%r8 + movq %r9,%rax + adcq %rdx,%r8 + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r10 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 24(%rcx) + addq %r8,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rcx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rcx) + addq %rax,%r14 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %r8,%r14 + adcq %rdx,%r15 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 8(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + movq %r10,%r9 + imulq 8(%rsp),%r10 + + mulq 24(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rsi) + addq %r8,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 0(%rcx) + addq %rax,%r9 + movq %r10,%rax + adcq %rdx,%r9 + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r11 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rcx) + addq %r9,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rcx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rcx) + addq %rax,%r15 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %r9,%r15 + adcq %rdx,%r8 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + movq %r11,%r10 + imulq 8(%rsp),%r11 + + mulq 24(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rsi) + addq %r9,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 0(%rcx) + addq %rax,%r10 + movq %r11,%rax + adcq %rdx,%r10 + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rcx) + addq %r10,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rcx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rcx) + addq %rax,%r8 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %r10,%r8 + adcq %rdx,%r9 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 8(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + movq %r12,%r11 + imulq 8(%rsp),%r12 + + mulq 24(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r8 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rsi) + addq %r10,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rcx) + addq %rax,%r11 + movq %r12,%rax + adcq %rdx,%r11 + + mulq 8(%rcx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rcx) + addq %r11,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rcx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rcx) + addq %rax,%r9 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %r11,%r9 + adcq %rdx,%r10 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 8(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + movq %r13,%r12 + imulq 8(%rsp),%r13 + + mulq 24(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r9 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %r11,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 0(%rcx) + addq %rax,%r12 + movq %r13,%rax + adcq %rdx,%r12 + + mulq 8(%rcx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r14 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 16(%rcx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r15 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 24(%rcx) + addq %r12,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rcx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r9 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 40(%rcx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r12,%r10 + adcq %rdx,%r11 + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulq_mont_383_nonred,.-__mulq_mont_383_nonred +.globl sqr_mont_382x +.hidden sqr_mont_382x +.type sqr_mont_382x,@function +.align 32 +sqr_mont_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,__blst_platform_cap(%rip) + jnz sqr_mont_382x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rsi,16(%rsp) + movq %rdi,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rdx + adcq 72(%rsi),%r11 + movq %r12,%rbx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rdx + sbbq 80(%rsi),%rbx + sbbq 88(%rsi),%rbp + sbbq %rdi,%rdi + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + movq %r14,32+48(%rsp) + movq %r15,32+56(%rsp) + movq %rax,32+64(%rsp) + movq %rdx,32+72(%rsp) + movq %rbx,32+80(%rsp) + movq %rbp,32+88(%rsp) + movq %rdi,32+96(%rsp) + + + + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + movq 24(%rsp),%rdi + call __mulq_mont_383_nonred + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %r8,64(%rdi) + movq %r9,72(%rdi) + movq %r10,80(%rdi) + movq %r11,88(%rdi) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rax + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%r12 + movq 32+24(%rsp),%r13 + + call __mulq_mont_383_nonred + movq 32+96(%rsp),%rsi + movq 32+0(%rsp),%r12 + movq 32+8(%rsp),%r13 + andq %rsi,%r12 + movq 32+16(%rsp),%rax + andq %rsi,%r13 + movq 32+24(%rsp),%rbx + andq %rsi,%rax + movq 32+32(%rsp),%rbp + andq %rsi,%rbx + andq %rsi,%rbp + andq 32+40(%rsp),%rsi + + subq %r12,%r14 + movq 0(%rcx),%r12 + sbbq %r13,%r15 + movq 8(%rcx),%r13 + sbbq %rax,%r8 + movq 16(%rcx),%rax + sbbq %rbx,%r9 + movq 24(%rcx),%rbx + sbbq %rbp,%r10 + movq 32(%rcx),%rbp + sbbq %rsi,%r11 + sbbq %rsi,%rsi + + andq %rsi,%r12 + andq %rsi,%r13 + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rbp + andq 40(%rcx),%rsi + + addq %r12,%r14 + adcq %r13,%r15 + adcq %rax,%r8 + adcq %rbx,%r9 + adcq %rbp,%r10 + adcq %rsi,%r11 + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqr_mont_382x,.-sqr_mont_382x + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/mulx_mont_256-x86_64.s b/crypto/blst_src/build/elf/mulx_mont_256-x86_64.s new file mode 100644 index 00000000000..42e89134cff --- /dev/null +++ b/crypto/blst_src/build/elf/mulx_mont_256-x86_64.s @@ -0,0 +1,631 @@ +.text + +.globl mulx_mont_sparse_256 +.hidden mulx_mont_sparse_256 +.type mulx_mont_sparse_256,@function +.align 32 +mulx_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_mont_sparse_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rbp + movq 24(%rsi),%r9 + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%rax,%r11 + call __mulx_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mulx_mont_sparse_256,.-mulx_mont_sparse_256 + +.globl sqrx_mont_sparse_256 +.hidden sqrx_mont_sparse_256 +.type sqrx_mont_sparse_256,@function +.align 32 +sqrx_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_sparse_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + movq %rcx,%r8 + movq %rdx,%rcx + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rbp + movq 24(%rsi),%r9 + leaq -128(%rbx),%rsi + leaq -128(%rcx),%rcx + + mulxq %rdx,%rax,%r11 + call __mulx_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_mont_sparse_256,.-sqrx_mont_sparse_256 +.type __mulx_mont_sparse_256,@function +.align 32 +__mulx_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + mulxq %r15,%r15,%r12 + mulxq %rbp,%rbp,%r13 + addq %r15,%r11 + mulxq %r9,%r9,%r14 + movq 8(%rbx),%rdx + adcq %rbp,%r12 + adcq %r9,%r13 + adcq $0,%r14 + + movq %rax,%r10 + imulq %r8,%rax + + + xorq %r15,%r15 + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r11 + adcxq %r9,%r12 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r12 + adcxq %r9,%r13 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r14 + adcxq %r15,%r9 + adoxq %r9,%r15 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r10 + adoxq %r11,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r12 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r12 + adoxq %r9,%r13 + + mulxq 24+128(%rcx),%rbp,%r9 + movq 16(%rbx),%rdx + adcxq %rbp,%r13 + adoxq %r9,%r14 + adcxq %r10,%r14 + adoxq %r10,%r15 + adcxq %r10,%r15 + adoxq %r10,%r10 + adcq $0,%r10 + movq %rax,%r11 + imulq %r8,%rax + + + xorq %rbp,%rbp + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r12 + adcxq %r9,%r13 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r14 + adcxq %r9,%r15 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r15 + adcxq %r10,%r9 + adoxq %r9,%r10 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r11 + adoxq %r12,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r13 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r13 + adoxq %r9,%r14 + + mulxq 24+128(%rcx),%rbp,%r9 + movq 24(%rbx),%rdx + adcxq %rbp,%r14 + adoxq %r9,%r15 + adcxq %r11,%r15 + adoxq %r11,%r10 + adcxq %r11,%r10 + adoxq %r11,%r11 + adcq $0,%r11 + movq %rax,%r12 + imulq %r8,%rax + + + xorq %rbp,%rbp + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r14 + adcxq %r9,%r15 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r15 + adcxq %r9,%r10 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r10 + adcxq %r11,%r9 + adoxq %r9,%r11 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r12 + adoxq %r13,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r14 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r14 + adoxq %r9,%r15 + + mulxq 24+128(%rcx),%rbp,%r9 + movq %rax,%rdx + adcxq %rbp,%r15 + adoxq %r9,%r10 + adcxq %r12,%r10 + adoxq %r12,%r11 + adcxq %r12,%r11 + adoxq %r12,%r12 + adcq $0,%r12 + imulq %r8,%rdx + + + xorq %rbp,%rbp + mulxq 0+128(%rcx),%r13,%r9 + adcxq %rax,%r13 + adoxq %r9,%r14 + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%r14 + adoxq %r9,%r15 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r15 + adoxq %r9,%r10 + + mulxq 24+128(%rcx),%rbp,%r9 + movq %r14,%rdx + leaq 128(%rcx),%rcx + adcxq %rbp,%r10 + adoxq %r9,%r11 + movq %r15,%rax + adcxq %r13,%r11 + adoxq %r13,%r12 + adcq $0,%r12 + + + + + movq %r10,%rbp + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + sbbq 16(%rcx),%r10 + movq %r11,%r9 + sbbq 24(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rdx,%r14 + cmovcq %rax,%r15 + cmovcq %rbp,%r10 + movq %r14,0(%rdi) + cmovcq %r9,%r11 + movq %r15,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulx_mont_sparse_256,.-__mulx_mont_sparse_256 +.globl fromx_mont_256 +.hidden fromx_mont_256 +.type fromx_mont_256,@function +.align 32 +fromx_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +from_mont_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_256 + + + + + + movq %r15,%rdx + movq %r10,%r12 + movq %r11,%r13 + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r10 + sbbq 24(%rbx),%r11 + + cmovncq %r14,%rax + cmovncq %r15,%rdx + cmovncq %r10,%r12 + movq %rax,0(%rdi) + cmovncq %r11,%r13 + movq %rdx,8(%rdi) + movq %r12,16(%rdi) + movq %r13,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size fromx_mont_256,.-fromx_mont_256 + +.globl redcx_mont_256 +.hidden redcx_mont_256 +.type redcx_mont_256,@function +.align 32 +redcx_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +redc_mont_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_256 + + addq 32(%rsi),%r14 + adcq 40(%rsi),%r15 + movq %r14,%rax + adcq 48(%rsi),%r10 + movq %r15,%rdx + adcq 56(%rsi),%r11 + sbbq %rsi,%rsi + + + + + movq %r10,%r12 + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r10 + movq %r11,%r13 + sbbq 24(%rbx),%r11 + sbbq $0,%rsi + + cmovncq %r14,%rax + cmovncq %r15,%rdx + cmovncq %r10,%r12 + movq %rax,0(%rdi) + cmovncq %r11,%r13 + movq %rdx,8(%rdi) + movq %r12,16(%rdi) + movq %r13,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size redcx_mont_256,.-redcx_mont_256 +.type __mulx_by_1_mont_256,@function +.align 32 +__mulx_by_1_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r11 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + movq %rax,%r14 + imulq %rcx,%rax + movq %rax,%r10 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + movq %r11,%r15 + imulq %rcx,%r11 + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + movq %r12,%r10 + imulq %rcx,%r12 + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r10 + movq %r12,%rax + adcq %rdx,%r10 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rbx) + movq %r13,%r11 + imulq %rcx,%r13 + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rbx) + addq %rax,%r11 + movq %r13,%rax + adcq %rdx,%r11 + + mulq 8(%rbx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rbx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulx_by_1_mont_256,.-__mulx_by_1_mont_256 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/mulx_mont_384-x86_64.s b/crypto/blst_src/build/elf/mulx_mont_384-x86_64.s new file mode 100644 index 00000000000..5c67d918d22 --- /dev/null +++ b/crypto/blst_src/build/elf/mulx_mont_384-x86_64.s @@ -0,0 +1,2983 @@ +.text + + + + + + + +.type __subx_mod_384x384,@function +.align 32 +__subx_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __subx_mod_384x384,.-__subx_mod_384x384 + +.type __addx_mod_384,@function +.align 32 +__addx_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __addx_mod_384,.-__addx_mod_384 + +.type __subx_mod_384,@function +.align 32 +__subx_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__subx_mod_384_a_is_loaded: + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __subx_mod_384,.-__subx_mod_384 +.globl mulx_mont_384x +.hidden mulx_mont_384x +.type mulx_mont_384x,@function +.align 32 +mulx_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_mont_384x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $328,%rsp +.cfi_adjust_cfa_offset 328 + + + movq %rdx,%rbx + movq %rdi,32(%rsp) + movq %rsi,24(%rsp) + movq %rdx,16(%rsp) + movq %rcx,8(%rsp) + movq %r8,0(%rsp) + + + + + leaq 40(%rsp),%rdi + call __mulx_384 + + + leaq 48(%rbx),%rbx + leaq 128+48(%rsi),%rsi + leaq 96(%rdi),%rdi + call __mulx_384 + + + movq 8(%rsp),%rcx + leaq (%rbx),%rsi + leaq -48(%rbx),%rdx + leaq 40+192+48(%rsp),%rdi + call __addx_mod_384 + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq -48(%rdi),%rdi + call __addx_mod_384 + + leaq (%rdi),%rbx + leaq 48(%rdi),%rsi + call __mulx_384 + + + leaq (%rdi),%rsi + leaq 40(%rsp),%rdx + movq 8(%rsp),%rcx + call __subx_mod_384x384 + + leaq (%rdi),%rsi + leaq -96(%rdi),%rdx + call __subx_mod_384x384 + + + leaq 40(%rsp),%rsi + leaq 40+96(%rsp),%rdx + leaq 40(%rsp),%rdi + call __subx_mod_384x384 + + leaq (%rcx),%rbx + + + leaq 40(%rsp),%rsi + movq 0(%rsp),%rcx + movq 32(%rsp),%rdi + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + + leaq 40+192(%rsp),%rsi + movq 0(%rsp),%rcx + leaq 48(%rdi),%rdi + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + leaq 328(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -328-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mulx_mont_384x,.-mulx_mont_384x +.globl sqrx_mont_384x +.hidden sqrx_mont_384x +.type sqrx_mont_384x,@function +.align 32 +sqrx_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_384x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + + movq %rdi,16(%rsp) + movq %rsi,24(%rsp) + + + leaq 48(%rsi),%rdx + leaq 32(%rsp),%rdi + call __addx_mod_384 + + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq 32+48(%rsp),%rdi + call __subx_mod_384 + + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + addq %rdx,%rdx + adcq %r15,%r15 + adcq %rax,%rax + movq %rdx,%r8 + adcq %r12,%r12 + movq %r15,%r9 + adcq %rdi,%rdi + movq %rax,%r10 + adcq %rbp,%rbp + movq %r12,%r11 + sbbq %rsi,%rsi + + subq 0(%rcx),%rdx + sbbq 8(%rcx),%r15 + movq %rdi,%r13 + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%r12 + sbbq 32(%rcx),%rdi + movq %rbp,%r14 + sbbq 40(%rcx),%rbp + sbbq $0,%rsi + + cmovcq %r8,%rdx + cmovcq %r9,%r15 + cmovcq %r10,%rax + movq %rdx,48(%rbx) + cmovcq %r11,%r12 + movq %r15,56(%rbx) + cmovcq %r13,%rdi + movq %rax,64(%rbx) + cmovcq %r14,%rbp + movq %r12,72(%rbx) + movq %rdi,80(%rbx) + movq %rbp,88(%rbx) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rdx + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%rax + movq 32+24(%rsp),%r12 + movq 32+32(%rsp),%rdi + movq 32+40(%rsp),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_mont_384x,.-sqrx_mont_384x + +.globl mulx_382x +.hidden mulx_382x +.type mulx_382x,@function +.align 32 +mulx_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_382x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + leaq 96(%rdi),%rdi + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + movq %rdi,16(%rsp) + movq %rcx,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 48(%rsi),%r8 + adcq 56(%rsi),%r9 + adcq 64(%rsi),%r10 + adcq 72(%rsi),%r11 + adcq 80(%rsi),%r12 + adcq 88(%rsi),%r13 + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + + addq 48(%rdx),%r8 + adcq 56(%rdx),%r9 + adcq 64(%rdx),%r10 + adcq 72(%rdx),%r11 + adcq 80(%rdx),%r12 + adcq 88(%rdx),%r13 + + movq %r8,32+48(%rsp) + movq %r9,32+56(%rsp) + movq %r10,32+64(%rsp) + movq %r11,32+72(%rsp) + movq %r12,32+80(%rsp) + movq %r13,32+88(%rsp) + + + leaq 32+0(%rsp),%rsi + leaq 32+48(%rsp),%rbx + call __mulx_384 + + + movq 0(%rsp),%rsi + movq 8(%rsp),%rbx + leaq -96(%rdi),%rdi + call __mulx_384 + + + leaq 48+128(%rsi),%rsi + leaq 48(%rbx),%rbx + leaq 32(%rsp),%rdi + call __mulx_384 + + + movq 16(%rsp),%rsi + leaq 32(%rsp),%rdx + movq 24(%rsp),%rcx + movq %rsi,%rdi + call __subx_mod_384x384 + + + leaq 0(%rdi),%rsi + leaq -96(%rdi),%rdx + call __subx_mod_384x384 + + + leaq -96(%rdi),%rsi + leaq 32(%rsp),%rdx + leaq -96(%rdi),%rdi + call __subx_mod_384x384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mulx_382x,.-mulx_382x +.globl sqrx_382x +.hidden sqrx_382x +.type sqrx_382x,@function +.align 32 +sqrx_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_382x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rcx + + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%rbx + movq 32(%rsi),%rbp + movq 40(%rsi),%rdx + + movq %r14,%r8 + addq 48(%rsi),%r14 + movq %r15,%r9 + adcq 56(%rsi),%r15 + movq %rax,%r10 + adcq 64(%rsi),%rax + movq %rbx,%r11 + adcq 72(%rsi),%rbx + movq %rbp,%r12 + adcq 80(%rsi),%rbp + movq %rdx,%r13 + adcq 88(%rsi),%rdx + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rdx,40(%rdi) + + + leaq 48(%rsi),%rdx + leaq 48(%rdi),%rdi + call __subx_mod_384_a_is_loaded + + + leaq (%rdi),%rsi + leaq -48(%rdi),%rbx + leaq -48(%rdi),%rdi + call __mulx_384 + + + movq (%rsp),%rsi + leaq 48(%rsi),%rbx + leaq 96(%rdi),%rdi + call __mulx_384 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq 64(%rdi),%rax + movq 72(%rdi),%rbx + movq 80(%rdi),%rbp + addq %r8,%r8 + movq 88(%rdi),%rdx + adcq %r9,%r9 + movq %r8,0(%rdi) + adcq %r10,%r10 + movq %r9,8(%rdi) + adcq %r11,%r11 + movq %r10,16(%rdi) + adcq %r12,%r12 + movq %r11,24(%rdi) + adcq %r13,%r13 + movq %r12,32(%rdi) + adcq %r14,%r14 + movq %r13,40(%rdi) + adcq %r15,%r15 + movq %r14,48(%rdi) + adcq %rax,%rax + movq %r15,56(%rdi) + adcq %rbx,%rbx + movq %rax,64(%rdi) + adcq %rbp,%rbp + movq %rbx,72(%rdi) + adcq %rdx,%rdx + movq %rbp,80(%rdi) + movq %rdx,88(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -8*7 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_382x,.-sqrx_382x +.globl mulx_384 +.hidden mulx_384 +.type mulx_384,@function +.align 32 +mulx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + + movq %rdx,%rbx + call __mulx_384 + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mulx_384,.-mulx_384 + +.type __mulx_384,@function +.align 32 +__mulx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rbx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + leaq -128(%rsi),%rsi + + mulxq %r14,%r9,%rcx + xorq %rbp,%rbp + + mulxq %r15,%r8,%rax + adcxq %rcx,%r8 + movq %r9,0(%rdi) + + mulxq %r10,%r9,%rcx + adcxq %rax,%r9 + + mulxq %r11,%r10,%rax + adcxq %rcx,%r10 + + mulxq %r12,%r11,%rcx + adcxq %rax,%r11 + + mulxq %r13,%r12,%r13 + movq 8(%rbx),%rdx + adcxq %rcx,%r12 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,8(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 16(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,16(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 24(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,24(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 32(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,32(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 40(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,40(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq %rax,%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + movq %r10,64(%rdi) + movq %r11,72(%rdi) + movq %r12,80(%rdi) + movq %r13,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulx_384,.-__mulx_384 +.globl sqrx_384 +.hidden sqrx_384 +.type sqrx_384,@function +.align 32 +sqrx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + call __sqrx_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_384,.-sqrx_384 +.type __sqrx_384,@function +.align 32 +__sqrx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%rcx + movq 32(%rsi),%rbx + + + mulxq %r14,%r8,%rdi + movq 40(%rsi),%rbp + mulxq %r15,%r9,%rax + addq %rdi,%r9 + mulxq %rcx,%r10,%rdi + adcq %rax,%r10 + mulxq %rbx,%r11,%rax + adcq %rdi,%r11 + mulxq %rbp,%r12,%r13 + movq %r14,%rdx + adcq %rax,%r12 + adcq $0,%r13 + + + xorq %r14,%r14 + mulxq %r15,%rdi,%rax + adcxq %rdi,%r10 + adoxq %rax,%r11 + + mulxq %rcx,%rdi,%rax + adcxq %rdi,%r11 + adoxq %rax,%r12 + + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r12 + adoxq %rax,%r13 + + mulxq %rbp,%rdi,%rax + movq %r15,%rdx + adcxq %rdi,%r13 + adoxq %r14,%rax + adcxq %rax,%r14 + + + xorq %r15,%r15 + mulxq %rcx,%rdi,%rax + adcxq %rdi,%r12 + adoxq %rax,%r13 + + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r13 + adoxq %rax,%r14 + + mulxq %rbp,%rdi,%rax + movq %rcx,%rdx + adcxq %rdi,%r14 + adoxq %r15,%rax + adcxq %rax,%r15 + + + xorq %rcx,%rcx + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r14 + adoxq %rax,%r15 + + mulxq %rbp,%rdi,%rax + movq %rbx,%rdx + adcxq %rdi,%r15 + adoxq %rcx,%rax + adcxq %rax,%rcx + + + mulxq %rbp,%rdi,%rbx + movq 0(%rsi),%rdx + addq %rdi,%rcx + movq 8(%rsp),%rdi + adcq $0,%rbx + + + xorq %rbp,%rbp + adcxq %r8,%r8 + adcxq %r9,%r9 + adcxq %r10,%r10 + adcxq %r11,%r11 + adcxq %r12,%r12 + + + mulxq %rdx,%rdx,%rax + movq %rdx,0(%rdi) + movq 8(%rsi),%rdx + adoxq %rax,%r8 + movq %r8,8(%rdi) + + mulxq %rdx,%r8,%rax + movq 16(%rsi),%rdx + adoxq %r8,%r9 + adoxq %rax,%r10 + movq %r9,16(%rdi) + movq %r10,24(%rdi) + + mulxq %rdx,%r8,%r9 + movq 24(%rsi),%rdx + adoxq %r8,%r11 + adoxq %r9,%r12 + adcxq %r13,%r13 + adcxq %r14,%r14 + movq %r11,32(%rdi) + movq %r12,40(%rdi) + + mulxq %rdx,%r8,%r9 + movq 32(%rsi),%rdx + adoxq %r8,%r13 + adoxq %r9,%r14 + adcxq %r15,%r15 + adcxq %rcx,%rcx + movq %r13,48(%rdi) + movq %r14,56(%rdi) + + mulxq %rdx,%r8,%r9 + movq 40(%rsi),%rdx + adoxq %r8,%r15 + adoxq %r9,%rcx + adcxq %rbx,%rbx + adcxq %rbp,%rbp + movq %r15,64(%rdi) + movq %rcx,72(%rdi) + + mulxq %rdx,%r8,%r9 + adoxq %r8,%rbx + adoxq %r9,%rbp + + movq %rbx,80(%rdi) + movq %rbp,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __sqrx_384,.-__sqrx_384 + + + +.globl redcx_mont_384 +.hidden redcx_mont_384 +.type redcx_mont_384,@function +.align 32 +redcx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +redc_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size redcx_mont_384,.-redcx_mont_384 + + + + +.globl fromx_mont_384 +.hidden fromx_mont_384 +.type fromx_mont_384,@function +.align 32 +fromx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +from_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_384 + + + + + movq %r14,%rax + movq %r15,%rcx + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size fromx_mont_384,.-fromx_mont_384 +.type __mulx_by_1_mont_384,@function +.align 32 +__mulx_by_1_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq %rcx,%rdx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + imulq %r8,%rdx + + + xorq %r14,%r14 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r13 + adoxq %r14,%rbp + adcxq %rbp,%r14 + imulq %r9,%rdx + + + xorq %r15,%r15 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r14 + adoxq %r15,%rbp + adcxq %rbp,%r15 + imulq %r10,%rdx + + + xorq %r8,%r8 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r15 + adoxq %r8,%rbp + adcxq %rbp,%r8 + imulq %r11,%rdx + + + xorq %r9,%r9 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r8 + adoxq %r9,%rbp + adcxq %rbp,%r9 + imulq %r12,%rdx + + + xorq %r10,%r10 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r9 + adoxq %r10,%rbp + adcxq %rbp,%r10 + imulq %r13,%rdx + + + xorq %r11,%r11 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r10 + adoxq %r11,%rbp + adcxq %rbp,%r11 + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulx_by_1_mont_384,.-__mulx_by_1_mont_384 + +.type __redx_tail_mont_384,@function +.align 32 +__redx_tail_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq 48(%rsi),%r14 + movq %r14,%rax + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + movq %r15,%rcx + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + sbbq %r12,%r12 + + + + + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __redx_tail_mont_384,.-__redx_tail_mont_384 + +.globl sgn0x_pty_mont_384 +.hidden sgn0x_pty_mont_384 +.type sgn0x_pty_mont_384,@function +.align 32 +sgn0x_pty_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sgn0_pty_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 0(%rdi),%rsi + movq %rdx,%rcx + call __mulx_by_1_mont_384 + + xorq %rax,%rax + movq %r14,%r13 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + notq %rax + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sgn0x_pty_mont_384,.-sgn0x_pty_mont_384 + +.globl sgn0x_pty_mont_384x +.hidden sgn0x_pty_mont_384x +.type sgn0x_pty_mont_384x,@function +.align 32 +sgn0x_pty_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sgn0_pty_mont_384x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 48(%rdi),%rsi + movq %rdx,%rcx + call __mulx_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + leaq 0(%rdi),%rsi + xorq %rdi,%rdi + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rdi + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rdi + + movq %r14,0(%rsp) + notq %rdi + andq $1,%r13 + andq $2,%rdi + orq %r13,%rdi + + call __mulx_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + xorq %rax,%rax + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + movq 0(%rsp),%r12 + + notq %rax + + testq %r14,%r14 + cmovzq %rdi,%r13 + + testq %r12,%r12 + cmovnzq %rdi,%rax + + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sgn0x_pty_mont_384x,.-sgn0x_pty_mont_384x +.globl mulx_mont_384 +.hidden mulx_mont_384 +.type mulx_mont_384,@function +.align 32 +mulx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -24(%rsp),%rsp +.cfi_adjust_cfa_offset 8*3 + + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + movq %r8,(%rsp) + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 24(%rsp),%r15 +.cfi_restore %r15 + movq 32(%rsp),%r14 +.cfi_restore %r14 + movq 40(%rsp),%r13 +.cfi_restore %r13 + movq 48(%rsp),%r12 +.cfi_restore %r12 + movq 56(%rsp),%rbx +.cfi_restore %rbx + movq 64(%rsp),%rbp +.cfi_restore %rbp + leaq 72(%rsp),%rsp +.cfi_adjust_cfa_offset -8*9 + + .byte 0xf3,0xc3 +.cfi_endproc +.size mulx_mont_384,.-mulx_mont_384 +.type __mulx_mont_384,@function +.align 32 +__mulx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + mulxq %r15,%r14,%r10 + mulxq %rax,%r15,%r11 + addq %r14,%r9 + mulxq %r12,%rax,%r12 + adcq %r15,%r10 + mulxq %rdi,%rdi,%r13 + adcq %rax,%r11 + mulxq %rbp,%rbp,%r14 + movq 8(%rbx),%rdx + adcq %rdi,%r12 + adcq %rbp,%r13 + adcq $0,%r14 + xorq %r15,%r15 + + movq %r8,16(%rsp) + imulq 8(%rsp),%r8 + + + xorq %rax,%rax + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r9 + adcxq %rbp,%r10 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r8,%rdx + adoxq %rdi,%r14 + adcxq %rbp,%r15 + adoxq %rax,%r15 + adoxq %rax,%rax + + + xorq %r8,%r8 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r9 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 16(%rbx),%rdx + adcxq %rdi,%r13 + adoxq %rbp,%r14 + adcxq %r8,%r14 + adoxq %r8,%r15 + adcxq %r8,%r15 + adoxq %r8,%rax + adcxq %r8,%rax + movq %r9,16(%rsp) + imulq 8(%rsp),%r9 + + + xorq %r8,%r8 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r9,%rdx + adoxq %rdi,%r15 + adcxq %rbp,%rax + adoxq %r8,%rax + adoxq %r8,%r8 + + + xorq %r9,%r9 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r10 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 24(%rbx),%rdx + adcxq %rdi,%r14 + adoxq %rbp,%r15 + adcxq %r9,%r15 + adoxq %r9,%rax + adcxq %r9,%rax + adoxq %r9,%r8 + adcxq %r9,%r8 + movq %r10,16(%rsp) + imulq 8(%rsp),%r10 + + + xorq %r9,%r9 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r10,%rdx + adoxq %rdi,%rax + adcxq %rbp,%r8 + adoxq %r9,%r8 + adoxq %r9,%r9 + + + xorq %r10,%r10 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r11 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 32(%rbx),%rdx + adcxq %rdi,%r15 + adoxq %rbp,%rax + adcxq %r10,%rax + adoxq %r10,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcxq %r10,%r9 + movq %r11,16(%rsp) + imulq 8(%rsp),%r11 + + + xorq %r10,%r10 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r11,%rdx + adoxq %rdi,%r8 + adcxq %rbp,%r9 + adoxq %r10,%r9 + adoxq %r10,%r10 + + + xorq %r11,%r11 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r12 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 40+128(%rcx),%rdi,%rbp + movq 40(%rbx),%rdx + adcxq %rdi,%rax + adoxq %rbp,%r8 + adcxq %r11,%r8 + adoxq %r11,%r9 + adcxq %r11,%r9 + adoxq %r11,%r10 + adcxq %r11,%r10 + movq %r12,16(%rsp) + imulq 8(%rsp),%r12 + + + xorq %r11,%r11 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r8 + adcxq %rbp,%r9 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r12,%rdx + adoxq %rdi,%r9 + adcxq %rbp,%r10 + adoxq %r11,%r10 + adoxq %r11,%r11 + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r13 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r13,%rdx + adcxq %rdi,%r8 + adoxq %rbp,%r9 + adcxq %r12,%r9 + adoxq %r12,%r10 + adcxq %r12,%r10 + adoxq %r12,%r11 + adcxq %r12,%r11 + imulq 8(%rsp),%rdx + movq 24(%rsp),%rbx + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + movq %r15,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r9 + movq %rax,%rsi + + mulxq 40+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + movq %r14,%rdx + adcxq %r12,%r10 + adoxq %r12,%r11 + leaq 128(%rcx),%rcx + movq %r8,%r12 + adcq $0,%r11 + + + + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %r9,%rdi + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%r8 + sbbq 32(%rcx),%r9 + movq %r10,%rbp + sbbq 40(%rcx),%r10 + sbbq $0,%r11 + + cmovncq %r14,%rdx + cmovcq %r13,%r15 + cmovcq %rsi,%rax + cmovncq %r8,%r12 + movq %rdx,0(%rbx) + cmovncq %r9,%rdi + movq %r15,8(%rbx) + cmovncq %r10,%rbp + movq %rax,16(%rbx) + movq %r12,24(%rbx) + movq %rdi,32(%rbx) + movq %rbp,40(%rbx) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulx_mont_384,.-__mulx_mont_384 +.globl sqrx_mont_384 +.hidden sqrx_mont_384 +.type sqrx_mont_384,@function +.align 32 +sqrx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -24(%rsp),%rsp +.cfi_adjust_cfa_offset 8*3 + + + movq %rcx,%r8 + leaq -128(%rdx),%rcx + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + leaq (%rsi),%rbx + movq %r8,(%rsp) + leaq -128(%rsi),%rsi + + mulxq %rdx,%r8,%r9 + call __mulx_mont_384 + + movq 24(%rsp),%r15 +.cfi_restore %r15 + movq 32(%rsp),%r14 +.cfi_restore %r14 + movq 40(%rsp),%r13 +.cfi_restore %r13 + movq 48(%rsp),%r12 +.cfi_restore %r12 + movq 56(%rsp),%rbx +.cfi_restore %rbx + movq 64(%rsp),%rbp +.cfi_restore %rbp + leaq 72(%rsp),%rsp +.cfi_adjust_cfa_offset -8*9 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_mont_384,.-sqrx_mont_384 + +.globl sqrx_n_mul_mont_384 +.hidden sqrx_n_mul_mont_384 +.type sqrx_n_mul_mont_384,@function +.align 32 +sqrx_n_mul_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_n_mul_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -40(%rsp),%rsp +.cfi_adjust_cfa_offset 8*5 + + + movq %rdx,%r10 + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq %rsi,%rbx + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + movq %r8,(%rsp) + movq %r9,24(%rsp) + movq 0(%r9),%xmm2 + +.Loop_sqrx_384: + movd %r10d,%xmm1 + leaq -128(%rbx),%rsi + leaq -128(%rcx),%rcx + + mulxq %rdx,%r8,%r9 + call __mulx_mont_384 + + movd %xmm1,%r10d + decl %r10d + jnz .Loop_sqrx_384 + + movq %rdx,%r14 +.byte 102,72,15,126,210 + leaq -128(%rbx),%rsi + movq 24(%rsp),%rbx + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 40(%rsp),%r15 +.cfi_restore %r15 + movq 48(%rsp),%r14 +.cfi_restore %r14 + movq 56(%rsp),%r13 +.cfi_restore %r13 + movq 64(%rsp),%r12 +.cfi_restore %r12 + movq 72(%rsp),%rbx +.cfi_restore %rbx + movq 80(%rsp),%rbp +.cfi_restore %rbp + leaq 88(%rsp),%rsp +.cfi_adjust_cfa_offset -8*11 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_n_mul_mont_384,.-sqrx_n_mul_mont_384 + +.globl sqrx_n_mul_mont_383 +.hidden sqrx_n_mul_mont_383 +.type sqrx_n_mul_mont_383,@function +.align 32 +sqrx_n_mul_mont_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_n_mul_mont_383$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -40(%rsp),%rsp +.cfi_adjust_cfa_offset 8*5 + + + movq %rdx,%r10 + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq %rsi,%rbx + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + movq %r8,(%rsp) + movq %r9,24(%rsp) + movq 0(%r9),%xmm2 + leaq -128(%rcx),%rcx + +.Loop_sqrx_383: + movd %r10d,%xmm1 + leaq -128(%rbx),%rsi + + mulxq %rdx,%r8,%r9 + call __mulx_mont_383_nonred + + movd %xmm1,%r10d + decl %r10d + jnz .Loop_sqrx_383 + + movq %rdx,%r14 +.byte 102,72,15,126,210 + leaq -128(%rbx),%rsi + movq 24(%rsp),%rbx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 40(%rsp),%r15 +.cfi_restore %r15 + movq 48(%rsp),%r14 +.cfi_restore %r14 + movq 56(%rsp),%r13 +.cfi_restore %r13 + movq 64(%rsp),%r12 +.cfi_restore %r12 + movq 72(%rsp),%rbx +.cfi_restore %rbx + movq 80(%rsp),%rbp +.cfi_restore %rbp + leaq 88(%rsp),%rsp +.cfi_adjust_cfa_offset -8*11 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_n_mul_mont_383,.-sqrx_n_mul_mont_383 +.type __mulx_mont_383_nonred,@function +.align 32 +__mulx_mont_383_nonred: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + mulxq %r15,%r14,%r10 + mulxq %rax,%r15,%r11 + addq %r14,%r9 + mulxq %r12,%rax,%r12 + adcq %r15,%r10 + mulxq %rdi,%rdi,%r13 + adcq %rax,%r11 + mulxq %rbp,%rbp,%r14 + movq 8(%rbx),%rdx + adcq %rdi,%r12 + adcq %rbp,%r13 + adcq $0,%r14 + movq %r8,%rax + imulq 8(%rsp),%r8 + + + xorq %r15,%r15 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r9 + adcxq %rbp,%r10 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r8,%rdx + adoxq %rdi,%r14 + adcxq %r15,%rbp + adoxq %rbp,%r15 + + + xorq %r8,%r8 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r9 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 16(%rbx),%rdx + adcxq %rdi,%r13 + adoxq %rbp,%r14 + adcxq %rax,%r14 + adoxq %rax,%r15 + adcxq %rax,%r15 + movq %r9,%r8 + imulq 8(%rsp),%r9 + + + xorq %rax,%rax + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r9,%rdx + adoxq %rdi,%r15 + adcxq %rax,%rbp + adoxq %rbp,%rax + + + xorq %r9,%r9 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r10 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 24(%rbx),%rdx + adcxq %rdi,%r14 + adoxq %rbp,%r15 + adcxq %r8,%r15 + adoxq %r8,%rax + adcxq %r8,%rax + movq %r10,%r9 + imulq 8(%rsp),%r10 + + + xorq %r8,%r8 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r10,%rdx + adoxq %rdi,%rax + adcxq %r8,%rbp + adoxq %rbp,%r8 + + + xorq %r10,%r10 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r11 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 32(%rbx),%rdx + adcxq %rdi,%r15 + adoxq %rbp,%rax + adcxq %r9,%rax + adoxq %r9,%r8 + adcxq %r9,%r8 + movq %r11,%r10 + imulq 8(%rsp),%r11 + + + xorq %r9,%r9 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r11,%rdx + adoxq %rdi,%r8 + adcxq %r9,%rbp + adoxq %rbp,%r9 + + + xorq %r11,%r11 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r12 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 40+128(%rcx),%rdi,%rbp + movq 40(%rbx),%rdx + adcxq %rdi,%rax + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcxq %r10,%r9 + movq %r12,%r11 + imulq 8(%rsp),%r12 + + + xorq %r10,%r10 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r8 + adcxq %rbp,%r9 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r12,%rdx + adoxq %rdi,%r9 + adcxq %r10,%rbp + adoxq %rbp,%r10 + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r13 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r13,%rdx + adcxq %rdi,%r8 + adoxq %rbp,%r9 + adcxq %r11,%r9 + adoxq %r11,%r10 + adcxq %r11,%r10 + imulq 8(%rsp),%rdx + movq 24(%rsp),%rbx + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r9 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r14,%rdx + adcxq %rdi,%r9 + adoxq %rbp,%r10 + adcq $0,%r10 + movq %r8,%r12 + + movq %r14,0(%rbx) + movq %r15,8(%rbx) + movq %rax,16(%rbx) + movq %r9,%rdi + movq %r8,24(%rbx) + movq %r9,32(%rbx) + movq %r10,40(%rbx) + movq %r10,%rbp + + .byte 0xf3,0xc3 +.cfi_endproc +.size __mulx_mont_383_nonred,.-__mulx_mont_383_nonred +.globl sqrx_mont_382x +.hidden sqrx_mont_382x +.type sqrx_mont_382x,@function +.align 32 +sqrx_mont_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_382x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rdi,16(%rsp) + movq %rsi,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rdx + adcq 72(%rsi),%r11 + movq %r12,%rbx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rdx + sbbq 80(%rsi),%rbx + sbbq 88(%rsi),%rbp + sbbq %rdi,%rdi + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + movq %r14,32+48(%rsp) + movq %r15,32+56(%rsp) + movq %rax,32+64(%rsp) + movq %rdx,32+72(%rsp) + movq %rbx,32+80(%rsp) + movq %rbp,32+88(%rsp) + movq %rdi,32+96(%rsp) + + + + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_383_nonred + addq %rdx,%rdx + adcq %r15,%r15 + adcq %rax,%rax + adcq %r12,%r12 + adcq %rdi,%rdi + adcq %rbp,%rbp + + movq %rdx,48(%rbx) + movq %r15,56(%rbx) + movq %rax,64(%rbx) + movq %r12,72(%rbx) + movq %rdi,80(%rbx) + movq %rbp,88(%rbx) + + leaq 32-128(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rdx + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%rax + movq 32+24(%rsp),%r12 + movq 32+32(%rsp),%rdi + movq 32+40(%rsp),%rbp + + + + mulxq %r14,%r8,%r9 + call __mulx_mont_383_nonred + movq 32+96(%rsp),%r14 + leaq 128(%rcx),%rcx + movq 32+0(%rsp),%r8 + andq %r14,%r8 + movq 32+8(%rsp),%r9 + andq %r14,%r9 + movq 32+16(%rsp),%r10 + andq %r14,%r10 + movq 32+24(%rsp),%r11 + andq %r14,%r11 + movq 32+32(%rsp),%r13 + andq %r14,%r13 + andq 32+40(%rsp),%r14 + + subq %r8,%rdx + movq 0(%rcx),%r8 + sbbq %r9,%r15 + movq 8(%rcx),%r9 + sbbq %r10,%rax + movq 16(%rcx),%r10 + sbbq %r11,%r12 + movq 24(%rcx),%r11 + sbbq %r13,%rdi + movq 32(%rcx),%r13 + sbbq %r14,%rbp + sbbq %r14,%r14 + + andq %r14,%r8 + andq %r14,%r9 + andq %r14,%r10 + andq %r14,%r11 + andq %r14,%r13 + andq 40(%rcx),%r14 + + addq %r8,%rdx + adcq %r9,%r15 + adcq %r10,%rax + adcq %r11,%r12 + adcq %r13,%rdi + adcq %r14,%rbp + + movq %rdx,0(%rbx) + movq %r15,8(%rbx) + movq %rax,16(%rbx) + movq %r12,24(%rbx) + movq %rdi,32(%rbx) + movq %rbp,40(%rbx) + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc +.size sqrx_mont_382x,.-sqrx_mont_382x + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/sha256-armv8.S b/crypto/blst_src/build/elf/sha256-armv8.S new file mode 100644 index 00000000000..45c1162c467 --- /dev/null +++ b/crypto/blst_src/build/elf/sha256-armv8.S @@ -0,0 +1,1083 @@ +// +// Copyright Supranational LLC +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// ==================================================================== +// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +// project. +// ==================================================================== +// +// sha256_block procedure for ARMv8. +// +// This module is stripped of scalar code paths, with rationale that all +// known processors are NEON-capable. +// +// See original module at CRYPTOGAMS for further details. + +.comm __blst_platform_cap,4 +.text + +.align 6 +.type .LK256,%object +.LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator +.size .LK256,.-.LK256 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.align 2 +.align 2 +.globl blst_sha256_block_armv8 +.type blst_sha256_block_armv8,%function +.align 6 +blst_sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,.LK256 + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size blst_sha256_block_armv8,.-blst_sha256_block_armv8 +.globl blst_sha256_block_data_order +.type blst_sha256_block_data_order,%function +.align 4 +blst_sha256_block_data_order: + adrp x16,__blst_platform_cap + ldr w16,[x16,#:lo12:__blst_platform_cap] + tst w16,#1 + b.ne .Lv8_entry + + stp x29, x30, [sp, #-16]! + mov x29, sp + sub sp,sp,#16*4 + + adr x16,.LK256 + add x2,x1,x2,lsl#6 // len to point at the end of inp + + ld1 {v0.16b},[x1], #16 + ld1 {v1.16b},[x1], #16 + ld1 {v2.16b},[x1], #16 + ld1 {v3.16b},[x1], #16 + ld1 {v4.4s},[x16], #16 + ld1 {v5.4s},[x16], #16 + ld1 {v6.4s},[x16], #16 + ld1 {v7.4s},[x16], #16 + rev32 v0.16b,v0.16b // yes, even on + rev32 v1.16b,v1.16b // big-endian + rev32 v2.16b,v2.16b + rev32 v3.16b,v3.16b + mov x17,sp + add v4.4s,v4.4s,v0.4s + add v5.4s,v5.4s,v1.4s + add v6.4s,v6.4s,v2.4s + st1 {v4.4s,v5.4s},[x17], #32 + add v7.4s,v7.4s,v3.4s + st1 {v6.4s,v7.4s},[x17] + sub x17,x17,#32 + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#8] + ldp w7,w8,[x0,#16] + ldp w9,w10,[x0,#24] + ldr w12,[sp,#0] + mov w13,wzr + eor w14,w4,w5 + mov w15,wzr + b .L_00_48 + +.align 4 +.L_00_48: + ext v4.16b,v0.16b,v1.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v2.16b,v3.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v3.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v0.4s,v0.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v0.4s,v0.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v0.4s,v0.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v0.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v0.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v0.4s,#15 + add w8,w8,w12 + ushr v17.4s,v0.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v0.4s,#13 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v0.4s,v0.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v0.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v1.16b,v2.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v3.16b,v0.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v0.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v1.4s,v1.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v1.4s,v1.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v1.4s,v1.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v1.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v1.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v1.4s,#15 + add w4,w4,w12 + ushr v17.4s,v1.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v1.4s,#13 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v1.4s,v1.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v1.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + ext v4.16b,v2.16b,v3.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v0.16b,v1.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v1.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v2.4s,v2.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v2.4s,v2.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v2.4s,v2.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v2.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v2.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v2.4s,#15 + add w8,w8,w12 + ushr v17.4s,v2.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v2.4s,#13 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v2.4s,v2.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v2.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v3.16b,v0.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v1.16b,v2.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v2.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v3.4s,v3.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v3.4s,v3.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v3.4s,v3.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v3.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v3.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v3.4s,#15 + add w4,w4,w12 + ushr v17.4s,v3.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v3.4s,#13 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v3.4s,v3.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v3.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[x16] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + cmp w12,#0 // check for K256 terminator + ldr w12,[sp,#0] + sub x17,x17,#64 + bne .L_00_48 + + sub x16,x16,#256 // rewind x16 + cmp x1,x2 + mov x17, #64 + csel x17, x17, xzr, eq + sub x1,x1,x17 // avoid SEGV + mov x17,sp + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v0.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v0.16b,v0.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v0.4s + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v1.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v1.16b,v1.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v1.4s + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v2.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v2.16b,v2.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v2.4s + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v3.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v3.16b,v3.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v3.4s + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w3,w3,w15 // h+=Sigma0(a) from the past + ldp w11,w12,[x0,#0] + add w3,w3,w13 // h+=Maj(a,b,c) from the past + ldp w13,w14,[x0,#8] + add w3,w3,w11 // accumulate + add w4,w4,w12 + ldp w11,w12,[x0,#16] + add w5,w5,w13 + add w6,w6,w14 + ldp w13,w14,[x0,#24] + add w7,w7,w11 + add w8,w8,w12 + ldr w12,[sp,#0] + stp w3,w4,[x0,#0] + add w9,w9,w13 + mov w13,wzr + stp w5,w6,[x0,#8] + add w10,w10,w14 + stp w7,w8,[x0,#16] + eor w14,w4,w5 + stp w9,w10,[x0,#24] + mov w15,wzr + mov x17,sp + b.ne .L_00_48 + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret +.size blst_sha256_block_data_order,.-blst_sha256_block_data_order +.globl blst_sha256_emit +.hidden blst_sha256_emit +.type blst_sha256_emit,%function +.align 4 +blst_sha256_emit: + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] +#ifndef __AARCH64EB__ + rev x4,x4 + rev x5,x5 + rev x6,x6 + rev x7,x7 +#endif + str w4,[x0,#4] + lsr x4,x4,#32 + str w5,[x0,#12] + lsr x5,x5,#32 + str w6,[x0,#20] + lsr x6,x6,#32 + str w7,[x0,#28] + lsr x7,x7,#32 + str w4,[x0,#0] + str w5,[x0,#8] + str w6,[x0,#16] + str w7,[x0,#24] + ret +.size blst_sha256_emit,.-blst_sha256_emit + +.globl blst_sha256_bcopy +.hidden blst_sha256_bcopy +.type blst_sha256_bcopy,%function +.align 4 +blst_sha256_bcopy: +.Loop_bcopy: + ldrb w3,[x1],#1 + sub x2,x2,#1 + strb w3,[x0],#1 + cbnz x2,.Loop_bcopy + ret +.size blst_sha256_bcopy,.-blst_sha256_bcopy + +.globl blst_sha256_hcopy +.hidden blst_sha256_hcopy +.type blst_sha256_hcopy,%function +.align 4 +blst_sha256_hcopy: + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + ret +.size blst_sha256_hcopy,.-blst_sha256_hcopy diff --git a/crypto/blst_src/build/elf/sha256-portable-x86_64.s b/crypto/blst_src/build/elf/sha256-portable-x86_64.s new file mode 100644 index 00000000000..2fd6a770917 --- /dev/null +++ b/crypto/blst_src/build/elf/sha256-portable-x86_64.s @@ -0,0 +1,1758 @@ +.comm __blst_platform_cap,4 +.text + +.globl blst_sha256_block_data_order +.type blst_sha256_block_data_order,@function +.align 16 +blst_sha256_block_data_order: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp +#ifdef __BLST_PORTABLE__ + testl $2,__blst_platform_cap(%rip) + jnz .Lblst_sha256_block_data_order$2 +#endif + pushq %rbx +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $64+24,%rsp + +.cfi_def_cfa %rsp,144 + + leaq (%rsi,%rdx,4),%rdx + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 0(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 4(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 8(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 12(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 16(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 20(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 24(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 28(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 32(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 36(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 40(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 44(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 48(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 52(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 56(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 60(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 64(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 68(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 72(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 76(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 80(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 84(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 88(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 92(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 96(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 100(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 104(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 108(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 112(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 116(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 120(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 124(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + leaq 64(%rbp),%rbp + cmpb $0x19,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + leaq 64+24+48(%rsp),%r11 +.cfi_def_cfa %r11,8 + movq 64+24(%rsp),%r15 + movq -40(%r11),%r14 + movq -32(%r11),%r13 + movq -24(%r11),%r12 + movq -16(%r11),%rbx + movq -8(%r11),%rbp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbp +.cfi_restore %rbx + leaq (%r11),%rsp + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_block_data_order,.-blst_sha256_block_data_order + +#ifndef __BLST_PORTABLE__ +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.globl blst_sha256_emit +.hidden blst_sha256_emit +.type blst_sha256_emit,@function +.align 16 +blst_sha256_emit: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + bswapq %r8 + movq 24(%rsi),%r11 + bswapq %r9 + movl %r8d,4(%rdi) + bswapq %r10 + movl %r9d,12(%rdi) + bswapq %r11 + movl %r10d,20(%rdi) + shrq $32,%r8 + movl %r11d,28(%rdi) + shrq $32,%r9 + movl %r8d,0(%rdi) + shrq $32,%r10 + movl %r9d,8(%rdi) + shrq $32,%r11 + movl %r10d,16(%rdi) + movl %r11d,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_emit,.-blst_sha256_emit + +.globl blst_sha256_bcopy +.hidden blst_sha256_bcopy +.type blst_sha256_bcopy,@function +.align 16 +blst_sha256_bcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + subq %rsi,%rdi +.Loop_bcopy: + movzbl (%rsi),%eax + leaq 1(%rsi),%rsi + movb %al,-1(%rdi,%rsi,1) + decq %rdx + jnz .Loop_bcopy + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_bcopy,.-blst_sha256_bcopy + +.globl blst_sha256_hcopy +.hidden blst_sha256_hcopy +.type blst_sha256_hcopy,@function +.align 16 +blst_sha256_hcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_hcopy,.-blst_sha256_hcopy +#endif + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/elf/sha256-x86_64.s b/crypto/blst_src/build/elf/sha256-x86_64.s new file mode 100644 index 00000000000..940051aab16 --- /dev/null +++ b/crypto/blst_src/build/elf/sha256-x86_64.s @@ -0,0 +1,1455 @@ +.comm __blst_platform_cap,4 +.text + +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.globl blst_sha256_block_data_order_shaext +.hidden blst_sha256_block_data_order_shaext +.type blst_sha256_block_data_order_shaext,@function +.align 64 +blst_sha256_block_data_order_shaext: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp +.Lblst_sha256_block_data_order$2: + + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 256-128(%rcx),%xmm7 + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 16-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 48-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 96-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 112-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 144-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 176-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 208-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 224-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 240-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz .Loop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm7 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) +.cfi_def_cfa_register %rsp + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_block_data_order_shaext,.-blst_sha256_block_data_order_shaext +.globl blst_sha256_block_data_order +.hidden blst_sha256_block_data_order +.type blst_sha256_block_data_order,@function +.align 64 +blst_sha256_block_data_order: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + testl $2,__blst_platform_cap(%rip) + jnz .Lblst_sha256_block_data_order$2 + pushq %rbx +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $24,%rsp + + leaq (%rsi,%rdx,4),%rdx + movq %rdi,-64(%rbp) + + movq %rdx,-48(%rbp) + + + leaq -64(%rsp),%rsp + movl 0(%rdi),%eax + andq $-64,%rsp + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa K256+256(%rip),%xmm7 + movq %rsi,-56(%rbp) + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rsi +.byte 102,15,56,0,207 + movdqa 0(%rsi),%xmm4 + movdqa 16(%rsi),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 32(%rsi),%xmm6 +.byte 102,15,56,0,223 + movdqa 48(%rsi),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.align 16 +.Lssse3_00_47: + subq $-64,%rsi + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 16(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 32(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 48(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,67(%rsi) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq -64(%rbp),%rdi + movl %r14d,%eax + movq -56(%rbp),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + leaq 64(%rsi),%rsi + cmpq -48(%rbp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + xorps %xmm0,%xmm0 + movaps %xmm0,0(%rsp) + movaps %xmm0,16(%rsp) + movaps %xmm0,32(%rsp) + movaps %xmm0,48(%rsp) + movq -40(%rbp),%r15 + movq -32(%rbp),%r14 + movq -24(%rbp),%r13 + movq -16(%rbp),%r12 + movq -8(%rbp),%rbx + movq %rbp,%rsp +.cfi_def_cfa_register %rsp + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_block_data_order,.-blst_sha256_block_data_order +.globl blst_sha256_emit +.hidden blst_sha256_emit +.type blst_sha256_emit,@function +.align 16 +blst_sha256_emit: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + bswapq %r8 + movq 24(%rsi),%r11 + bswapq %r9 + movl %r8d,4(%rdi) + bswapq %r10 + movl %r9d,12(%rdi) + bswapq %r11 + movl %r10d,20(%rdi) + shrq $32,%r8 + movl %r11d,28(%rdi) + shrq $32,%r9 + movl %r8d,0(%rdi) + shrq $32,%r10 + movl %r9d,8(%rdi) + shrq $32,%r11 + movl %r10d,16(%rdi) + movl %r11d,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_emit,.-blst_sha256_emit + +.globl blst_sha256_bcopy +.hidden blst_sha256_bcopy +.type blst_sha256_bcopy,@function +.align 16 +blst_sha256_bcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + subq %rsi,%rdi +.Loop_bcopy: + movzbl (%rsi),%eax + leaq 1(%rsi),%rsi + movb %al,-1(%rdi,%rsi,1) + decq %rdx + jnz .Loop_bcopy + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_bcopy,.-blst_sha256_bcopy + +.globl blst_sha256_hcopy +.hidden blst_sha256_hcopy +.type blst_sha256_hcopy,@function +.align 16 +blst_sha256_hcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size blst_sha256_hcopy,.-blst_sha256_hcopy + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: diff --git a/crypto/blst_src/build/mach-o/add_mod_256-armv8.S b/crypto/blst_src/build/mach-o/add_mod_256-armv8.S new file mode 100644 index 00000000000..198d65aef69 --- /dev/null +++ b/crypto/blst_src/build/mach-o/add_mod_256-armv8.S @@ -0,0 +1,379 @@ +.text + +.globl _add_mod_256 +.private_extern _add_mod_256 + +.align 5 +_add_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + adds x8,x8,x12 + ldp x14,x15,[x2,#16] + adcs x9,x9,x13 + ldp x4,x5,[x3] + adcs x10,x10,x14 + ldp x6,x7,[x3,#16] + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + stp x8,x9,[x0] + csel x11,x11,x2,lo + stp x10,x11,[x0,#16] + + ret + + +.globl _mul_by_3_mod_256 +.private_extern _mul_by_3_mod_256 + +.align 5 +_mul_by_3_mod_256: + ldp x12,x13,[x1] + ldp x14,x15,[x1,#16] + + adds x8,x12,x12 + ldp x4,x5,[x2] + adcs x9,x13,x13 + ldp x6,x7,[x2,#16] + adcs x10,x14,x14 + adcs x11,x15,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + csel x11,x11,x2,lo + + adds x8,x8,x12 + adcs x9,x9,x13 + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + stp x8,x9,[x0] + csel x11,x11,x2,lo + stp x10,x11,[x0,#16] + + ret + + +.globl _lshift_mod_256 +.private_extern _lshift_mod_256 + +.align 5 +_lshift_mod_256: + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +Loop_lshift_mod_256: + adds x8,x8,x8 + sub x2,x2,#1 + adcs x9,x9,x9 + adcs x10,x10,x10 + adcs x11,x11,x11 + adc x3,xzr,xzr + + subs x12,x8,x4 + sbcs x13,x9,x5 + sbcs x14,x10,x6 + sbcs x15,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x12,lo + csel x9,x9,x13,lo + csel x10,x10,x14,lo + csel x11,x11,x15,lo + + cbnz x2,Loop_lshift_mod_256 + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret + + +.globl _rshift_mod_256 +.private_extern _rshift_mod_256 + +.align 5 +_rshift_mod_256: + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +Loop_rshift: + adds x12,x8,x4 + sub x2,x2,#1 + adcs x13,x9,x5 + adcs x14,x10,x6 + adcs x15,x11,x7 + adc x3,xzr,xzr + tst x8,#1 + + csel x12,x12,x8,ne + csel x13,x13,x9,ne + csel x14,x14,x10,ne + csel x15,x15,x11,ne + csel x3,x3,xzr,ne + + extr x8,x13,x12,#1 + extr x9,x14,x13,#1 + extr x10,x15,x14,#1 + extr x11,x3,x15,#1 + + cbnz x2,Loop_rshift + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret + + +.globl _cneg_mod_256 +.private_extern _cneg_mod_256 + +.align 5 +_cneg_mod_256: + ldp x8,x9,[x1] + ldp x4,x5,[x3] + + ldp x10,x11,[x1,#16] + subs x12,x4,x8 + ldp x6,x7,[x3,#16] + orr x4,x8,x9 + sbcs x13,x5,x9 + orr x5,x10,x11 + sbcs x14,x6,x10 + orr x3,x4,x5 + sbc x15,x7,x11 + + cmp x3,#0 + csetm x3,ne + ands x2,x2,x3 + + csel x8,x8,x12,eq + csel x9,x9,x13,eq + csel x10,x10,x14,eq + stp x8,x9,[x0] + csel x11,x11,x15,eq + stp x10,x11,[x0,#16] + + ret + + +.globl _sub_mod_256 +.private_extern _sub_mod_256 + +.align 5 +_sub_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + subs x8,x8,x12 + ldp x14,x15,[x2,#16] + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + stp x8,x9,[x0] + adc x11,x11,x7 + stp x10,x11,[x0,#16] + + ret + + +.globl _check_mod_256 +.private_extern _check_mod_256 + +.align 5 +_check_mod_256: + ldp x8,x9,[x0] + ldp x10,x11,[x0,#16] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + subs xzr,x8,x4 + sbcs xzr,x9,x5 + orr x8,x8,x9 + sbcs xzr,x10,x6 + orr x8,x8,x10 + sbcs xzr,x11,x7 + orr x8,x8,x11 + sbc x1,xzr,xzr + + cmp x8,#0 + mov x0,#1 + csel x0,x0,xzr,ne + and x0,x0,x1 + + ret + + +.globl _add_n_check_mod_256 +.private_extern _add_n_check_mod_256 + +.align 5 +_add_n_check_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + adds x8,x8,x12 + ldp x4,x5,[x3] + adcs x9,x9,x13 + ldp x6,x7,[x3,#16] + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csel x8,x8,x16,lo + csel x9,x9,x17,lo + csel x10,x10,x1,lo + csel x11,x11,x2,lo + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + csel x0, x17, xzr, ne + + ret + + +.globl _sub_n_check_mod_256 +.private_extern _sub_n_check_mod_256 + +.align 5 +_sub_n_check_mod_256: + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + subs x8,x8,x12 + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + adc x11,x11,x7 + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + csel x0, x17, xzr, ne + + ret + diff --git a/crypto/blst_src/build/mach-o/add_mod_256-x86_64.s b/crypto/blst_src/build/mach-o/add_mod_256-x86_64.s new file mode 100644 index 00000000000..19e5ba9834f --- /dev/null +++ b/crypto/blst_src/build/mach-o/add_mod_256-x86_64.s @@ -0,0 +1,564 @@ +.text + +.globl _add_mod_256 +.private_extern _add_mod_256 + +.p2align 5 +_add_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +L$oaded_a_add_mod_256: + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + movq %r9,%rsi + adcq 24(%rdx),%r11 + sbbq %rdx,%rdx + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + movq %r8,0(%rdi) + cmovcq %rbx,%r10 + movq %r9,8(%rdi) + cmovcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _mul_by_3_mod_256 +.private_extern _mul_by_3_mod_256 + +.p2align 5 +_mul_by_3_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq %rdx,%rcx + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %rsi,%rdx + movq 24(%rsi),%r11 + + call __lshift_mod_256 + movq 0(%rsp),%r12 +.cfi_restore %r12 + jmp L$oaded_a_add_mod_256 + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__lshift_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq %r8,%r8 + adcq %r9,%r9 + movq %r8,%rax + adcq %r10,%r10 + movq %r9,%rsi + adcq %r11,%r11 + sbbq %r12,%r12 + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + cmovcq %rbx,%r10 + cmovcq %rbp,%r11 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _lshift_mod_256 +.private_extern _lshift_mod_256 + +.p2align 5 +_lshift_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +L$oop_lshift_mod_256: + call __lshift_mod_256 + decl %edx + jnz L$oop_lshift_mod_256 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r12 +.cfi_restore %r12 + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _rshift_mod_256 +.private_extern _rshift_mod_256 + +.p2align 5 +_rshift_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%rbp + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + +L$oop_rshift_mod_256: + movq %rbp,%r8 + andq $1,%rbp + movq 0(%rcx),%rax + negq %rbp + movq 8(%rcx),%rsi + movq 16(%rcx),%rbx + + andq %rbp,%rax + andq %rbp,%rsi + andq %rbp,%rbx + andq 24(%rcx),%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + adcq %rbx,%r10 + adcq %rbp,%r11 + sbbq %rax,%rax + + shrq $1,%r8 + movq %r9,%rbp + shrq $1,%r9 + movq %r10,%rbx + shrq $1,%r10 + movq %r11,%rsi + shrq $1,%r11 + + shlq $63,%rbp + shlq $63,%rbx + orq %r8,%rbp + shlq $63,%rsi + orq %rbx,%r9 + shlq $63,%rax + orq %rsi,%r10 + orq %rax,%r11 + + decl %edx + jnz L$oop_rshift_mod_256 + + movq %rbp,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _cneg_mod_256 +.private_extern _cneg_mod_256 + +.p2align 5 +_cneg_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq 0(%rsi),%r12 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r12,%r8 + movq 24(%rsi),%r11 + orq %r9,%r12 + orq %r10,%r12 + orq %r11,%r12 + movq $-1,%rbp + + movq 0(%rcx),%rax + cmovnzq %rbp,%r12 + movq 8(%rcx),%rsi + movq 16(%rcx),%rbx + andq %r12,%rax + movq 24(%rcx),%rbp + andq %r12,%rsi + andq %r12,%rbx + andq %r12,%rbp + + subq %r8,%rax + sbbq %r9,%rsi + sbbq %r10,%rbx + sbbq %r11,%rbp + + orq %rdx,%rdx + + cmovzq %r8,%rax + cmovzq %r9,%rsi + movq %rax,0(%rdi) + cmovzq %r10,%rbx + movq %rsi,8(%rdi) + cmovzq %r11,%rbp + movq %rbx,16(%rdi) + movq %rbp,24(%rdi) + + movq 0(%rsp),%r12 +.cfi_restore %r12 + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _sub_mod_256 +.private_extern _sub_mod_256 + +.p2align 5 +_sub_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + movq 0(%rcx),%rax + sbbq 8(%rdx),%r9 + movq 8(%rcx),%rsi + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rbx + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbp + sbbq %rdx,%rdx + + andq %rdx,%rax + andq %rdx,%rsi + andq %rdx,%rbx + andq %rdx,%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + movq %r8,0(%rdi) + adcq %rbx,%r10 + movq %r9,8(%rdi) + adcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _check_mod_256 +.private_extern _check_mod_256 + +.p2align 5 +_check_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + movq 0(%rdi),%rax + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + + movq %rax,%r8 + orq %r9,%rax + orq %r10,%rax + orq %r11,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq %rsi,%rsi + + movq $1,%rdx + cmpq $0,%rax + cmovneq %rdx,%rax + andq %rsi,%rax + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _add_n_check_mod_256 +.private_extern _add_n_check_mod_256 + +.p2align 5 +_add_n_check_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + movq %r9,%rsi + adcq 24(%rdx),%r11 + sbbq %rdx,%rdx + + movq %r10,%rbx + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + sbbq 16(%rcx),%r10 + movq %r11,%rbp + sbbq 24(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %rax,%r8 + cmovcq %rsi,%r9 + movq %r8,0(%rdi) + cmovcq %rbx,%r10 + movq %r9,8(%rdi) + cmovcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + orq %r9,%r8 + orq %r11,%r10 + orq %r10,%r8 + movq $1,%rax + cmovzq %r8,%rax + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _sub_n_check_mod_256 +.private_extern _sub_n_check_mod_256 + +.p2align 5 +_sub_n_check_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + movq 0(%rcx),%rax + sbbq 8(%rdx),%r9 + movq 8(%rcx),%rsi + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rbx + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbp + sbbq %rdx,%rdx + + andq %rdx,%rax + andq %rdx,%rsi + andq %rdx,%rbx + andq %rdx,%rbp + + addq %rax,%r8 + adcq %rsi,%r9 + movq %r8,0(%rdi) + adcq %rbx,%r10 + movq %r9,8(%rdi) + adcq %rbp,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + orq %r9,%r8 + orq %r11,%r10 + orq %r10,%r8 + movq $1,%rax + cmovzq %r8,%rax + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/add_mod_384-armv8.S b/crypto/blst_src/build/mach-o/add_mod_384-armv8.S new file mode 100644 index 00000000000..a62995f2bed --- /dev/null +++ b/crypto/blst_src/build/mach-o/add_mod_384-armv8.S @@ -0,0 +1,1000 @@ +.text + +.globl _add_mod_384 +.private_extern _add_mod_384 + +.align 5 +_add_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + + +.align 5 +__add_mod_384: + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + +__add_mod_384_ab_are_loaded: + adds x10,x10,x16 + adcs x11,x11,x17 + adcs x12,x12,x19 + adcs x13,x13,x20 + adcs x14,x14,x21 + adcs x15,x15,x22 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csel x10,x10,x16,lo + csel x11,x11,x17,lo + csel x12,x12,x19,lo + csel x13,x13,x20,lo + csel x14,x14,x21,lo + csel x15,x15,x22,lo + + ret + + +.globl _add_mod_384x +.private_extern _add_mod_384x + +.align 5 +_add_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _rshift_mod_384 +.private_extern _rshift_mod_384 + +.align 5 +_rshift_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +Loop_rshift_mod_384: + sub x2,x2,#1 + bl __rshift_mod_384 + cbnz x2,Loop_rshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + + +.align 5 +__rshift_mod_384: + sbfx x22,x10,#0,#1 + and x16,x22,x4 + and x17,x22,x5 + adds x10,x10,x16 + and x19,x22,x6 + adcs x11,x11,x17 + and x20,x22,x7 + adcs x12,x12,x19 + and x21,x22,x8 + adcs x13,x13,x20 + and x22,x22,x9 + adcs x14,x14,x21 + extr x10,x11,x10,#1 // a[0:5] >>= 1 + adcs x15,x15,x22 + extr x11,x12,x11,#1 + adc x22,xzr,xzr + extr x12,x13,x12,#1 + extr x13,x14,x13,#1 + extr x14,x15,x14,#1 + extr x15,x22,x15,#1 + ret + + +.globl _div_by_2_mod_384 +.private_extern _div_by_2_mod_384 + +.align 5 +_div_by_2_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __rshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _lshift_mod_384 +.private_extern _lshift_mod_384 + +.align 5 +_lshift_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +Loop_lshift_mod_384: + sub x2,x2,#1 + bl __lshift_mod_384 + cbnz x2,Loop_lshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + + +.align 5 +__lshift_mod_384: + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csel x10,x10,x16,lo + csel x11,x11,x17,lo + csel x12,x12,x19,lo + csel x13,x13,x20,lo + csel x14,x14,x21,lo + csel x15,x15,x22,lo + + ret + + +.globl _mul_by_3_mod_384 +.private_extern _mul_by_3_mod_384 + +.align 5 +_mul_by_3_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _mul_by_8_mod_384 +.private_extern _mul_by_8_mod_384 + +.align 5 +_mul_by_8_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _mul_by_3_mod_384x +.private_extern _mul_by_3_mod_384x + +.align 5 +_mul_by_3_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + + ldp x16,x17,[x1,#48] + ldp x19,x20,[x1,#64] + ldp x21,x22,[x1,#80] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _mul_by_8_mod_384x +.private_extern _mul_by_8_mod_384x + +.align 5 +_mul_by_8_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _cneg_mod_384 +.private_extern _cneg_mod_384 + +.align 5 +_cneg_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x4,x5,[x3] + ldp x12,x13,[x1,#16] + ldp x6,x7,[x3,#16] + + subs x16,x4,x10 + ldp x14,x15,[x1,#32] + ldp x8,x9,[x3,#32] + orr x3,x10,x11 + sbcs x17,x5,x11 + orr x3,x3,x12 + sbcs x19,x6,x12 + orr x3,x3,x13 + sbcs x20,x7,x13 + orr x3,x3,x14 + sbcs x21,x8,x14 + orr x3,x3,x15 + sbc x22,x9,x15 + + cmp x3,#0 + csetm x3,ne + ands x2,x2,x3 + + csel x10,x10,x16,eq + csel x11,x11,x17,eq + csel x12,x12,x19,eq + csel x13,x13,x20,eq + stp x10,x11,[x0] + csel x14,x14,x21,eq + stp x12,x13,[x0,#16] + csel x15,x15,x22,eq + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _sub_mod_384 +.private_extern _sub_mod_384 + +.align 5 +_sub_mod_384: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + + +.align 5 +__sub_mod_384: + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + + subs x10,x10,x16 + sbcs x11,x11,x17 + sbcs x12,x12,x19 + sbcs x13,x13,x20 + sbcs x14,x14,x21 + sbcs x15,x15,x22 + sbc x3,xzr,xzr + + and x16,x4,x3 + and x17,x5,x3 + adds x10,x10,x16 + and x19,x6,x3 + adcs x11,x11,x17 + and x20,x7,x3 + adcs x12,x12,x19 + and x21,x8,x3 + adcs x13,x13,x20 + and x22,x9,x3 + adcs x14,x14,x21 + adc x15,x15,x22 + + ret + + +.globl _sub_mod_384x +.private_extern _sub_mod_384x + +.align 5 +_sub_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _mul_by_1_plus_i_mod_384x +.private_extern _mul_by_1_plus_i_mod_384x + +.align 5 +_mul_by_1_plus_i_mod_384x: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + add x2,x1,#48 + + bl __sub_mod_384 // a->re - a->im + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __add_mod_384_ab_are_loaded // a->re + a->im + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + + +.globl _sgn0_pty_mod_384 +.private_extern _sgn0_pty_mod_384 + +.align 5 +_sgn0_pty_mod_384: + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x0,x10,#1 + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x3,x3,xzr + + mvn x3,x3 + and x3,x3,#2 + orr x0,x0,x3 + + ret + + +.globl _sgn0_pty_mod_384x +.private_extern _sgn0_pty_mod_384x + +.align 5 +_sgn0_pty_mod_384x: + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x2,x10,#1 + orr x3,x10,x11 + adds x10,x10,x10 + orr x3,x3,x12 + adcs x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + ldp x10,x11,[x0,#48] + ldp x12,x13,[x0,#64] + ldp x14,x15,[x0,#80] + + mvn x16,x16 + and x16,x16,#2 + orr x2,x2,x16 + + and x0,x10,#1 + orr x1,x10,x11 + adds x10,x10,x10 + orr x1,x1,x12 + adcs x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + mvn x16,x16 + and x16,x16,#2 + orr x0,x0,x16 + + cmp x3,#0 + csel x3,x0,x2,eq // a->re==0? prty(a->im) : prty(a->re) + + cmp x1,#0 + csel x1,x0,x2,ne // a->im!=0? sgn0(a->im) : sgn0(a->re) + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ret + +.globl _vec_select_32 +.private_extern _vec_select_32 + +.align 5 +_vec_select_32: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + +.globl _vec_select_48 +.private_extern _vec_select_48 + +.align 5 +_vec_select_48: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + +.globl _vec_select_96 +.private_extern _vec_select_96 + +.align 5 +_vec_select_96: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + +.globl _vec_select_192 +.private_extern _vec_select_192 + +.align 5 +_vec_select_192: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + +.globl _vec_select_144 +.private_extern _vec_select_144 + +.align 5 +_vec_select_144: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + +.globl _vec_select_288 +.private_extern _vec_select_288 + +.align 5 +_vec_select_288: + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + +.globl _vec_prefetch +.private_extern _vec_prefetch + +.align 5 +_vec_prefetch: + add x1, x1, x0 + sub x1, x1, #1 + mov x2, #64 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + csel x2, xzr, x2, hi + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + csel x0, x1, x0, hi + prfm pldl1keep, [x0] + ret + +.globl _vec_is_zero_16x +.private_extern _vec_is_zero_16x + +.align 5 +_vec_is_zero_16x: + ld1 {v0.2d}, [x0], #16 + lsr x1, x1, #4 + sub x1, x1, #1 + cbz x1, Loop_is_zero_done + +Loop_is_zero: + ld1 {v1.2d}, [x0], #16 + orr v0.16b, v0.16b, v1.16b + sub x1, x1, #1 + cbnz x1, Loop_is_zero + +Loop_is_zero_done: + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + csel x0, x0, xzr, eq + ret + +.globl _vec_is_equal_16x +.private_extern _vec_is_equal_16x + +.align 5 +_vec_is_equal_16x: + ld1 {v0.2d}, [x0], #16 + ld1 {v1.2d}, [x1], #16 + lsr x2, x2, #4 + eor v0.16b, v0.16b, v1.16b + +Loop_is_equal: + sub x2, x2, #1 + cbz x2, Loop_is_equal_done + ld1 {v1.2d}, [x0], #16 + ld1 {v2.2d}, [x1], #16 + eor v1.16b, v1.16b, v2.16b + orr v0.16b, v0.16b, v1.16b + b Loop_is_equal + nop + +Loop_is_equal_done: + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + csel x0, x0, xzr, eq + ret + diff --git a/crypto/blst_src/build/mach-o/add_mod_384-x86_64.s b/crypto/blst_src/build/mach-o/add_mod_384-x86_64.s new file mode 100644 index 00000000000..974978e3425 --- /dev/null +++ b/crypto/blst_src/build/mach-o/add_mod_384-x86_64.s @@ -0,0 +1,1899 @@ +.text + +.globl _add_mod_384 +.private_extern _add_mod_384 + +.p2align 5 +_add_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __add_mod_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__add_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__add_mod_384_a_is_loaded: + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _add_mod_384x +.private_extern _add_mod_384x + +.p2align 5 +_add_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $24,%rsp +.cfi_adjust_cfa_offset 24 + + + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + leaq 48(%rsi),%rsi + leaq 48(%rdx),%rdx + leaq 48(%rdi),%rdi + call __add_mod_384 + + movq 0(%rsp),%rsi + movq 8(%rsp),%rdx + leaq -48(%rdi),%rdi + call __add_mod_384 + + movq 24+0(%rsp),%r15 +.cfi_restore %r15 + movq 24+8(%rsp),%r14 +.cfi_restore %r14 + movq 24+16(%rsp),%r13 +.cfi_restore %r13 + movq 24+24(%rsp),%r12 +.cfi_restore %r12 + movq 24+32(%rsp),%rbx +.cfi_restore %rbx + movq 24+40(%rsp),%rbp +.cfi_restore %rbp + leaq 24+48(%rsp),%rsp +.cfi_adjust_cfa_offset -24-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _rshift_mod_384 +.private_extern _rshift_mod_384 + +.p2align 5 +_rshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +L$oop_rshift_mod_384: + call __rshift_mod_384 + decl %edx + jnz L$oop_rshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__rshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rsi + movq 0(%rcx),%r14 + andq %r8,%rsi + movq 8(%rcx),%r15 + negq %rsi + movq 16(%rcx),%rax + andq %rsi,%r14 + movq 24(%rcx),%rbx + andq %rsi,%r15 + movq 32(%rcx),%rbp + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rbp + andq 40(%rcx),%rsi + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rax + adcq %r11,%rbx + adcq %r12,%rbp + adcq %r13,%rsi + sbbq %r13,%r13 + + shrq $1,%r14 + movq %r15,%r8 + shrq $1,%r15 + movq %rax,%r9 + shrq $1,%rax + movq %rbx,%r10 + shrq $1,%rbx + movq %rbp,%r11 + shrq $1,%rbp + movq %rsi,%r12 + shrq $1,%rsi + shlq $63,%r8 + shlq $63,%r9 + orq %r14,%r8 + shlq $63,%r10 + orq %r15,%r9 + shlq $63,%r11 + orq %rax,%r10 + shlq $63,%r12 + orq %rbx,%r11 + shlq $63,%r13 + orq %rbp,%r12 + orq %rsi,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _div_by_2_mod_384 +.private_extern _div_by_2_mod_384 + +.p2align 5 +_div_by_2_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq %rdx,%rcx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + call __rshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _lshift_mod_384 +.private_extern _lshift_mod_384 + +.p2align 5 +_lshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +L$oop_lshift_mod_384: + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + movq %r8,%r14 + adcq %r11,%r11 + movq %r9,%r15 + adcq %r12,%r12 + movq %r10,%rax + adcq %r13,%r13 + movq %r11,%rbx + sbbq %rdi,%rdi + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdi + + movq (%rsp),%rdi + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + cmovcq %rbx,%r11 + cmovcq %rbp,%r12 + cmovcq %rsi,%r13 + + decl %edx + jnz L$oop_lshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__lshift_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + movq %r8,%r14 + adcq %r11,%r11 + movq %r9,%r15 + adcq %r12,%r12 + movq %r10,%rax + adcq %r13,%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + cmovcq %rbx,%r11 + cmovcq %rbp,%r12 + cmovcq %rsi,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _mul_by_3_mod_384 +.private_extern _mul_by_3_mod_384 + +.p2align 5 +_mul_by_3_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + + movq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _mul_by_8_mod_384 +.private_extern _mul_by_8_mod_384 + +.p2align 5 +_mul_by_8_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _mul_by_3_mod_384x +.private_extern _mul_by_3_mod_384x + +.p2align 5 +_mul_by_3_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + + movq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq (%rsp),%rsi + leaq 48(%rdi),%rdi + + movq 48(%rsi),%r8 + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + movq 72(%rsi),%r11 + movq 80(%rsi),%r12 + movq 88(%rsi),%r13 + + call __lshift_mod_384 + + movq $48,%rdx + addq (%rsp),%rdx + call __add_mod_384_a_is_loaded + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _mul_by_8_mod_384x +.private_extern _mul_by_8_mod_384x + +.p2align 5 +_mul_by_8_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq %rdx,%rcx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq (%rsp),%rsi + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + movq %r8,48+0(%rdi) + movq %r9,48+8(%rdi) + movq %r10,48+16(%rdi) + movq %r11,48+24(%rdi) + movq %r12,48+32(%rdi) + movq %r13,48+40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _cneg_mod_384 +.private_extern _cneg_mod_384 + +.p2align 5 +_cneg_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdx +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%rdx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %rdx,%r8 + movq 24(%rsi),%r11 + orq %r9,%rdx + movq 32(%rsi),%r12 + orq %r10,%rdx + movq 40(%rsi),%r13 + orq %r11,%rdx + movq $-1,%rsi + orq %r12,%rdx + orq %r13,%rdx + + movq 0(%rcx),%r14 + cmovnzq %rsi,%rdx + movq 8(%rcx),%r15 + movq 16(%rcx),%rax + andq %rdx,%r14 + movq 24(%rcx),%rbx + andq %rdx,%r15 + movq 32(%rcx),%rbp + andq %rdx,%rax + movq 40(%rcx),%rsi + andq %rdx,%rbx + movq 0(%rsp),%rcx + andq %rdx,%rbp + andq %rdx,%rsi + + subq %r8,%r14 + sbbq %r9,%r15 + sbbq %r10,%rax + sbbq %r11,%rbx + sbbq %r12,%rbp + sbbq %r13,%rsi + + orq %rcx,%rcx + + cmovzq %r8,%r14 + cmovzq %r9,%r15 + cmovzq %r10,%rax + movq %r14,0(%rdi) + cmovzq %r11,%rbx + movq %r15,8(%rdi) + cmovzq %r12,%rbp + movq %rax,16(%rdi) + cmovzq %r13,%rsi + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rsi,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.globl _sub_mod_384 +.private_extern _sub_mod_384 + +.p2align 5 +_sub_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __sub_mod_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__sub_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sub_mod_384x +.private_extern _sub_mod_384x + +.p2align 5 +_sub_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $24,%rsp +.cfi_adjust_cfa_offset 24 + + + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + leaq 48(%rsi),%rsi + leaq 48(%rdx),%rdx + leaq 48(%rdi),%rdi + call __sub_mod_384 + + movq 0(%rsp),%rsi + movq 8(%rsp),%rdx + leaq -48(%rdi),%rdi + call __sub_mod_384 + + movq 24+0(%rsp),%r15 +.cfi_restore %r15 + movq 24+8(%rsp),%r14 +.cfi_restore %r14 + movq 24+16(%rsp),%r13 +.cfi_restore %r13 + movq 24+24(%rsp),%r12 +.cfi_restore %r12 + movq 24+32(%rsp),%rbx +.cfi_restore %rbx + movq 24+40(%rsp),%rbp +.cfi_restore %rbp + leaq 24+48(%rsp),%rsp +.cfi_adjust_cfa_offset -24-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _mul_by_1_plus_i_mod_384x +.private_extern _mul_by_1_plus_i_mod_384x + +.p2align 5 +_mul_by_1_plus_i_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $56,%rsp +.cfi_adjust_cfa_offset 56 + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rbx + adcq 72(%rsi),%r11 + movq %r12,%rcx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + movq %rdi,48(%rsp) + sbbq %rdi,%rdi + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rbx + sbbq 80(%rsi),%rcx + sbbq 88(%rsi),%rbp + sbbq %rsi,%rsi + + movq %r8,0(%rsp) + movq 0(%rdx),%r8 + movq %r9,8(%rsp) + movq 8(%rdx),%r9 + movq %r10,16(%rsp) + movq 16(%rdx),%r10 + movq %r11,24(%rsp) + movq 24(%rdx),%r11 + movq %r12,32(%rsp) + andq %rsi,%r8 + movq 32(%rdx),%r12 + movq %r13,40(%rsp) + andq %rsi,%r9 + movq 40(%rdx),%r13 + andq %rsi,%r10 + andq %rsi,%r11 + andq %rsi,%r12 + andq %rsi,%r13 + movq 48(%rsp),%rsi + + addq %r8,%r14 + movq 0(%rsp),%r8 + adcq %r9,%r15 + movq 8(%rsp),%r9 + adcq %r10,%rax + movq 16(%rsp),%r10 + adcq %r11,%rbx + movq 24(%rsp),%r11 + adcq %r12,%rcx + movq 32(%rsp),%r12 + adcq %r13,%rbp + movq 40(%rsp),%r13 + + movq %r14,0(%rsi) + movq %r8,%r14 + movq %r15,8(%rsi) + movq %rax,16(%rsi) + movq %r9,%r15 + movq %rbx,24(%rsi) + movq %rcx,32(%rsi) + movq %r10,%rax + movq %rbp,40(%rsi) + + subq 0(%rdx),%r8 + movq %r11,%rbx + sbbq 8(%rdx),%r9 + sbbq 16(%rdx),%r10 + movq %r12,%rcx + sbbq 24(%rdx),%r11 + sbbq 32(%rdx),%r12 + movq %r13,%rbp + sbbq 40(%rdx),%r13 + sbbq $0,%rdi + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,48(%rsi) + cmovcq %rbx,%r11 + movq %r9,56(%rsi) + cmovcq %rcx,%r12 + movq %r10,64(%rsi) + cmovcq %rbp,%r13 + movq %r11,72(%rsi) + movq %r12,80(%rsi) + movq %r13,88(%rsi) + + movq 56+0(%rsp),%r15 +.cfi_restore %r15 + movq 56+8(%rsp),%r14 +.cfi_restore %r14 + movq 56+16(%rsp),%r13 +.cfi_restore %r13 + movq 56+24(%rsp),%r12 +.cfi_restore %r12 + movq 56+32(%rsp),%rbx +.cfi_restore %rbx + movq 56+40(%rsp),%rbp +.cfi_restore %rbp + leaq 56+48(%rsp),%rsp +.cfi_adjust_cfa_offset -56-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sgn0_pty_mod_384 +.private_extern _sgn0_pty_mod_384 + +.p2align 5 +_sgn0_pty_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%rcx + movq 40(%rdi),%rdx + + xorq %rax,%rax + movq %r8,%rdi + addq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rax + + notq %rax + andq $1,%rdi + andq $2,%rax + orq %rdi,%rax + + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sgn0_pty_mod_384x +.private_extern _sgn0_pty_mod_384x + +.p2align 5 +_sgn0_pty_mod_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq 48(%rdi),%r8 + movq 56(%rdi),%r9 + movq 64(%rdi),%r10 + movq 72(%rdi),%r11 + movq 80(%rdi),%rcx + movq 88(%rdi),%rdx + + movq %r8,%rbx + orq %r9,%r8 + orq %r10,%r8 + orq %r11,%r8 + orq %rcx,%r8 + orq %rdx,%r8 + + leaq 0(%rdi),%rax + xorq %rdi,%rdi + movq %rbx,%rbp + addq %rbx,%rbx + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rdi + + subq 0(%rsi),%rbx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rdi + + movq %r8,0(%rsp) + notq %rdi + andq $1,%rbp + andq $2,%rdi + orq %rbp,%rdi + + movq 0(%rax),%r8 + movq 8(%rax),%r9 + movq 16(%rax),%r10 + movq 24(%rax),%r11 + movq 32(%rax),%rcx + movq 40(%rax),%rdx + + movq %r8,%rbx + orq %r9,%r8 + orq %r10,%r8 + orq %r11,%r8 + orq %rcx,%r8 + orq %rdx,%r8 + + xorq %rax,%rax + movq %rbx,%rbp + addq %rbx,%rbx + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %rcx,%rcx + adcq %rdx,%rdx + adcq $0,%rax + + subq 0(%rsi),%rbx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + sbbq 40(%rsi),%rdx + sbbq $0,%rax + + movq 0(%rsp),%rbx + + notq %rax + + testq %r8,%r8 + cmovzq %rdi,%rbp + + testq %rbx,%rbx + cmovnzq %rdi,%rax + + andq $1,%rbp + andq $2,%rax + orq %rbp,%rax + + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_select_32 +.private_extern _vec_select_32 + +.p2align 5 +_vec_select_32: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 16(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 16(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 16(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-16(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-16(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-16(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,16-16(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_select_48 +.private_extern _vec_select_48 + +.p2align 5 +_vec_select_48: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 24(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 24(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 24(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-24(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-24(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-24(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-24(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-24(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-24(%rdi) + pand %xmm4,%xmm0 + pand %xmm5,%xmm1 + por %xmm1,%xmm0 + movdqu %xmm0,32-24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_select_96 +.private_extern _vec_select_96 + +.p2align 5 +_vec_select_96: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 48(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 48(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 48(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-48(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-48(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-48(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-48(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-48(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-48(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-48(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-48(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-48(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-48(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-48(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-48(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-48(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-48(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-48(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,80-48(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_select_192 +.private_extern _vec_select_192 + +.p2align 5 +_vec_select_192: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 96(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 96(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 96(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-96(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-96(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-96(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-96(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-96(%rdi) + pand %xmm4,%xmm2 + movdqu 80+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-96(%rdi) + pand %xmm4,%xmm0 + movdqu 96+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-96(%rdi) + pand %xmm4,%xmm2 + movdqu 112+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-96(%rdi) + pand %xmm4,%xmm0 + movdqu 128+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 128+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,128-96(%rdi) + pand %xmm4,%xmm2 + movdqu 144+16-96(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 144+16-96(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,144-96(%rdi) + pand %xmm4,%xmm0 + movdqu 160+16-96(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 160+16-96(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,160-96(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,176-96(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_select_144 +.private_extern _vec_select_144 + +.p2align 5 +_vec_select_144: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 72(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 72(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 72(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-72(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-72(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-72(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-72(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-72(%rdi) + pand %xmm4,%xmm2 + movdqu 80+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-72(%rdi) + pand %xmm4,%xmm0 + movdqu 96+16-72(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-72(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-72(%rdi) + pand %xmm4,%xmm2 + movdqu 112+16-72(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-72(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-72(%rdi) + pand %xmm4,%xmm0 + pand %xmm5,%xmm1 + por %xmm1,%xmm0 + movdqu %xmm0,128-72(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_select_288 +.private_extern _vec_select_288 + +.p2align 5 +_vec_select_288: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movd %ecx,%xmm5 + pxor %xmm4,%xmm4 + pshufd $0,%xmm5,%xmm5 + movdqu (%rsi),%xmm0 + leaq 144(%rsi),%rsi + pcmpeqd %xmm4,%xmm5 + movdqu (%rdx),%xmm1 + leaq 144(%rdx),%rdx + pcmpeqd %xmm5,%xmm4 + leaq 144(%rdi),%rdi + pand %xmm4,%xmm0 + movdqu 0+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 0+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,0-144(%rdi) + pand %xmm4,%xmm2 + movdqu 16+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 16+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,16-144(%rdi) + pand %xmm4,%xmm0 + movdqu 32+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 32+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,32-144(%rdi) + pand %xmm4,%xmm2 + movdqu 48+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 48+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,48-144(%rdi) + pand %xmm4,%xmm0 + movdqu 64+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 64+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,64-144(%rdi) + pand %xmm4,%xmm2 + movdqu 80+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 80+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,80-144(%rdi) + pand %xmm4,%xmm0 + movdqu 96+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 96+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,96-144(%rdi) + pand %xmm4,%xmm2 + movdqu 112+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 112+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,112-144(%rdi) + pand %xmm4,%xmm0 + movdqu 128+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 128+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,128-144(%rdi) + pand %xmm4,%xmm2 + movdqu 144+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 144+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,144-144(%rdi) + pand %xmm4,%xmm0 + movdqu 160+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 160+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,160-144(%rdi) + pand %xmm4,%xmm2 + movdqu 176+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 176+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,176-144(%rdi) + pand %xmm4,%xmm0 + movdqu 192+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 192+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,192-144(%rdi) + pand %xmm4,%xmm2 + movdqu 208+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 208+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,208-144(%rdi) + pand %xmm4,%xmm0 + movdqu 224+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 224+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,224-144(%rdi) + pand %xmm4,%xmm2 + movdqu 240+16-144(%rsi),%xmm0 + pand %xmm5,%xmm3 + movdqu 240+16-144(%rdx),%xmm1 + por %xmm3,%xmm2 + movdqu %xmm2,240-144(%rdi) + pand %xmm4,%xmm0 + movdqu 256+16-144(%rsi),%xmm2 + pand %xmm5,%xmm1 + movdqu 256+16-144(%rdx),%xmm3 + por %xmm1,%xmm0 + movdqu %xmm0,256-144(%rdi) + pand %xmm4,%xmm2 + pand %xmm5,%xmm3 + por %xmm3,%xmm2 + movdqu %xmm2,272-144(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_prefetch +.private_extern _vec_prefetch + +.p2align 5 +_vec_prefetch: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + leaq -1(%rdi,%rsi,1),%rsi + movq $64,%rax + xorq %r8,%r8 + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + cmovaq %r8,%rax + prefetchnta (%rdi) + leaq (%rdi,%rax,1),%rdi + cmpq %rsi,%rdi + cmovaq %rsi,%rdi + prefetchnta (%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_is_zero_16x +.private_extern _vec_is_zero_16x + +.p2align 5 +_vec_is_zero_16x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + shrl $4,%esi + movdqu (%rdi),%xmm0 + leaq 16(%rdi),%rdi + +L$oop_is_zero: + decl %esi + jz L$oop_is_zero_done + movdqu (%rdi),%xmm1 + leaq 16(%rdi),%rdi + por %xmm1,%xmm0 + jmp L$oop_is_zero + +L$oop_is_zero_done: + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 +.byte 102,72,15,126,192 + incl %esi + testq %rax,%rax + cmovnzl %esi,%eax + xorl $1,%eax + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _vec_is_equal_16x +.private_extern _vec_is_equal_16x + +.p2align 5 +_vec_is_equal_16x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + shrl $4,%edx + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm1 + subq %rdi,%rsi + leaq 16(%rdi),%rdi + pxor %xmm1,%xmm0 + +L$oop_is_equal: + decl %edx + jz L$oop_is_equal_done + movdqu (%rdi),%xmm1 + movdqu (%rdi,%rsi,1),%xmm2 + leaq 16(%rdi),%rdi + pxor %xmm2,%xmm1 + por %xmm1,%xmm0 + jmp L$oop_is_equal + +L$oop_is_equal_done: + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 +.byte 102,72,15,126,192 + incl %edx + testq %rax,%rax + cmovnzl %edx,%eax + xorl $1,%eax + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/add_mod_384x384-x86_64.s b/crypto/blst_src/build/mach-o/add_mod_384x384-x86_64.s new file mode 100644 index 00000000000..2dc58f81608 --- /dev/null +++ b/crypto/blst_src/build/mach-o/add_mod_384x384-x86_64.s @@ -0,0 +1,244 @@ +.text + + +.p2align 5 +__add_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + addq 0(%rdx),%r8 + movq 56(%rsi),%r15 + adcq 8(%rdx),%r9 + movq 64(%rsi),%rax + adcq 16(%rdx),%r10 + movq 72(%rsi),%rbx + adcq 24(%rdx),%r11 + movq 80(%rsi),%rbp + adcq 32(%rdx),%r12 + movq 88(%rsi),%rsi + adcq 40(%rdx),%r13 + movq %r8,0(%rdi) + adcq 48(%rdx),%r14 + movq %r9,8(%rdi) + adcq 56(%rdx),%r15 + movq %r10,16(%rdi) + adcq 64(%rdx),%rax + movq %r12,32(%rdi) + movq %r14,%r8 + adcq 72(%rdx),%rbx + movq %r11,24(%rdi) + movq %r15,%r9 + adcq 80(%rdx),%rbp + movq %r13,40(%rdi) + movq %rax,%r10 + adcq 88(%rdx),%rsi + movq %rbx,%r11 + sbbq %rdx,%rdx + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %rbp,%r12 + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%rbx + sbbq 32(%rcx),%rbp + movq %rsi,%r13 + sbbq 40(%rcx),%rsi + sbbq $0,%rdx + + cmovcq %r8,%r14 + cmovcq %r9,%r15 + cmovcq %r10,%rax + movq %r14,48(%rdi) + cmovcq %r11,%rbx + movq %r15,56(%rdi) + cmovcq %r12,%rbp + movq %rax,64(%rdi) + cmovcq %r13,%rsi + movq %rbx,72(%rdi) + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__sub_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _add_mod_384x384 +.private_extern _add_mod_384x384 + +.p2align 5 +_add_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __add_mod_384x384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sub_mod_384x384 +.private_extern _sub_mod_384x384 + +.p2align 5 +_sub_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __sub_mod_384x384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/ct_inverse_mod_256-armv8.S b/crypto/blst_src/build/mach-o/ct_inverse_mod_256-armv8.S new file mode 100644 index 00000000000..2fd4847a496 --- /dev/null +++ b/crypto/blst_src/build/mach-o/ct_inverse_mod_256-armv8.S @@ -0,0 +1,785 @@ +.text + +.globl _ct_inverse_mod_256 +.private_extern _ct_inverse_mod_256 + +.align 5 +_ct_inverse_mod_256: +.long 3573752639 + stp x29, x30, [sp,#-80]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + sub sp, sp, #1040 + + ldp x4, x5, [x1,#8*0] + ldp x6, x7, [x1,#8*2] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + str x0, [sp] + + ldp x8, x9, [x2,#8*0] + ldp x10, x11, [x2,#8*2] + + stp x4, x5, [x1,#8*0] // copy input to |a| + stp x6, x7, [x1,#8*2] + stp x8, x9, [x1,#8*4] // copy modulus to |b| + stp x10, x11, [x1,#8*6] + + ////////////////////////////////////////// first iteration + bl Lab_approximation_31_256_loaded + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + str x12,[x0,#8*8] // initialize |u| with |f0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to dst |b| + bl __smul_256_n_shift_by_31 + str x12, [x0,#8*9] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + ldr x8, [x1,#8*8] // |u| + ldr x9, [x1,#8*13] // |v| + madd x4, x16, x8, xzr // |u|*|f0| + madd x4, x17, x9, x4 // |v|*|g0| + str x4, [x0,#8*4] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*5] + stp x5, x5, [x0,#8*7] + + madd x4, x12, x8, xzr // |u|*|f1| + madd x4, x13, x9, x4 // |v|*|g1| + str x4, [x0,#8*9] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*10] + stp x5, x5, [x0,#8*12] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + ////////////////////////////////////////// two[!] last iterations + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #47 // 31 + 512 % 31 + //bl __ab_approximation_62_256 // |a| and |b| are exact, + ldr x7, [x1,#8*0] // just load + ldr x11, [x1,#8*4] + bl __inner_loop_62_256 + + mov x16, x14 + mov x17, x15 + ldr x0, [sp] // original out_ptr + bl __smul_256x63 + bl __smul_512x63_tail + ldr x30, [x29,#8] + + smulh x20, x7, x17 // figure out top-most limb + ldp x8, x9, [x3,#8*0] + adc x23, x23, x25 + ldp x10, x11, [x3,#8*2] + + add x20, x20, x23 // x20 is 1, 0 or -1 + asr x19, x20, #63 // sign as mask + + and x23, x8, x19 // add mod<<256 conditionally + and x24, x9, x19 + adds x4, x4, x23 + and x25, x10, x19 + adcs x5, x5, x24 + and x26, x11, x19 + adcs x6, x6, x25 + adcs x7, x22, x26 + adc x20, x20, xzr // x20 is 1, 0 or -1 + + neg x19, x20 + orr x20, x20, x19 // excess bit or sign as mask + asr x19, x19, #63 // excess bit as mask + + and x8, x8, x20 // mask |mod| + and x9, x9, x20 + and x10, x10, x20 + and x11, x11, x20 + + eor x8, x8, x19 // conditionally negate |mod| + eor x9, x9, x19 + adds x8, x8, x19, lsr#63 + eor x10, x10, x19 + adcs x9, x9, xzr + eor x11, x11, x19 + adcs x10, x10, xzr + adc x11, x11, xzr + + adds x4, x4, x8 // final adjustment for |mod|<<256 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*4] + adc x7, x7, x11 + stp x6, x7, [x0,#8*6] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldr x29, [sp],#80 +.long 3573752767 + ret + + +//////////////////////////////////////////////////////////////////////// + +.align 5 +__smul_256x63: + ldp x4, x5, [x1,#8*0+64] // load |u| (or |v|) + asr x14, x16, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x6, x7, [x1,#8*2+64] + eor x16, x16, x14 // conditionally negate |f_| (or |g_|) + ldr x22, [x1,#8*4+64] + + eor x4, x4, x14 // conditionally negate |u| (or |v|) + sub x16, x16, x14 + eor x5, x5, x14 + adds x4, x4, x14, lsr#63 + eor x6, x6, x14 + adcs x5, x5, xzr + eor x7, x7, x14 + adcs x6, x6, xzr + eor x22, x22, x14 + umulh x19, x4, x16 + adcs x7, x7, xzr + umulh x20, x5, x16 + adcs x22, x22, xzr + umulh x21, x6, x16 + mul x4, x4, x16 + cmp x16, #0 + mul x5, x5, x16 + csel x22, x22, xzr, ne + mul x6, x6, x16 + adds x5, x5, x19 + mul x24, x7, x16 + adcs x6, x6, x20 + adcs x24, x24, x21 + adc x26, xzr, xzr + ldp x8, x9, [x1,#8*0+104] // load |u| (or |v|) + asr x14, x17, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x10, x11, [x1,#8*2+104] + eor x17, x17, x14 // conditionally negate |f_| (or |g_|) + ldr x23, [x1,#8*4+104] + + eor x8, x8, x14 // conditionally negate |u| (or |v|) + sub x17, x17, x14 + eor x9, x9, x14 + adds x8, x8, x14, lsr#63 + eor x10, x10, x14 + adcs x9, x9, xzr + eor x11, x11, x14 + adcs x10, x10, xzr + eor x23, x23, x14 + umulh x19, x8, x17 + adcs x11, x11, xzr + umulh x20, x9, x17 + adcs x23, x23, xzr + umulh x21, x10, x17 + adc x15, xzr, xzr // used in __smul_512x63_tail + mul x8, x8, x17 + cmp x17, #0 + mul x9, x9, x17 + csel x23, x23, xzr, ne + mul x10, x10, x17 + adds x9, x9, x19 + mul x25, x11, x17 + adcs x10, x10, x20 + adcs x25, x25, x21 + adc x26, x26, xzr + + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*0] + adcs x24, x24, x25 + stp x6, x24, [x0,#8*2] + + ret + + + +.align 5 +__smul_512x63_tail: + umulh x24, x7, x16 + ldp x5, x6, [x1,#8*18] // load rest of |v| + adc x26, x26, xzr + ldr x7, [x1,#8*20] + and x22, x22, x16 + + umulh x11, x11, x17 // resume |v|*|g1| chain + + sub x24, x24, x22 // tie up |u|*|f1| chain + asr x25, x24, #63 + + eor x5, x5, x14 // conditionally negate rest of |v| + eor x6, x6, x14 + adds x5, x5, x15 + eor x7, x7, x14 + adcs x6, x6, xzr + umulh x19, x23, x17 + adc x7, x7, xzr + umulh x20, x5, x17 + add x11, x11, x26 + umulh x21, x6, x17 + + mul x4, x23, x17 + mul x5, x5, x17 + adds x4, x4, x11 + mul x6, x6, x17 + adcs x5, x5, x19 + mul x22, x7, x17 + adcs x6, x6, x20 + adcs x22, x22, x21 + adc x23, xzr, xzr // used in the final step + + adds x4, x4, x24 + adcs x5, x5, x25 + adcs x6, x6, x25 + stp x4, x5, [x0,#8*4] + adcs x22, x22, x25 // carry is used in the final step + stp x6, x22, [x0,#8*6] + + ret + + + +.align 5 +__smul_256_n_shift_by_31: + ldp x4, x5, [x1,#8*0+0] // load |a| (or |b|) + asr x24, x12, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x6, x7, [x1,#8*2+0] + eor x25, x12, x24 // conditionally negate |f0| (or |g0|) + + eor x4, x4, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x5, x5, x24 + adds x4, x4, x24, lsr#63 + eor x6, x6, x24 + adcs x5, x5, xzr + eor x7, x7, x24 + umulh x19, x4, x25 + adcs x6, x6, xzr + umulh x20, x5, x25 + adc x7, x7, xzr + umulh x21, x6, x25 + and x24, x24, x25 + umulh x22, x7, x25 + neg x24, x24 + + mul x4, x4, x25 + mul x5, x5, x25 + mul x6, x6, x25 + adds x5, x5, x19 + mul x7, x7, x25 + adcs x6, x6, x20 + adcs x7, x7, x21 + adc x22, x22, x24 + ldp x8, x9, [x1,#8*0+32] // load |a| (or |b|) + asr x24, x13, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x10, x11, [x1,#8*2+32] + eor x25, x13, x24 // conditionally negate |f0| (or |g0|) + + eor x8, x8, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x9, x9, x24 + adds x8, x8, x24, lsr#63 + eor x10, x10, x24 + adcs x9, x9, xzr + eor x11, x11, x24 + umulh x19, x8, x25 + adcs x10, x10, xzr + umulh x20, x9, x25 + adc x11, x11, xzr + umulh x21, x10, x25 + and x24, x24, x25 + umulh x23, x11, x25 + neg x24, x24 + + mul x8, x8, x25 + mul x9, x9, x25 + mul x10, x10, x25 + adds x9, x9, x19 + mul x11, x11, x25 + adcs x10, x10, x20 + adcs x11, x11, x21 + adc x23, x23, x24 + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + adcs x7, x7, x11 + adc x8, x22, x23 + + extr x4, x5, x4, #31 + extr x5, x6, x5, #31 + extr x6, x7, x6, #31 + asr x23, x8, #63 // result's sign as mask + extr x7, x8, x7, #31 + + eor x4, x4, x23 // ensure the result is positive + eor x5, x5, x23 + adds x4, x4, x23, lsr#63 + eor x6, x6, x23 + adcs x5, x5, xzr + eor x7, x7, x23 + adcs x6, x6, xzr + stp x4, x5, [x0,#8*0] + adc x7, x7, xzr + stp x6, x7, [x0,#8*2] + + eor x12, x12, x23 // adjust |f/g| accordingly + eor x13, x13, x23 + sub x12, x12, x23 + sub x13, x13, x23 + + ret + + +.align 4 +__ab_approximation_31_256: + ldp x6, x7, [x1,#8*2] + ldp x10, x11, [x1,#8*6] + ldp x4, x5, [x1,#8*0] + ldp x8, x9, [x1,#8*4] + +Lab_approximation_31_256_loaded: + orr x19, x7, x11 // check top-most limbs, ... + cmp x19, #0 + csel x7, x7, x6, ne + csel x11, x11, x10, ne + csel x6, x6, x5, ne + orr x19, x7, x11 // and ones before top-most, ... + csel x10, x10, x9, ne + + cmp x19, #0 + csel x7, x7, x6, ne + csel x11, x11, x10, ne + csel x6, x6, x4, ne + orr x19, x7, x11 // and one more, ... + csel x10, x10, x8, ne + + clz x19, x19 + cmp x19, #64 + csel x19, x19, xzr, ne + csel x7, x7, x6, ne + csel x11, x11, x10, ne + neg x20, x19 + + lslv x7, x7, x19 // align high limbs to the left + lslv x11, x11, x19 + lsrv x6, x6, x20 + lsrv x10, x10, x20 + and x6, x6, x20, asr#6 + and x10, x10, x20, asr#6 + orr x7, x7, x6 + orr x11, x11, x10 + + bfxil x7, x4, #0, #31 + bfxil x11, x8, #0, #31 + + b __inner_loop_31_256 + ret + + + +.align 4 +__inner_loop_31_256: + mov x2, #31 + mov x13, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x15, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x23,#0x7FFFFFFF7FFFFFFF + +Loop_31_256: + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x15 + csel x11, x11, x7, hs // |b_| = |a_| + csel x7, x21, x20, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x15, x15, x13, hs // exchange |fg0| and |fg1| + csel x13, x13, x19, hs + lsr x7, x7, #1 + and x19, x15, x22 + and x20, x23, x22 + sub x13, x13, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x15, x15, x15 // |f1|<<=1 + add x13, x13, x20 + sub x15, x15, x23 + cbnz x2, Loop_31_256 + + mov x23, #0x7FFFFFFF + ubfx x12, x13, #0, #32 + ubfx x13, x13, #32, #32 + ubfx x14, x15, #0, #32 + ubfx x15, x15, #32, #32 + sub x12, x12, x23 // remove bias + sub x13, x13, x23 + sub x14, x14, x23 + sub x15, x15, x23 + + ret + + + +.align 4 +__inner_loop_62_256: + mov x12, #1 // |f0|=1 + mov x13, #0 // |g0|=0 + mov x14, #0 // |f1|=0 + mov x15, #1 // |g1|=1 + +Loop_62_256: + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x12 + csel x11, x11, x7, hs // |b_| = |a_| + csel x7, x21, x20, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + mov x20, x13 + csel x12, x12, x14, hs // exchange |f0| and |f1| + csel x14, x14, x19, hs + csel x13, x13, x15, hs // exchange |g0| and |g1| + csel x15, x15, x20, hs + lsr x7, x7, #1 + and x19, x14, x22 + and x20, x15, x22 + add x14, x14, x14 // |f1|<<=1 + add x15, x15, x15 // |g1|<<=1 + sub x12, x12, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x13, x13, x20 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, Loop_62_256 + + ret + diff --git a/crypto/blst_src/build/mach-o/ct_inverse_mod_256-x86_64.s b/crypto/blst_src/build/mach-o/ct_inverse_mod_256-x86_64.s new file mode 100644 index 00000000000..bf0ad8986e7 --- /dev/null +++ b/crypto/blst_src/build/mach-o/ct_inverse_mod_256-x86_64.s @@ -0,0 +1,1178 @@ +.text + +.globl _ct_inverse_mod_256 +.private_extern _ct_inverse_mod_256 + +.p2align 5 +_ct_inverse_mod_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $1072,%rsp +.cfi_adjust_cfa_offset 1072 + + + leaq 48+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + movq 0(%rdx),%r12 + movq 8(%rdx),%r13 + movq 16(%rdx),%r14 + movq 24(%rdx),%r15 + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + + movq %r12,32(%rax) + movq %r13,40(%rax) + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rax,%rsi + + + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + + + movq %rdx,64(%rdi) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + + + movq %rdx,72(%rdi) + + + xorq $256,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + + + + movq 64(%rsi),%r8 + movq 104(%rsi),%r12 + movq %r8,%r9 + imulq 0(%rsp),%r8 + movq %r12,%r13 + imulq 8(%rsp),%r12 + addq %r12,%r8 + movq %r8,32(%rdi) + sarq $63,%r8 + movq %r8,40(%rdi) + movq %r8,48(%rdi) + movq %r8,56(%rdi) + movq %r8,64(%rdi) + leaq 64(%rsi),%rsi + + imulq %rdx,%r9 + imulq %rcx,%r13 + addq %r13,%r9 + movq %r9,72(%rdi) + sarq $63,%r9 + movq %r9,80(%rdi) + movq %r9,88(%rdi) + movq %r9,96(%rdi) + movq %r9,104(%rdi) + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_256x63 + sarq $63,%rbp + movq %rbp,40(%rdi) + movq %rbp,48(%rdi) + movq %rbp,56(%rdi) + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + xorq $256+64,%rsi + movl $31,%edx + call __ab_approximation_31_256 + + + movq %r12,16(%rsp) + movq %r13,24(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,0(%rsp) + movq %rcx,8(%rsp) + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 32(%rdi),%rdi + call __smulq_256_n_shift_by_31 + movq %rdx,16(%rsp) + movq %rcx,24(%rsp) + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq 64(%rsi),%rsi + leaq 32(%rdi),%rdi + call __smulq_256x63 + + movq 16(%rsp),%rdx + movq 24(%rsp),%rcx + leaq 40(%rdi),%rdi + call __smulq_512x63 + + xorq $256+64,%rsi + movl $47,%edx + + movq 0(%rsi),%r8 + + movq 32(%rsi),%r10 + + call __inner_loop_62_256 + + + + + + + + leaq 64(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulq_512x63 + adcq %rbp,%rdx + + movq 40(%rsp),%rsi + movq %rdx,%rax + sarq $63,%rdx + + movq %rdx,%r8 + movq %rdx,%r9 + andq 0(%rsi),%r8 + movq %rdx,%r10 + andq 8(%rsi),%r9 + andq 16(%rsi),%r10 + andq 24(%rsi),%rdx + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %rdx,%r15 + adcq $0,%rax + + movq %rax,%rdx + negq %rax + orq %rax,%rdx + sarq $63,%rax + + movq %rdx,%r8 + movq %rdx,%r9 + andq 0(%rsi),%r8 + movq %rdx,%r10 + andq 8(%rsi),%r9 + andq 16(%rsi),%r10 + andq 24(%rsi),%rdx + + xorq %rax,%r8 + xorq %rcx,%rcx + xorq %rax,%r9 + subq %rax,%rcx + xorq %rax,%r10 + xorq %rax,%rdx + addq %rcx,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%rdx + + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %rdx,%r15 + + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 1072(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -1072-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulq_512x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%rbp + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%rbp + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%rbp + + mulq %rbx + movq %rax,0(%rdi) + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %r9,8(%rdi) + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %r10,16(%rdi) + movq %rdx,%r11 + andq %rbx,%rbp + negq %rbp + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq %r11,24(%rdi) + + movq 40(%rsi),%r8 + movq 48(%rsi),%r9 + movq 56(%rsi),%r10 + movq 64(%rsi),%r11 + movq 72(%rsi),%r12 + movq 80(%rsi),%r13 + movq 88(%rsi),%r14 + movq 96(%rsi),%r15 + + movq %rcx,%rdx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rcx + addq %rax,%rcx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + + mulq %rcx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rcx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rcx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rcx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rcx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + mulq %rcx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rcx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + imulq %rcx + addq %rax,%r15 + adcq $0,%rdx + + movq %rbp,%rbx + sarq $63,%rbp + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq %rbx,%r12 + adcq %rbp,%r13 + adcq %rbp,%r14 + adcq %rbp,%r15 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__smulq_256x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%rbp + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%rbp + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%rbp + + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + andq %rbx,%rbp + negq %rbp + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq %rcx,%rdx + movq 40+0(%rsi),%r12 + movq 40+8(%rsi),%r13 + movq 40+16(%rsi),%r14 + movq 40+24(%rsi),%r15 + movq 40+32(%rsi),%rcx + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + xorq %rdx,%rcx + addq %r12,%rax + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rcx + + mulq %rbx + movq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + mulq %rbx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rbx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + andq %rbx,%rcx + negq %rcx + mulq %rbx + addq %rax,%r15 + adcq %rdx,%rcx + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rcx,%rbp + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %rbp,32(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulq_256_n_shift_by_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,0(%rdi) + movq %rcx,8(%rdi) + movq %rdx,%rbp + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + + movq %rbp,%rbx + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rbx + addq %rax,%rbx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + andq %rbx,%rbp + negq %rbp + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + adcq %rdx,%rbp + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r14 + movq 32+24(%rsi),%r15 + + movq %rcx,%rbx + sarq $63,%rcx + xorq %rax,%rax + subq %rcx,%rax + + xorq %rcx,%rbx + addq %rax,%rbx + + xorq %rcx,%r12 + xorq %rcx,%r13 + xorq %rcx,%r14 + xorq %rcx,%r15 + addq %r12,%rax + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + + mulq %rbx + movq %rax,%r12 + movq %r13,%rax + andq %rbx,%rcx + negq %rcx + movq %rdx,%r13 + mulq %rbx + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rbx + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + mulq %rbx + addq %rax,%r15 + adcq %rdx,%rcx + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + adcq %rcx,%rbp + + movq 0(%rdi),%rdx + movq 8(%rdi),%rcx + + shrdq $31,%r9,%r8 + shrdq $31,%r10,%r9 + shrdq $31,%r11,%r10 + shrdq $31,%rbp,%r11 + + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + xorq %rbp,%rdx + xorq %rbp,%rcx + addq %rax,%rdx + addq %rax,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__ab_approximation_31_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 24(%rsi),%r9 + movq 56(%rsi),%r11 + movq 16(%rsi),%rbx + movq 48(%rsi),%rbp + movq 8(%rsi),%r8 + movq 40(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 0(%rsi),%r8 + cmovzq %r10,%rbp + movq 32(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r9 + cmovzq %r10,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + movl $0x7FFFFFFF,%eax + andq %rax,%r8 + andq %rax,%r10 + notq %rax + andq %rax,%r9 + andq %rax,%r11 + orq %r9,%r8 + orq %r11,%r10 + + jmp __inner_loop_31_256 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__inner_loop_31_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rcx + movq $0x800000007FFFFFFF,%r13 + movq $0x7FFFFFFF7FFFFFFF,%r15 + +L$oop_31_256: + cmpq %r10,%r8 + movq %r8,%rax + movq %r10,%rbx + movq %rcx,%rbp + movq %r13,%r14 + cmovbq %r10,%r8 + cmovbq %rax,%r10 + cmovbq %r13,%rcx + cmovbq %rbp,%r13 + + subq %r10,%r8 + subq %r13,%rcx + addq %r15,%rcx + + testq $1,%rax + cmovzq %rax,%r8 + cmovzq %rbx,%r10 + cmovzq %rbp,%rcx + cmovzq %r14,%r13 + + shrq $1,%r8 + addq %r13,%r13 + subq %r15,%r13 + subl $1,%edx + jnz L$oop_31_256 + + shrq $32,%r15 + movl %ecx,%edx + movl %r13d,%r12d + shrq $32,%rcx + shrq $32,%r13 + subq %r15,%rdx + subq %r15,%rcx + subq %r15,%r12 + subq %r15,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__inner_loop_62_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movl %edx,%r15d + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq %rdx,%r13 + movq %rdx,%r14 + +L$oop_62_256: + xorq %rax,%rax + testq %r14,%r8 + movq %r10,%rbx + cmovnzq %r10,%rax + subq %r8,%rbx + movq %r8,%rbp + subq %rax,%r8 + cmovcq %rbx,%r8 + cmovcq %rbp,%r10 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrq $1,%r8 + testq %r14,%rbp + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%r15d + jnz L$oop_62_256 + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/ct_inverse_mod_384-armv8.S b/crypto/blst_src/build/mach-o/ct_inverse_mod_384-armv8.S new file mode 100644 index 00000000000..b9c3acde200 --- /dev/null +++ b/crypto/blst_src/build/mach-o/ct_inverse_mod_384-armv8.S @@ -0,0 +1,718 @@ +.text + +.globl _ct_inverse_mod_383 +.private_extern _ct_inverse_mod_383 + +.align 5 +_ct_inverse_mod_383: +.long 3573752639 + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #1040 + + ldp x22, x4, [x1,#8*0] + ldp x5, x6, [x1,#8*2] + ldp x7, x8, [x1,#8*4] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + stp x0, x3, [sp] + + ldp x9, x10, [x2,#8*0] + ldp x11, x12, [x2,#8*2] + ldp x13, x14, [x2,#8*4] + + stp x22, x4, [x1,#8*0] // copy input to |a| + stp x5, x6, [x1,#8*2] + stp x7, x8, [x1,#8*4] + stp x9, x10, [x1,#8*6] // copy modulus to |b| + stp x11, x12, [x1,#8*8] + stp x13, x14, [x1,#8*10] + + ////////////////////////////////////////// first iteration + mov x2, #62 + bl Lab_approximation_62_loaded + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + str x15,[x0,#8*12] // initialize |u| with |f0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to dst |b| + bl __smul_383_n_shift_by_62 + str x15, [x0,#8*12] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + ldr x7, [x1,#8*12] // |u| + ldr x8, [x1,#8*18] // |v| + mul x3, x20, x7 // |u|*|f0| + smulh x4, x20, x7 + mul x5, x21, x8 // |v|*|g0| + smulh x6, x21, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*6] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*8] + stp x5, x5, [x0,#8*10] + + mul x3, x15, x7 // |u|*|f1| + smulh x4, x15, x7 + mul x5, x16, x8 // |v|*|g1| + smulh x6, x16, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*12] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*14] + stp x5, x5, [x0,#8*16] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + asr x27, x27, #63 // sign extension + stp x27, x27, [x0,#8*6] + stp x27, x27, [x0,#8*8] + stp x27, x27, [x0,#8*10] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + ////////////////////////////////////////// iteration before last + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldp x3, x8, [x1,#8*0] // just load + ldp x9, x14, [x1,#8*6] + bl __inner_loop_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + str x3, [x0,#8*0] + str x9, [x0,#8*6] + + mov x20, x15 // exact |f0| + mov x21, x16 // exact |g0| + mov x15, x17 + mov x16, x19 + add x0, x0, #8*12 // pointer to dst |u| + bl __smul_383x63 + + mov x20, x15 // exact |f1| + mov x21, x16 // exact |g1| + add x0, x0, #8*6 // pointer to dst |v| + bl __smul_383x63 + bl __smul_767x63_tail + + ////////////////////////////////////////// last iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #22 // 766 % 62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldr x3, [x1,#8*0] // just load + eor x8, x8, x8 + ldr x9, [x1,#8*6] + eor x14, x14, x14 + bl __inner_loop_62 + + mov x20, x17 + mov x21, x19 + ldp x0, x15, [sp] // original out_ptr and n_ptr + bl __smul_383x63 + bl __smul_767x63_tail + ldr x30, [x29,#8] + + asr x22, x8, #63 // sign as mask + ldp x9, x10, [x15,#8*0] + ldp x11, x12, [x15,#8*2] + ldp x13, x14, [x15,#8*4] + + and x9, x9, x22 // add mod<<384 conditionally + and x10, x10, x22 + adds x3, x3, x9 + and x11, x11, x22 + adcs x4, x4, x10 + and x12, x12, x22 + adcs x5, x5, x11 + and x13, x13, x22 + adcs x6, x6, x12 + and x14, x14, x22 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*8] + adc x8, x8, x14 + stp x7, x8, [x0,#8*10] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 +.long 3573752767 + ret + + +//////////////////////////////////////////////////////////////////////// +// see corresponding commentary in ctx_inverse_mod_384-x86_64... + +.align 5 +__smul_383x63: + ldp x3, x4, [x1,#8*0+96] // load |u| (or |v|) + asr x17, x20, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x5, x6, [x1,#8*2+96] + eor x20, x20, x17 // conditionally negate |f_| (or |g_|) + ldp x7, x8, [x1,#8*4+96] + + eor x3, x3, x17 // conditionally negate |u| (or |v|) + sub x20, x20, x17 + eor x4, x4, x17 + adds x3, x3, x17, lsr#63 + eor x5, x5, x17 + adcs x4, x4, xzr + eor x6, x6, x17 + adcs x5, x5, xzr + eor x7, x7, x17 + adcs x6, x6, xzr + umulh x22, x3, x20 + eor x8, x8, x17 + umulh x23, x4, x20 + adcs x7, x7, xzr + umulh x24, x5, x20 + adcs x8, x8, xzr + umulh x25, x6, x20 + umulh x26, x7, x20 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x22 + mul x6, x6, x20 + adcs x5, x5, x23 + mul x7, x7, x20 + adcs x6, x6, x24 + mul x27,x8, x20 + adcs x7, x7, x25 + adcs x27,x27,x26 + adc x2, xzr, xzr + ldp x9, x10, [x1,#8*0+144] // load |u| (or |v|) + asr x17, x21, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x11, x12, [x1,#8*2+144] + eor x21, x21, x17 // conditionally negate |f_| (or |g_|) + ldp x13, x14, [x1,#8*4+144] + + eor x9, x9, x17 // conditionally negate |u| (or |v|) + sub x21, x21, x17 + eor x10, x10, x17 + adds x9, x9, x17, lsr#63 + eor x11, x11, x17 + adcs x10, x10, xzr + eor x12, x12, x17 + adcs x11, x11, xzr + eor x13, x13, x17 + adcs x12, x12, xzr + umulh x22, x9, x21 + eor x14, x14, x17 + umulh x23, x10, x21 + adcs x13, x13, xzr + umulh x24, x11, x21 + adcs x14, x14, xzr + umulh x25, x12, x21 + adc x19, xzr, xzr // used in __smul_767x63_tail + umulh x26, x13, x21 + mul x9, x9, x21 + mul x10, x10, x21 + mul x11, x11, x21 + adds x10, x10, x22 + mul x12, x12, x21 + adcs x11, x11, x23 + mul x13, x13, x21 + adcs x12, x12, x24 + mul x28,x14, x21 + adcs x13, x13, x25 + adcs x28,x28,x26 + adc x2, x2, xzr + + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + stp x3, x4, [x0,#8*0] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*2] + adcs x27, x27, x28 + stp x7, x27, [x0,#8*4] + adc x28, x2, xzr // used in __smul_767x63_tail + + ret + + + +.align 5 +__smul_767x63_tail: + smulh x27, x8, x20 + ldp x3, x4, [x1,#8*24] // load rest of |v| + umulh x14,x14, x21 + ldp x5, x6, [x1,#8*26] + ldp x7, x8, [x1,#8*28] + + eor x3, x3, x17 // conditionally negate rest of |v| + eor x4, x4, x17 + eor x5, x5, x17 + adds x3, x3, x19 + eor x6, x6, x17 + adcs x4, x4, xzr + eor x7, x7, x17 + adcs x5, x5, xzr + eor x8, x8, x17 + adcs x6, x6, xzr + umulh x22, x3, x21 + adcs x7, x7, xzr + umulh x23, x4, x21 + adc x8, x8, xzr + + umulh x24, x5, x21 + add x14, x14, x28 + umulh x25, x6, x21 + asr x28, x27, #63 + umulh x26, x7, x21 + mul x3, x3, x21 + mul x4, x4, x21 + mul x5, x5, x21 + adds x3, x3, x14 + mul x6, x6, x21 + adcs x4, x4, x22 + mul x7, x7, x21 + adcs x5, x5, x23 + mul x8, x8, x21 + adcs x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, x26 + + adds x3, x3, x27 + adcs x4, x4, x28 + adcs x5, x5, x28 + adcs x6, x6, x28 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x28 + stp x5, x6, [x0,#8*8] + adc x8, x8, x28 + stp x7, x8, [x0,#8*10] + + ret + + + +.align 5 +__smul_383_n_shift_by_62: + ldp x3, x4, [x1,#8*0+0] // load |a| (or |b|) + asr x28, x15, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x5, x6, [x1,#8*2+0] + eor x2, x15, x28 // conditionally negate |f0| (or |g0|) + ldp x7, x8, [x1,#8*4+0] + + eor x3, x3, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + umulh x22, x3, x2 + adcs x6, x6, xzr + umulh x23, x4, x2 + eor x8, x8, x28 + umulh x24, x5, x2 + adcs x7, x7, xzr + umulh x25, x6, x2 + adc x8, x8, xzr + + umulh x26, x7, x2 + smulh x27, x8, x2 + mul x3, x3, x2 + mul x4, x4, x2 + mul x5, x5, x2 + adds x4, x4, x22 + mul x6, x6, x2 + adcs x5, x5, x23 + mul x7, x7, x2 + adcs x6, x6, x24 + mul x8, x8, x2 + adcs x7, x7, x25 + adcs x8, x8 ,x26 + adc x27, x27, xzr + ldp x9, x10, [x1,#8*0+48] // load |a| (or |b|) + asr x28, x16, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x11, x12, [x1,#8*2+48] + eor x2, x16, x28 // conditionally negate |f0| (or |g0|) + ldp x13, x14, [x1,#8*4+48] + + eor x9, x9, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x10, x10, x28 + adds x9, x9, x28, lsr#63 + eor x11, x11, x28 + adcs x10, x10, xzr + eor x12, x12, x28 + adcs x11, x11, xzr + eor x13, x13, x28 + umulh x22, x9, x2 + adcs x12, x12, xzr + umulh x23, x10, x2 + eor x14, x14, x28 + umulh x24, x11, x2 + adcs x13, x13, xzr + umulh x25, x12, x2 + adc x14, x14, xzr + + umulh x26, x13, x2 + smulh x28, x14, x2 + mul x9, x9, x2 + mul x10, x10, x2 + mul x11, x11, x2 + adds x10, x10, x22 + mul x12, x12, x2 + adcs x11, x11, x23 + mul x13, x13, x2 + adcs x12, x12, x24 + mul x14, x14, x2 + adcs x13, x13, x25 + adcs x14, x14 ,x26 + adc x28, x28, xzr + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x27, x28 + + extr x3, x4, x3, #62 + extr x4, x5, x4, #62 + extr x5, x6, x5, #62 + asr x28, x9, #63 + extr x6, x7, x6, #62 + extr x7, x8, x7, #62 + extr x8, x9, x8, #62 + + eor x3, x3, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + adcs x6, x6, xzr + eor x8, x8, x28 + stp x3, x4, [x0,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x0,#8*2] + adc x8, x8, xzr + stp x7, x8, [x0,#8*4] + + eor x15, x15, x28 + eor x16, x16, x28 + sub x15, x15, x28 + sub x16, x16, x28 + + ret + + +.align 4 +__ab_approximation_62: + ldp x7, x8, [x1,#8*4] + ldp x13, x14, [x1,#8*10] + ldp x5, x6, [x1,#8*2] + ldp x11, x12, [x1,#8*8] + +Lab_approximation_62_loaded: + orr x22, x8, x14 // check top-most limbs, ... + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x6, ne + orr x22, x8, x14 // ... ones before top-most, ... + csel x13, x13, x12, ne + + ldp x3, x4, [x1,#8*0] + ldp x9, x10, [x1,#8*6] + + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x5, ne + orr x22, x8, x14 // ... and ones before that ... + csel x13, x13, x11, ne + + cmp x22, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x4, ne + orr x22, x8, x14 + csel x13, x13, x10, ne + + clz x22, x22 + cmp x22, #64 + csel x22, x22, xzr, ne + csel x8, x8, x7, ne + csel x14, x14, x13, ne + neg x23, x22 + + lslv x8, x8, x22 // align high limbs to the left + lslv x14, x14, x22 + lsrv x7, x7, x23 + lsrv x13, x13, x23 + and x7, x7, x23, asr#6 + and x13, x13, x23, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + b __inner_loop_62 + ret + + +.align 4 +__inner_loop_62: + mov x15, #1 // |f0|=1 + mov x16, #0 // |g0|=0 + mov x17, #0 // |f1|=0 + mov x19, #1 // |g1|=1 + +Loop_62: + sbfx x28, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + subs x24, x9, x3 // |b_|-|a_| + and x22, x9, x28 + sbc x25, x14, x8 + and x23, x14, x28 + subs x26, x3, x22 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x22, x15 + sbcs x27, x8, x23 + mov x23, x16 + csel x9, x9, x3, hs // |b_| = |a_| + csel x14, x14, x8, hs + csel x3, x26, x24, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x8, x27, x25, hs + csel x15, x15, x17, hs // exchange |f0| and |f1| + csel x17, x17, x22, hs + csel x16, x16, x19, hs // exchange |g0| and |g1| + csel x19, x19, x23, hs + extr x3, x8, x3, #1 + lsr x8, x8, #1 + and x22, x17, x28 + and x23, x19, x28 + add x17, x17, x17 // |f1|<<=1 + add x19, x19, x19 // |g1|<<=1 + sub x15, x15, x22 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x16, x16, x23 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, Loop_62 + + ret + diff --git a/crypto/blst_src/build/mach-o/ct_is_square_mod_384-armv8.S b/crypto/blst_src/build/mach-o/ct_is_square_mod_384-armv8.S new file mode 100644 index 00000000000..9fe0df88b59 --- /dev/null +++ b/crypto/blst_src/build/mach-o/ct_is_square_mod_384-armv8.S @@ -0,0 +1,325 @@ +.text + +.globl _ct_is_square_mod_384 +.private_extern _ct_is_square_mod_384 + +.align 5 +_ct_is_square_mod_384: +.long 3573752639 + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #512 + + ldp x3, x4, [x0,#8*0] // load input + ldp x5, x6, [x0,#8*2] + ldp x7, x8, [x0,#8*4] + + add x0, sp, #255 // find closest 256-byte-aligned spot + and x0, x0, #-256 // in the frame... + + ldp x9, x10, [x1,#8*0] // load modulus + ldp x11, x12, [x1,#8*2] + ldp x13, x14, [x1,#8*4] + + stp x3, x4, [x0,#8*6] // copy input to |a| + stp x5, x6, [x0,#8*8] + stp x7, x8, [x0,#8*10] + stp x9, x10, [x0,#8*0] // copy modulus to |b| + stp x11, x12, [x0,#8*2] + stp x13, x14, [x0,#8*4] + + eor x2, x2, x2 // init the Legendre symbol + mov x15, #24 // 24 is 768/30-1 + b Loop_is_square + +.align 4 +Loop_is_square: + bl __ab_approximation_30 + sub x15, x15, #1 + + eor x1, x0, #128 // pointer to dst |b| + bl __smul_384_n_shift_by_30 + + mov x19, x16 // |f0| + mov x20, x17 // |g0| + add x1, x1, #8*6 // pointer to dst |a| + bl __smul_384_n_shift_by_30 + + ldp x9, x10, [x1,#-8*6] + eor x0, x0, #128 // flip-flop src |a|b| + and x27, x27, x9 // if |a| was negative, + add x2, x2, x27, lsr#1 // adjust |L| + + cbnz x15, Loop_is_square + + ////////////////////////////////////////// last iteration + //bl __ab_approximation_30 // |a| and |b| are exact, + //ldr x8, [x0,#8*6] // and loaded + //ldr x14, [x0,#8*0] + mov x15, #48 // 48 is 768%30 + 30 + bl __inner_loop_48 + ldr x30, [x29,#8] + + and x0, x2, #1 + eor x0, x0, #1 + + add sp, sp, #512 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 +.long 3573752767 + ret + + + +.align 5 +__smul_384_n_shift_by_30: + ldp x3, x4, [x0,#8*0+0] // load |b| (or |a|) + asr x27, x20, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x5, x6, [x0,#8*2+0] + eor x20, x20, x27 // conditionally negate |g1| (or |f1|) + ldp x7, x8, [x0,#8*4+0] + + eor x3, x3, x27 // conditionally negate |b| (or |a|) + sub x20, x20, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + umulh x21, x3, x20 + adcs x6, x6, xzr + umulh x22, x4, x20 + eor x8, x8, x27 + umulh x23, x5, x20 + adcs x7, x7, xzr + umulh x24, x6, x20 + adc x8, x8, xzr + + umulh x25, x7, x20 + and x28, x20, x27 + umulh x26, x8, x20 + neg x28, x28 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x21 + mul x6, x6, x20 + adcs x5, x5, x22 + mul x7, x7, x20 + adcs x6, x6, x23 + mul x8, x8, x20 + adcs x7, x7, x24 + adcs x8, x8 ,x25 + adc x26, x26, x28 + ldp x9, x10, [x0,#8*0+48] // load |b| (or |a|) + asr x27, x19, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x11, x12, [x0,#8*2+48] + eor x19, x19, x27 // conditionally negate |g1| (or |f1|) + ldp x13, x14, [x0,#8*4+48] + + eor x9, x9, x27 // conditionally negate |b| (or |a|) + sub x19, x19, x27 + eor x10, x10, x27 + adds x9, x9, x27, lsr#63 + eor x11, x11, x27 + adcs x10, x10, xzr + eor x12, x12, x27 + adcs x11, x11, xzr + eor x13, x13, x27 + umulh x21, x9, x19 + adcs x12, x12, xzr + umulh x22, x10, x19 + eor x14, x14, x27 + umulh x23, x11, x19 + adcs x13, x13, xzr + umulh x24, x12, x19 + adc x14, x14, xzr + + umulh x25, x13, x19 + and x28, x19, x27 + umulh x27, x14, x19 + neg x28, x28 + mul x9, x9, x19 + mul x10, x10, x19 + mul x11, x11, x19 + adds x10, x10, x21 + mul x12, x12, x19 + adcs x11, x11, x22 + mul x13, x13, x19 + adcs x12, x12, x23 + mul x14, x14, x19 + adcs x13, x13, x24 + adcs x14, x14 ,x25 + adc x27, x27, x28 + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x26, x27 + + extr x3, x4, x3, #30 + extr x4, x5, x4, #30 + extr x5, x6, x5, #30 + asr x27, x9, #63 + extr x6, x7, x6, #30 + extr x7, x8, x7, #30 + extr x8, x9, x8, #30 + + eor x3, x3, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + adcs x6, x6, xzr + eor x8, x8, x27 + stp x3, x4, [x1,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x1,#8*2] + adc x8, x8, xzr + stp x7, x8, [x1,#8*4] + + ret + + +.align 4 +__ab_approximation_30: + ldp x13, x14, [x0,#8*4] // |a| is still in registers + ldp x11, x12, [x0,#8*2] + + orr x21, x8, x14 // check top-most limbs, ... + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x6, ne + orr x21, x8, x14 // ... ones before top-most, ... + csel x13, x13, x12, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x5, ne + orr x21, x8, x14 // ... and ones before that ... + csel x13, x13, x11, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x4, ne + orr x21, x8, x14 // and one more, ... + csel x13, x13, x10, ne + + cmp x21, #0 + csel x8, x8, x7, ne + csel x14, x14, x13, ne + csel x7, x7, x3, ne + orr x21, x8, x14 + csel x13, x13, x9, ne + + clz x21, x21 + cmp x21, #64 + csel x21, x21, xzr, ne + csel x8, x8, x7, ne + csel x14, x14, x13, ne + neg x22, x21 + + lslv x8, x8, x21 // align high limbs to the left + lslv x14, x14, x21 + lsrv x7, x7, x22 + lsrv x13, x13, x22 + and x7, x7, x22, asr#6 + and x13, x13, x22, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + bfxil x8, x3, #0, #32 + bfxil x14, x9, #0, #32 + + b __inner_loop_30 + ret + + + +.align 4 +__inner_loop_30: + mov x28, #30 + mov x17, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x20, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x27,#0x7FFFFFFF7FFFFFFF + +Loop_30: + sbfx x24, x8, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x8, x14 + sub x28, x28, #1 + and x21, x14, x24 + + sub x22, x14, x8 // |b_|-|a_| + subs x23, x8, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 // L + (a_ & b_) >> 1 + mov x21, x20 + csel x14, x14, x8, hs // |b_| = |a_| + csel x8, x23, x22, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x20, x20, x17, hs // exchange |fg0| and |fg1| + csel x17, x17, x21, hs + csel x2, x2, x25, hs + lsr x8, x8, #1 + and x21, x20, x24 + and x22, x27, x24 + add x23, x14, #2 + sub x17, x17, x21 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x20, x20, x20 // |f1|<<=1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + add x17, x17, x22 + sub x20, x20, x27 + + cbnz x28, Loop_30 + + mov x27, #0x7FFFFFFF + ubfx x16, x17, #0, #32 + ubfx x17, x17, #32, #32 + ubfx x19, x20, #0, #32 + ubfx x20, x20, #32, #32 + sub x16, x16, x27 // remove the bias + sub x17, x17, x27 + sub x19, x19, x27 + sub x20, x20, x27 + + ret + + +.align 4 +__inner_loop_48: +Loop_48: + sbfx x24, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x3, x9 + sub x15, x15, #1 + and x21, x9, x24 + sub x22, x9, x3 // |b_|-|a_| + subs x23, x3, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 + csel x9, x9, x3, hs // |b_| = |a_| + csel x3, x23, x22, hs // borrow means |a_|<|b_|, replace with |b_|-|a_| + csel x2, x2, x25, hs + add x23, x9, #2 + lsr x3, x3, #1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + + cbnz x15, Loop_48 + + ret + diff --git a/crypto/blst_src/build/mach-o/ct_is_square_mod_384-x86_64.s b/crypto/blst_src/build/mach-o/ct_is_square_mod_384-x86_64.s new file mode 100644 index 00000000000..5faadb8dbff --- /dev/null +++ b/crypto/blst_src/build/mach-o/ct_is_square_mod_384-x86_64.s @@ -0,0 +1,472 @@ +.text + +.globl _ct_is_square_mod_384 +.private_extern _ct_is_square_mod_384 + +.p2align 5 +_ct_is_square_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $536,%rsp +.cfi_adjust_cfa_offset 536 + + + leaq 24+255(%rsp),%rax + andq $-256,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rbx + movq 24(%rsi),%rcx + movq 32(%rsi),%rdx + movq 40(%rsi),%rdi + movq %rax,%rsi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rcx,72(%rax) + movq %rdx,80(%rax) + movq %rdi,88(%rax) + + xorq %rbp,%rbp + movl $24,%ecx + jmp L$oop_is_square + +.p2align 5 +L$oop_is_square: + movl %ecx,16(%rsp) + + call __ab_approximation_30 + movq %rax,0(%rsp) + movq %rbx,8(%rsp) + + movq $128+48,%rdi + xorq %rsi,%rdi + call __smulq_384_n_shift_by_30 + + movq 0(%rsp),%rdx + movq 8(%rsp),%rcx + leaq -48(%rdi),%rdi + call __smulq_384_n_shift_by_30 + + movl 16(%rsp),%ecx + xorq $128,%rsi + + andq 48(%rdi),%r14 + shrq $1,%r14 + addq %r14,%rbp + + subl $1,%ecx + jnz L$oop_is_square + + + + + movq 48(%rsi),%r9 + call __inner_loop_48 + + movq $1,%rax + andq %rbp,%rax + xorq $1,%rax + + leaq 536(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -536-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__smulq_384_n_shift_by_30: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %rdx,%r14 + andq %rbx,%r14 + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + negq %r14 + mulq %rbx + addq %rax,%r13 + adcq %rdx,%r14 + leaq 48(%rsi),%rsi + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbx + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbx + addq %rax,%rbx + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %rdx,%r15 + andq %rbx,%r15 + mulq %rbx + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbx + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbx + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbx + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbx + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + negq %r15 + mulq %rbx + addq %rax,%r13 + adcq %rdx,%r15 + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq %r15,%r14 + + shrdq $30,%r9,%r8 + shrdq $30,%r10,%r9 + shrdq $30,%r11,%r10 + shrdq $30,%r12,%r11 + shrdq $30,%r13,%r12 + shrdq $30,%r14,%r13 + + sarq $63,%r14 + xorq %rbx,%rbx + subq %r14,%rbx + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbx,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__ab_approximation_30: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 88(%rsi),%rbx + movq 80(%rsi),%r15 + movq 72(%rsi),%r14 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r11,%r12 + movq 64(%rsi),%r11 + cmovzq %r14,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r10,%r12 + movq 56(%rsi),%r10 + cmovzq %r11,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r9,%r12 + movq 48(%rsi),%r9 + cmovzq %r10,%r15 + + movq %r13,%rax + orq %rbx,%rax + cmovzq %r12,%r13 + cmovzq %r15,%rbx + cmovzq %r8,%r12 + cmovzq %r9,%r15 + + movq %r13,%rax + orq %rbx,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r13 + cmovzq %r9,%rbx + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%r12,%r13 + shldq %cl,%r15,%rbx + + movq $0xFFFFFFFF00000000,%rax + movl %r8d,%r8d + movl %r9d,%r9d + andq %rax,%r13 + andq %rax,%rbx + orq %r13,%r8 + orq %rbx,%r9 + + jmp __inner_loop_30 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__inner_loop_30: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rbx + movq $0x800000007FFFFFFF,%rcx + leaq -1(%rbx),%r15 + movl $30,%edi + +L$oop_30: + movq %r8,%rax + andq %r9,%rax + shrq $1,%rax + + cmpq %r9,%r8 + movq %r8,%r10 + movq %r9,%r11 + leaq (%rax,%rbp,1),%rax + movq %rbx,%r12 + movq %rcx,%r13 + movq %rbp,%r14 + cmovbq %r9,%r8 + cmovbq %r10,%r9 + cmovbq %rcx,%rbx + cmovbq %r12,%rcx + cmovbq %rax,%rbp + + subq %r9,%r8 + subq %rcx,%rbx + addq %r15,%rbx + + testq $1,%r10 + cmovzq %r10,%r8 + cmovzq %r11,%r9 + cmovzq %r12,%rbx + cmovzq %r13,%rcx + cmovzq %r14,%rbp + + leaq 2(%r9),%rax + shrq $1,%r8 + shrq $2,%rax + addq %rcx,%rcx + leaq (%rax,%rbp,1),%rbp + subq %r15,%rcx + + subl $1,%edi + jnz L$oop_30 + + shrq $32,%r15 + movl %ebx,%eax + shrq $32,%rbx + movl %ecx,%edx + shrq $32,%rcx + subq %r15,%rax + subq %r15,%rbx + subq %r15,%rdx + subq %r15,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__inner_loop_48: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movl $48,%edi + +L$oop_48: + movq %r8,%rax + andq %r9,%rax + shrq $1,%rax + + cmpq %r9,%r8 + movq %r8,%r10 + movq %r9,%r11 + leaq (%rax,%rbp,1),%rax + movq %rbp,%r12 + cmovbq %r9,%r8 + cmovbq %r10,%r9 + cmovbq %rax,%rbp + + subq %r9,%r8 + + testq $1,%r10 + cmovzq %r10,%r8 + cmovzq %r11,%r9 + cmovzq %r12,%rbp + + leaq 2(%r9),%rax + shrq $1,%r8 + shrq $2,%rax + addq %rax,%rbp + + subl $1,%edi + jnz L$oop_48 + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/ctq_inverse_mod_384-x86_64.s b/crypto/blst_src/build/mach-o/ctq_inverse_mod_384-x86_64.s new file mode 100644 index 00000000000..eebe131d0cb --- /dev/null +++ b/crypto/blst_src/build/mach-o/ctq_inverse_mod_384-x86_64.s @@ -0,0 +1,1193 @@ +.comm ___blst_platform_cap,4 +.text + +.globl _ct_inverse_mod_383 +.private_extern _ct_inverse_mod_383 + +.p2align 5 +_ct_inverse_mod_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz ct_inverse_mod_383$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $1112,%rsp +.cfi_adjust_cfa_offset 1112 + + + leaq 88+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq 0(%rdx),%r14 + movq 8(%rdx),%r15 + movq 16(%rdx),%rbx + movq 24(%rdx),%rbp + movq 32(%rdx),%rsi + movq 40(%rdx),%rdi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rbp,72(%rax) + movq %rsi,80(%rax) + movq %rax,%rsi + movq %rdi,88(%rax) + + + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + + + movq %rdx,96(%rdi) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + + + movq %rdx,96(%rdi) + + + xorq $256,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + + + + movq 96(%rsi),%rax + movq 144(%rsi),%r11 + movq %rdx,%rbx + movq %rax,%r10 + imulq 56(%rsp) + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + sarq $63,%r9 + movq %r9,64(%rdi) + movq %r9,72(%rdi) + movq %r9,80(%rdi) + movq %r9,88(%rdi) + leaq 96(%rsi),%rsi + + movq %r10,%rax + imulq %rbx + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,96(%rdi) + movq %r9,104(%rdi) + sarq $63,%r9 + movq %r9,112(%rdi) + movq %r9,120(%rdi) + movq %r9,128(%rdi) + movq %r9,136(%rdi) + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383x63 + sarq $63,%r13 + movq %r13,48(%rdi) + movq %r13,56(%rdi) + movq %r13,64(%rdi) + movq %r13,72(%rdi) + movq %r13,80(%rdi) + movq %r13,88(%rdi) + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + xorq $256+96,%rsi + movl $62,%edi + call __ab_approximation_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_383_n_shift_by_62 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + + xorq $256+96,%rsi + movl $62,%edi + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 48(%rsi),%r10 + movq 56(%rsi),%r11 + call __inner_loop_62 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + movq %r8,0(%rdi) + movq %r10,48(%rdi) + + + + leaq 96(%rsi),%rsi + leaq 96(%rdi),%rdi + call __smulq_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulq_767x63 + + + xorq $256+96,%rsi + movl $22,%edi + + movq 0(%rsi),%r8 + xorq %r9,%r9 + movq 48(%rsi),%r10 + xorq %r11,%r11 + call __inner_loop_62 + + + + + + + + leaq 96(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulq_767x63 + + movq 40(%rsp),%rsi + movq %rax,%rdx + sarq $63,%rax + + movq %rax,%r8 + movq %rax,%r9 + movq %rax,%r10 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + movq %rax,%r11 + andq 16(%rsi),%r10 + andq 24(%rsi),%r11 + movq %rax,%r12 + andq 32(%rsi),%r12 + andq 40(%rsi),%rax + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rbx + adcq %r11,%rbp + adcq %r12,%rcx + adcq %rax,%rdx + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %rbx,64(%rdi) + movq %rbp,72(%rdi) + movq %rcx,80(%rdi) + movq %rdx,88(%rdi) + + leaq 1112(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -1112-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulq_767x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + leaq 48(%rsi),%rsi + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,0(%rdi) + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + movq %r9,8(%rdi) + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + movq %r10,16(%rdi) + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + movq %r11,24(%rdi) + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + movq %r12,32(%rdi) + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + movq %r13,40(%rdi) + movq %rdx,48(%rdi) + sarq $63,%rdx + movq %rdx,56(%rdi) + movq %rcx,%rdx + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + movq 64(%rsi),%rbx + movq 72(%rsi),%rbp + movq 80(%rsi),%rcx + movq 88(%rsi),%rdi + + movq %rdx,%rsi + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rsi + addq %rax,%rsi + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + xorq %rdx,%r14 + xorq %rdx,%r15 + xorq %rdx,%rbx + xorq %rdx,%rbp + xorq %rdx,%rcx + xorq %rdx,%rdi + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rbx + adcq $0,%rbp + adcq $0,%rcx + adcq $0,%rdi + + mulq %rsi + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rsi + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rsi + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rsi + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rsi + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + mulq %rsi + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + mulq %rsi + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + mulq %rsi + addq %rax,%r15 + movq %rbx,%rax + adcq $0,%rdx + movq %rdx,%rbx + mulq %rsi + addq %rax,%rbx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rbp + mulq %rsi + addq %rax,%rbp + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rcx + mulq %rsi + addq %rax,%rcx + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%rdi + movq 8(%rsp),%rdx + imulq %rsi,%rax + movq 16(%rsp),%rsi + addq %rdi,%rax + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq 32(%rdx),%r12 + adcq 40(%rdx),%r13 + adcq 48(%rdx),%r14 + movq 56(%rdx),%rdi + adcq %rdi,%r15 + adcq %rdi,%rbx + adcq %rdi,%rbp + adcq %rdi,%rcx + adcq %rdi,%rax + + movq %rdx,%rdi + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + movq %rbx,64(%rdx) + movq %rbp,72(%rdx) + movq %rcx,80(%rdx) + movq %rax,88(%rdx) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulq_383x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp,%rax + addq %rax,%r13 + + leaq 48(%rsi),%rsi + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp,%rax + addq %rax,%r13 + + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulq_383_n_shift_by_62: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + leaq 48(%rsi),%rsi + movq %rdx,%r14 + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rdx + xorq %rax,%rax + subq %rdx,%rax + + xorq %rdx,%rbp + addq %rax,%rbp + + xorq %rdx,%r8 + xorq %rdx,%r9 + xorq %rdx,%r10 + xorq %rdx,%r11 + xorq %rdx,%r12 + xorq %rdx,%r13 + addq %r8,%rax + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulq %rbp + movq %rax,%r8 + movq %r9,%rax + movq %rdx,%r9 + mulq %rbp + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r10 + mulq %rbp + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r11 + mulq %rbp + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r12 + mulq %rbp + addq %rax,%r12 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r13 + imulq %rbp + addq %rax,%r13 + adcq $0,%rdx + + leaq -48(%rsi),%rsi + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $62,%r9,%r8 + shrdq $62,%r10,%r9 + shrdq $62,%r11,%r10 + shrdq $62,%r12,%r11 + shrdq $62,%r13,%r12 + shrdq $62,%r14,%r13 + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__ab_approximation_62: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 40(%rsi),%r9 + movq 88(%rsi),%r11 + movq 32(%rsi),%rbx + movq 80(%rsi),%rbp + movq 24(%rsi),%r8 + movq 72(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 16(%rsi),%r8 + movq 64(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 8(%rsi),%r8 + movq 56(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + movq 0(%rsi),%r8 + movq 48(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + jmp __inner_loop_62 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 3 +.long 0 +__inner_loop_62: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq $1,%r13 + movq %rsi,8(%rsp) + +L$oop_62: + xorq %rax,%rax + xorq %rbx,%rbx + testq $1,%r8 + movq %r10,%rbp + movq %r11,%r14 + cmovnzq %r10,%rax + cmovnzq %r11,%rbx + subq %r8,%rbp + sbbq %r9,%r14 + movq %r8,%r15 + movq %r9,%rsi + subq %rax,%r8 + sbbq %rbx,%r9 + cmovcq %rbp,%r8 + cmovcq %r14,%r9 + cmovcq %r15,%r10 + cmovcq %rsi,%r11 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrdq $1,%r9,%r8 + shrq $1,%r9 + testq $1,%r15 + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%edi + jnz L$oop_62 + + movq 8(%rsp),%rsi + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/ctx_inverse_mod_384-x86_64.s b/crypto/blst_src/build/mach-o/ctx_inverse_mod_384-x86_64.s new file mode 100644 index 00000000000..3f999075813 --- /dev/null +++ b/crypto/blst_src/build/mach-o/ctx_inverse_mod_384-x86_64.s @@ -0,0 +1,1568 @@ +.text + +.globl _ctx_inverse_mod_383 +.private_extern _ctx_inverse_mod_383 + +.p2align 5 +_ctx_inverse_mod_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +ct_inverse_mod_383$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $1112,%rsp +.cfi_adjust_cfa_offset 1112 + + + leaq 88+511(%rsp),%rax + andq $-512,%rax + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq 0(%rdx),%r14 + movq 8(%rdx),%r15 + movq 16(%rdx),%rbx + movq 24(%rdx),%rbp + movq 32(%rdx),%rsi + movq 40(%rdx),%rdi + + movq %r8,0(%rax) + movq %r9,8(%rax) + movq %r10,16(%rax) + movq %r11,24(%rax) + movq %r12,32(%rax) + movq %r13,40(%rax) + + movq %r14,48(%rax) + movq %r15,56(%rax) + movq %rbx,64(%rax) + movq %rbp,72(%rax) + movq %rsi,80(%rax) + movq %rax,%rsi + movq %rdi,88(%rax) + + + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + + + movq %rdx,96(%rdi) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + + + movq %rdx,96(%rdi) + + + xorq $256,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + + + + movq 96(%rsi),%rax + movq 144(%rsi),%r11 + movq %rdx,%rbx + movq %rax,%r10 + imulq 56(%rsp) + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + sarq $63,%r9 + movq %r9,64(%rdi) + movq %r9,72(%rdi) + movq %r9,80(%rdi) + movq %r9,88(%rdi) + leaq 96(%rsi),%rsi + + movq %r10,%rax + imulq %rbx + movq %rax,%r8 + movq %r11,%rax + movq %rdx,%r9 + imulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + movq %r8,96(%rdi) + movq %r9,104(%rdi) + sarq $63,%r9 + movq %r9,112(%rdi) + movq %r9,120(%rdi) + movq %r9,128(%rdi) + movq %r9,136(%rdi) + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383x63 + sarq $63,%r13 + movq %r13,48(%rdi) + movq %r13,56(%rdi) + movq %r13,64(%rdi) + movq %r13,72(%rdi) + movq %r13,80(%rdi) + movq %r13,88(%rdi) + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_383_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + xorq $256+96,%rsi + movl $31,%edi + call __ab_approximation_31 + + + movq %r12,72(%rsp) + movq %r13,80(%rsp) + + movq $256,%rdi + xorq %rsi,%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,56(%rsp) + movq %rcx,64(%rsp) + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_191_n_shift_by_31 + movq %rdx,72(%rsp) + movq %rcx,80(%rsp) + + movq 56(%rsp),%rdx + movq 64(%rsp),%rcx + leaq 96(%rsi),%rsi + leaq 48(%rdi),%rdi + call __smulx_383x63 + + movq 72(%rsp),%rdx + movq 80(%rsp),%rcx + leaq 48(%rdi),%rdi + call __smulx_767x63 + + xorq $256+96,%rsi + movl $53,%edi + + movq 0(%rsi),%r8 + + movq 48(%rsi),%r10 + + call __tail_loop_53 + + + + + + + + leaq 96(%rsi),%rsi + + + + + + movq %r12,%rdx + movq %r13,%rcx + movq 32(%rsp),%rdi + call __smulx_767x63 + + movq 40(%rsp),%rsi + movq %rax,%rdx + sarq $63,%rax + + movq %rax,%r8 + movq %rax,%r9 + movq %rax,%r10 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + movq %rax,%r11 + andq 16(%rsi),%r10 + andq 24(%rsi),%r11 + movq %rax,%r12 + andq 32(%rsi),%r12 + andq 40(%rsi),%rax + + addq %r8,%r14 + adcq %r9,%r15 + adcq %r10,%rbx + adcq %r11,%rbp + adcq %r12,%rcx + adcq %rax,%rdx + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %rbx,64(%rdi) + movq %rbp,72(%rdi) + movq %rcx,80(%rdi) + movq %rdx,88(%rdi) + + leaq 1112(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -1112-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulx_767x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + leaq 48(%rsi),%rsi + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq $0,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + movq %rdx,48(%rdi) + sarq $63,%rdx + movq %rdx,56(%rdi) + movq %rcx,%rdx + movq %rcx,%rax + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + movq 64(%rsi),%rbx + movq 72(%rsi),%rbp + movq 80(%rsi),%rcx + movq 88(%rsi),%rdi + + sarq $63,%rax + xorq %rsi,%rsi + subq %rax,%rsi + + xorq %rax,%rdx + addq %rsi,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %rax,%r13 + xorq %rax,%r14 + xorq %rax,%r15 + xorq %rax,%rbx + xorq %rax,%rbp + xorq %rax,%rcx + xorq %rax,%rdi + addq %rsi,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rbx + adcq $0,%rbp + adcq $0,%rcx + adcq $0,%rdi + + mulxq %r8,%r8,%rax + mulxq %r9,%r9,%rsi + addq %rax,%r9 + mulxq %r10,%r10,%rax + adcq %rsi,%r10 + mulxq %r11,%r11,%rsi + adcq %rax,%r11 + mulxq %r12,%r12,%rax + adcq %rsi,%r12 + mulxq %r13,%r13,%rsi + adcq %rax,%r13 + mulxq %r14,%r14,%rax + adcq %rsi,%r14 + mulxq %r15,%r15,%rsi + adcq %rax,%r15 + mulxq %rbx,%rbx,%rax + adcq %rsi,%rbx + mulxq %rbp,%rbp,%rsi + adcq %rax,%rbp + mulxq %rcx,%rcx,%rax + adcq %rsi,%rcx + mulxq %rdi,%rdi,%rsi + movq 8(%rsp),%rdx + movq 16(%rsp),%rsi + adcq %rdi,%rax + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + adcq 32(%rdx),%r12 + adcq 40(%rdx),%r13 + adcq 48(%rdx),%r14 + movq 56(%rdx),%rdi + adcq %rdi,%r15 + adcq %rdi,%rbx + adcq %rdi,%rbp + adcq %rdi,%rcx + adcq %rdi,%rax + + movq %rdx,%rdi + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + movq %rbx,64(%rdx) + movq %rbp,72(%rdx) + movq %rcx,80(%rdx) + movq %rax,88(%rdx) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulx_383x63: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%r12 + movq 0+40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rdx + addq %rax,%rdx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + xorq %rbp,%r12 + xorq %rbp,%r13 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%rax + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %rax,%r10 + mulxq %r11,%r11,%rax + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %rax,%r12 + mulxq %r13,%r13,%rax + movq %rcx,%rdx + adcq %rbp,%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + movq %rdx,%rbp + sarq $63,%rbp + xorq %rax,%rax + subq %rbp,%rax + + xorq %rbp,%rdx + addq %rax,%rdx + + xorq %rbp,%r8 + xorq %rbp,%r9 + xorq %rbp,%r10 + xorq %rbp,%r11 + xorq %rbp,%r12 + xorq %rbp,%r13 + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%rax + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %rax,%r10 + mulxq %r11,%r11,%rax + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %rax,%r12 + mulxq %r13,%r13,%rax + adcq %rbp,%r13 + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulx_383_n_shift_by_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + xorq %r14,%r14 + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + movq 0+24(%rsi),%r11 + movq 0+32(%rsi),%r12 + movq 0+40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq %rdx,%r14 + + movq %rcx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + movq 48+0(%rsi),%r8 + movq 48+8(%rsi),%r9 + movq 48+16(%rsi),%r10 + movq 48+24(%rsi),%r11 + movq 48+32(%rsi),%r12 + movq 48+40(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %rax,%r10 + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r13 + addq %rbp,%r9 + mulxq %r10,%r10,%rbp + adcq %r13,%r10 + mulxq %r11,%r11,%r13 + adcq %rbp,%r11 + mulxq %r12,%r12,%rbp + adcq %r13,%r12 + adcq $0,%rbp + imulq %rdx + addq %rbp,%rax + adcq $0,%rdx + + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%rax + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $31,%r9,%r8 + shrdq $31,%r10,%r9 + shrdq $31,%r11,%r10 + shrdq $31,%r12,%r11 + shrdq $31,%rax,%r12 + shrdq $31,%r14,%rax + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r8 + xorq %r14,%r9 + xorq %r14,%r10 + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%rax + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %rax,40(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__smulx_191_n_shift_by_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rdx,%rbx + movq 0+0(%rsi),%r8 + movq 0+8(%rsi),%r9 + movq 0+16(%rsi),%r10 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r8 + xorq %rax,%r9 + xorq %r10,%rax + addq %rbp,%r8 + adcq $0,%r9 + adcq $0,%rax + + mulxq %r8,%r8,%rbp + mulxq %r9,%r9,%r10 + addq %rbp,%r9 + adcq $0,%r10 + imulq %rdx + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r14 + movq %rcx,%rdx + movq 48+0(%rsi),%r11 + movq 48+8(%rsi),%r12 + movq 48+16(%rsi),%r13 + + movq %rdx,%rax + sarq $63,%rax + xorq %rbp,%rbp + subq %rax,%rbp + + xorq %rax,%rdx + addq %rbp,%rdx + + xorq %rax,%r11 + xorq %rax,%r12 + xorq %r13,%rax + addq %rbp,%r11 + adcq $0,%r12 + adcq $0,%rax + + mulxq %r11,%r11,%rbp + mulxq %r12,%r12,%r13 + addq %rbp,%r12 + adcq $0,%r13 + imulq %rdx + addq %rax,%r13 + adcq $0,%rdx + addq %r8,%r11 + adcq %r9,%r12 + adcq %r10,%r13 + adcq %rdx,%r14 + movq %rbx,%rdx + + shrdq $31,%r12,%r11 + shrdq $31,%r13,%r12 + shrdq $31,%r14,%r13 + + sarq $63,%r14 + xorq %rbp,%rbp + subq %r14,%rbp + + xorq %r14,%r11 + xorq %r14,%r12 + xorq %r14,%r13 + addq %rbp,%r11 + adcq $0,%r12 + adcq $0,%r13 + + movq %r11,0(%rdi) + movq %r12,8(%rdi) + movq %r13,16(%rdi) + + xorq %r14,%rdx + xorq %r14,%rcx + addq %rbp,%rdx + addq %rbp,%rcx + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__ab_approximation_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 40(%rsi),%r9 + movq 88(%rsi),%r11 + movq 32(%rsi),%rbx + movq 80(%rsi),%rbp + movq 24(%rsi),%r8 + movq 72(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 16(%rsi),%r8 + cmovzq %r10,%rbp + movq 64(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 8(%rsi),%r8 + cmovzq %r10,%rbp + movq 56(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + movq 0(%rsi),%r8 + cmovzq %r10,%rbp + movq 48(%rsi),%r10 + + movq %r9,%rax + orq %r11,%rax + cmovzq %rbx,%r9 + cmovzq %rbp,%r11 + cmovzq %r8,%rbx + cmovzq %r10,%rbp + + movq %r9,%rax + orq %r11,%rax + bsrq %rax,%rcx + leaq 1(%rcx),%rcx + cmovzq %r8,%r9 + cmovzq %r10,%r11 + cmovzq %rax,%rcx + negq %rcx + + + shldq %cl,%rbx,%r9 + shldq %cl,%rbp,%r11 + + movl $0x7FFFFFFF,%eax + andq %rax,%r8 + andq %rax,%r10 + andnq %r9,%rax,%r9 + andnq %r11,%rax,%r11 + orq %r9,%r8 + orq %r11,%r10 + + jmp __inner_loop_31 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__inner_loop_31: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $0x7FFFFFFF80000000,%rcx + movq $0x800000007FFFFFFF,%r13 + movq $0x7FFFFFFF7FFFFFFF,%r15 + +L$oop_31: + cmpq %r10,%r8 + movq %r8,%rax + movq %r10,%rbx + movq %rcx,%rbp + movq %r13,%r14 + cmovbq %r10,%r8 + cmovbq %rax,%r10 + cmovbq %r13,%rcx + cmovbq %rbp,%r13 + + subq %r10,%r8 + subq %r13,%rcx + addq %r15,%rcx + + testq $1,%rax + cmovzq %rax,%r8 + cmovzq %rbx,%r10 + cmovzq %rbp,%rcx + cmovzq %r14,%r13 + + shrq $1,%r8 + addq %r13,%r13 + subq %r15,%r13 + subl $1,%edi + jnz L$oop_31 + + shrq $32,%r15 + movl %ecx,%edx + movl %r13d,%r12d + shrq $32,%rcx + shrq $32,%r13 + subq %r15,%rdx + subq %r15,%rcx + subq %r15,%r12 + subq %r15,%r13 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__tail_loop_53: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq $1,%rdx + xorq %rcx,%rcx + xorq %r12,%r12 + movq $1,%r13 + +L$oop_53: + xorq %rax,%rax + testq $1,%r8 + movq %r10,%rbx + cmovnzq %r10,%rax + subq %r8,%rbx + movq %r8,%rbp + subq %rax,%r8 + cmovcq %rbx,%r8 + cmovcq %rbp,%r10 + movq %rdx,%rax + cmovcq %r12,%rdx + cmovcq %rax,%r12 + movq %rcx,%rbx + cmovcq %r13,%rcx + cmovcq %rbx,%r13 + xorq %rax,%rax + xorq %rbx,%rbx + shrq $1,%r8 + testq $1,%rbp + cmovnzq %r12,%rax + cmovnzq %r13,%rbx + addq %r12,%r12 + addq %r13,%r13 + subq %rax,%rdx + subq %rbx,%rcx + subl $1,%edi + jnz L$oop_53 + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/div3w-armv8.S b/crypto/blst_src/build/mach-o/div3w-armv8.S new file mode 100644 index 00000000000..4b130080123 --- /dev/null +++ b/crypto/blst_src/build/mach-o/div3w-armv8.S @@ -0,0 +1,88 @@ +.text + +.globl _div_3_limbs + +.align 5 +_div_3_limbs: + ldp x4,x5,[x0] // load R + eor x0,x0,x0 // Q = 0 + mov x3,#64 // loop counter + nop + +Loop: + subs x6,x4,x1 // R - D + add x0,x0,x0 // Q <<= 1 + sbcs x7,x5,x2 + add x0,x0,#1 // Q + speculative bit + csel x4,x4,x6,lo // select between R and R - D + extr x1,x2,x1,#1 // D >>= 1 + csel x5,x5,x7,lo + lsr x2,x2,#1 + sbc x0,x0,xzr // subtract speculative bit + sub x3,x3,#1 + cbnz x3,Loop + + asr x3,x0,#63 // top bit -> mask + add x0,x0,x0 // Q <<= 1 + subs x6,x4,x1 // R - D + add x0,x0,#1 // Q + speculative bit + sbcs x7,x5,x2 + sbc x0,x0,xzr // subtract speculative bit + + orr x0,x0,x3 // all ones if overflow + + ret + +.globl _quot_rem_128 + +.align 5 +_quot_rem_128: + ldp x3,x4,[x1] + + mul x5,x3,x2 // divisor[0:1} * quotient + umulh x6,x3,x2 + mul x11, x4,x2 + umulh x7,x4,x2 + + ldp x8,x9,[x0] // load 3 limbs of the dividend + ldr x10,[x0,#16] + + adds x6,x6,x11 + adc x7,x7,xzr + + subs x8,x8,x5 // dividend - divisor * quotient + sbcs x9,x9,x6 + sbcs x10,x10,x7 + sbc x5,xzr,xzr // borrow -> mask + + add x2,x2,x5 // if borrowed, adjust the quotient ... + and x3,x3,x5 + and x4,x4,x5 + adds x8,x8,x3 // ... and add divisor + adc x9,x9,x4 + + stp x8,x9,[x0] // save 2 limbs of the remainder + str x2,[x0,#16] // and one limb of the quotient + + mov x0,x2 // return adjusted quotient + + ret + + +.globl _quot_rem_64 + +.align 5 +_quot_rem_64: + ldr x3,[x1] + ldr x8,[x0] // load 1 limb of the dividend + + mul x5,x3,x2 // divisor * quotient + + sub x8,x8,x5 // dividend - divisor * quotient + + stp x8,x2,[x0] // save remainder and quotient + + mov x0,x2 // return quotient + + ret + diff --git a/crypto/blst_src/build/mach-o/div3w-x86_64.s b/crypto/blst_src/build/mach-o/div3w-x86_64.s new file mode 100644 index 00000000000..99a94d50a2b --- /dev/null +++ b/crypto/blst_src/build/mach-o/div3w-x86_64.s @@ -0,0 +1,124 @@ +.text + +.globl _div_3_limbs +.private_extern _div_3_limbs + +.p2align 5 +_div_3_limbs: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq (%rdi),%r8 + movq 8(%rdi),%r9 + xorq %rax,%rax + movl $64,%ecx + +L$oop: + movq %r8,%r10 + subq %rsi,%r8 + movq %r9,%r11 + sbbq %rdx,%r9 + leaq 1(%rax,%rax,1),%rax + movq %rdx,%rdi + cmovcq %r10,%r8 + cmovcq %r11,%r9 + sbbq $0,%rax + shlq $63,%rdi + shrq $1,%rsi + shrq $1,%rdx + orq %rdi,%rsi + subl $1,%ecx + jnz L$oop + + leaq 1(%rax,%rax,1),%rcx + sarq $63,%rax + + subq %rsi,%r8 + sbbq %rdx,%r9 + sbbq $0,%rcx + + orq %rcx,%rax + + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _quot_rem_128 +.private_extern _quot_rem_128 + +.p2align 5 +_quot_rem_128: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq %rdx,%rax + movq %rdx,%rcx + + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + adcq $0,%rdx + + movq 0(%rdi),%r10 + movq 8(%rdi),%r11 + movq 16(%rdi),%rax + + subq %r8,%r10 + sbbq %r9,%r11 + sbbq %rdx,%rax + sbbq %r8,%r8 + + addq %r8,%rcx + movq %r8,%r9 + andq 0(%rsi),%r8 + andq 8(%rsi),%r9 + addq %r8,%r10 + adcq %r9,%r11 + + movq %r10,0(%rdi) + movq %r11,8(%rdi) + movq %rcx,16(%rdi) + + movq %rcx,%rax + + + .byte 0xf3,0xc3 +.cfi_endproc + + + + + + +.globl _quot_rem_64 +.private_extern _quot_rem_64 + +.p2align 5 +_quot_rem_64: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + + movq %rdx,%rax + imulq 0(%rsi),%rdx + + movq 0(%rdi),%r10 + + subq %rdx,%r10 + + movq %r10,0(%rdi) + movq %rax,8(%rdi) + + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/mul_mont_256-armv8.S b/crypto/blst_src/build/mach-o/mul_mont_256-armv8.S new file mode 100644 index 00000000000..4f506b58b0f --- /dev/null +++ b/crypto/blst_src/build/mach-o/mul_mont_256-armv8.S @@ -0,0 +1,464 @@ +.text + +.globl _mul_mont_sparse_256 +.private_extern _mul_mont_sparse_256 + +.align 5 +_mul_mont_sparse_256: + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x10,x11,[x1] + ldr x9, [x2] + ldp x12,x13,[x1,#16] + + mul x19,x10,x9 + ldp x5,x6,[x3] + mul x20,x11,x9 + ldp x7,x8,[x3,#16] + mul x21,x12,x9 + mul x22,x13,x9 + + umulh x14,x10,x9 + umulh x15,x11,x9 + mul x3,x4,x19 + umulh x16,x12,x9 + umulh x17,x13,x9 + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,xzr, x17 + mul x17,x8,x3 + ldr x9,[x2,8*1] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*2] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*3] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + adcs x20,x21,x15 + adcs x21,x22,x16 + adcs x22,x23,x17 + adc x23,xzr,xzr + + subs x14,x19,x5 + sbcs x15,x20,x6 + sbcs x16,x21,x7 + sbcs x17,x22,x8 + sbcs xzr, x23,xzr + + csel x19,x19,x14,lo + csel x20,x20,x15,lo + csel x21,x21,x16,lo + csel x22,x22,x17,lo + + stp x19,x20,[x0] + stp x21,x22,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + ret + +.globl _sqr_mont_sparse_256 +.private_extern _sqr_mont_sparse_256 + +.align 5 +_sqr_mont_sparse_256: +.long 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + mov x4,x3 + + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10 + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x11,x6,x5 // a[1]*a[0] + umulh x15,x6,x5 + mul x12,x7,x5 // a[2]*a[0] + umulh x16,x7,x5 + mul x13,x8,x5 // a[3]*a[0] + umulh x19,x8,x5 + + adds x12,x12,x15 // accumulate high parts of multiplication + mul x14,x7,x6 // a[2]*a[1] + umulh x15,x7,x6 + adcs x13,x13,x16 + mul x16,x8,x6 // a[3]*a[1] + umulh x17,x8,x6 + adc x19,x19,xzr // can't overflow + + mul x20,x8,x7 // a[3]*a[2] + umulh x21,x8,x7 + + adds x15,x15,x16 // accumulate high parts of multiplication + mul x10,x5,x5 // a[0]*a[0] + adc x16,x17,xzr // can't overflow + + adds x13,x13,x14 // accumulate low parts of multiplication + umulh x5,x5,x5 + adcs x19,x19,x15 + mul x15,x6,x6 // a[1]*a[1] + adcs x20,x20,x16 + umulh x6,x6,x6 + adc x21,x21,xzr // can't overflow + + adds x11,x11,x11 // acc[1-6]*=2 + mul x16,x7,x7 // a[2]*a[2] + adcs x12,x12,x12 + umulh x7,x7,x7 + adcs x13,x13,x13 + mul x17,x8,x8 // a[3]*a[3] + adcs x19,x19,x19 + umulh x8,x8,x8 + adcs x20,x20,x20 + adcs x21,x21,x21 + adc x22,xzr,xzr + + adds x11,x11,x5 // +a[i]*a[i] + adcs x12,x12,x15 + adcs x13,x13,x6 + adcs x19,x19,x16 + adcs x20,x20,x7 + adcs x21,x21,x17 + adc x22,x22,x8 + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + adds x10,x10,x19 // accumulate upper half + adcs x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adc x19,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x19,xzr + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 +.long 3573752767 + ret + +.globl _from_mont_256 +.private_extern _from_mont_256 + +.align 5 +_from_mont_256: +.long 3573752639 + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 +.long 3573752767 + ret + + +.globl _redc_mont_256 +.private_extern _redc_mont_256 + +.align 5 +_redc_mont_256: +.long 3573752639 + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + ldp x14,x15,[x1,#32] + ldp x16,x17,[x1,#48] + + adds x10,x10,x14 + adcs x11,x11,x15 + adcs x12,x12,x16 + adcs x13,x13,x17 + adc x9,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x9,xzr + + csel x10,x10,x14,lo + csel x11,x11,x15,lo + csel x12,x12,x16,lo + csel x13,x13,x17,lo + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 +.long 3573752767 + ret + + + +.align 5 +__mul_by_1_mont_256: + mul x3,x4,x10 + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + adc x13,x9,x17 + + ret + diff --git a/crypto/blst_src/build/mach-o/mul_mont_384-armv8.S b/crypto/blst_src/build/mach-o/mul_mont_384-armv8.S new file mode 100644 index 00000000000..5aa2e9f3ae7 --- /dev/null +++ b/crypto/blst_src/build/mach-o/mul_mont_384-armv8.S @@ -0,0 +1,2372 @@ +.text + +.globl _add_mod_384x384 + +.align 5 +_add_mod_384x384: +.long 3573752639 + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __add_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 +.long 3573752767 + ret + + + +.align 5 +__add_mod_384x384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + stp x11, x12, [x0] + adcs x15,x15,x23 + ldp x11, x12, [x1,#48] + adcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + adcs x11,x11,x19 + stp x15, x16, [x0,#32] + adcs x12,x12,x20 + ldp x15, x16, [x1,#80] + adcs x13,x13,x21 + ldp x23,x24,[x2,#80] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + stp x11,x12,[x0,#48] + csel x15,x15,x23,lo + stp x13,x14,[x0,#64] + csel x16,x16,x24,lo + stp x15,x16,[x0,#80] + + ret + + +.globl _sub_mod_384x384 + +.align 5 +_sub_mod_384x384: +.long 3573752639 + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 +.long 3573752767 + ret + + + +.align 5 +__sub_mod_384x384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + stp x11, x12, [x0] + sbcs x15,x15,x23 + ldp x11, x12, [x1,#48] + sbcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + sbcs x11,x11,x19 + stp x15, x16, [x0,#32] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#80] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#80] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + ret + + + +.align 5 +__add_mod_384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + stp x11,x12,[x0] + csel x16,x16,x24,lo + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + + + +.align 5 +__sub_mod_384: + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0] + adc x16,x16,x24 + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + + +.globl _mul_mont_384x +.private_extern _mul_mont_384x + +.align 5 +_mul_mont_384x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#288 // space for 3 768-bit vectors + + mov x26,x0 // save r_ptr + mov x27,x1 // save b_ptr + mov x28,x2 // save b_ptr + + sub x0,sp,#0 // mul_384(t0, a->re, b->re) + bl __mul_384 + + add x1,x1,#48 // mul_384(t1, a->im, b->im) + add x2,x2,#48 + add x0,sp,#96 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + sub x2,x1,#48 + add x0,sp,#240 + bl __add_mod_384 + + add x1,x28,#0 + add x2,x28,#48 + add x0,sp,#192 // t2 + bl __add_mod_384 + + add x1,x0,#0 + add x2,x0,#48 + bl __mul_384 // mul_384(t2, a->re+a->im, b->re+b->im) + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,x0 + add x2,sp,#0 + bl __sub_mod_384x384 + + add x2,sp,#96 + bl __sub_mod_384x384 // t2 = t2-t0-t1 + + add x1,sp,#0 + add x2,sp,#96 + add x0,sp,#0 + bl __sub_mod_384x384 // t0 = t0-t1 + + add x1,sp,#0 // ret->re = redc(t0) + add x0,x26,#0 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + + add x1,sp,#192 // ret->im = redc(t2) + add x0,x0,#48 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#288 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _sqr_mont_384x +.private_extern _sqr_mont_384x + +.align 5 +_sqr_mont_384x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 2 384-bit vectors + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + add x0,sp,#0 + bl __add_mod_384 // t0 = a->re + a->im + + add x0,sp,#48 + bl __sub_mod_384 // t1 = a->re - a->im + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __mul_mont_384 // mul_mont_384(ret->im, a->re, a->im) + + adds x11,x11,x11 // add with itself + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csel x19,x11,x19,lo + csel x20,x12,x20,lo + csel x21,x13,x21,lo + ldp x11,x12,[sp] + csel x22,x14,x22,lo + ldr x17, [sp,#48] + csel x23,x15,x23,lo + ldp x13,x14,[sp,#16] + csel x24,x16,x24,lo + ldp x15,x16,[sp,#32] + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + add x2,sp,#48 + bl __mul_mont_384 // mul_mont_384(ret->re, t0, t1) + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _mul_mont_384 +.private_extern _mul_mont_384 + +.align 5 +_mul_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + + +.align 5 +__mul_mont_384: + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + mov x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*1] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*2] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*3] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*4] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*5] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + adc x17,x17,xzr + + adds x19,x20,x26 + adcs x20,x21,x27 + adcs x21,x22,x28 + adcs x22,x23,x0 + adcs x23,x24,x1 + adcs x24,x25,x3 + adc x25,x17,xzr + + subs x26,x19,x5 + sbcs x27,x20,x6 + sbcs x28,x21,x7 + sbcs x0,x22,x8 + sbcs x1,x23,x9 + sbcs x3,x24,x10 + sbcs xzr, x25,xzr + + csel x11,x19,x26,lo + csel x12,x20,x27,lo + csel x13,x21,x28,lo + csel x14,x22,x0,lo + csel x15,x23,x1,lo + csel x16,x24,x3,lo + ret + + +.globl _sqr_mont_384 +.private_extern _sqr_mont_384 + +.align 5 +_sqr_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for 768-bit vector + mov x4,x3 // adjust for missing b_ptr + + mov x3,x0 // save r_ptr + mov x0,sp + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + mov x1,sp + mov x0,x3 // restore r_ptr + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _sqr_n_mul_mont_383 +.private_extern _sqr_n_mul_mont_383 + +.align 5 +_sqr_n_mul_mont_383: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 768-bit vector + mov x17,x5 // save b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + mov x0,sp +Loop_sqr_383: + bl __sqr_384 + sub x2,x2,#1 // counter + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,sp + bl __mul_by_1_mont_384 + + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // just accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + cbnz x2,Loop_sqr_383 + + mov x2,x17 + ldr x17,[x17] + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.align 5 +__sqr_384: + mul x19,x12,x11 + mul x20,x13,x11 + mul x21,x14,x11 + mul x22,x15,x11 + mul x23,x16,x11 + + umulh x6,x12,x11 + umulh x7,x13,x11 + umulh x8,x14,x11 + umulh x9,x15,x11 + adds x20,x20,x6 + umulh x10,x16,x11 + adcs x21,x21,x7 + mul x7,x13,x12 + adcs x22,x22,x8 + mul x8,x14,x12 + adcs x23,x23,x9 + mul x9,x15,x12 + adc x24,xzr, x10 + mul x10,x16,x12 + + adds x21,x21,x7 + umulh x7,x13,x12 + adcs x22,x22,x8 + umulh x8,x14,x12 + adcs x23,x23,x9 + umulh x9,x15,x12 + adcs x24,x24,x10 + umulh x10,x16,x12 + adc x25,xzr,xzr + + mul x5,x11,x11 + adds x22,x22,x7 + umulh x11, x11,x11 + adcs x23,x23,x8 + mul x8,x14,x13 + adcs x24,x24,x9 + mul x9,x15,x13 + adc x25,x25,x10 + mul x10,x16,x13 + + adds x23,x23,x8 + umulh x8,x14,x13 + adcs x24,x24,x9 + umulh x9,x15,x13 + adcs x25,x25,x10 + umulh x10,x16,x13 + adc x26,xzr,xzr + + mul x6,x12,x12 + adds x24,x24,x8 + umulh x12, x12,x12 + adcs x25,x25,x9 + mul x9,x15,x14 + adc x26,x26,x10 + mul x10,x16,x14 + + adds x25,x25,x9 + umulh x9,x15,x14 + adcs x26,x26,x10 + umulh x10,x16,x14 + adc x27,xzr,xzr + mul x7,x13,x13 + adds x26,x26,x9 + umulh x13, x13,x13 + adc x27,x27,x10 + mul x8,x14,x14 + + mul x10,x16,x15 + umulh x14, x14,x14 + adds x27,x27,x10 + umulh x10,x16,x15 + mul x9,x15,x15 + adc x28,x10,xzr + + adds x19,x19,x19 + adcs x20,x20,x20 + adcs x21,x21,x21 + adcs x22,x22,x22 + adcs x23,x23,x23 + adcs x24,x24,x24 + adcs x25,x25,x25 + adcs x26,x26,x26 + umulh x15, x15,x15 + adcs x27,x27,x27 + mul x10,x16,x16 + adcs x28,x28,x28 + umulh x16, x16,x16 + adc x1,xzr,xzr + + adds x19,x19,x11 + adcs x20,x20,x6 + adcs x21,x21,x12 + adcs x22,x22,x7 + adcs x23,x23,x13 + adcs x24,x24,x8 + adcs x25,x25,x14 + stp x5,x19,[x0] + adcs x26,x26,x9 + stp x20,x21,[x0,#16] + adcs x27,x27,x15 + stp x22,x23,[x0,#32] + adcs x28,x28,x10 + stp x24,x25,[x0,#48] + adc x16,x16,x1 + stp x26,x27,[x0,#64] + stp x28,x16,[x0,#80] + + ret + +.globl _sqr_384 +.private_extern _sqr_384 + +.align 5 +_sqr_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _redc_mont_384 +.private_extern _redc_mont_384 + +.align 5 +_redc_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _from_mont_384 +.private_extern _from_mont_384 + +.align 5 +_from_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + csel x16,x16,x24,lo + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + + +.align 5 +__mul_by_1_mont_384: + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + mul x26,x4,x11 + ldp x15,x16,[x1,#32] + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + ret + + + +.align 5 +__redc_tail_mont_384: + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csel x11,x11,x19,lo + csel x12,x12,x20,lo + csel x13,x13,x21,lo + csel x14,x14,x22,lo + csel x15,x15,x23,lo + csel x16,x16,x24,lo + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + + +.globl _mul_384 +.private_extern _mul_384 + +.align 5 +_mul_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + bl __mul_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + + +.align 5 +__mul_384: + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + + umulh x5,x11,x17 + umulh x6,x12,x17 + umulh x7,x13,x17 + umulh x8,x14,x17 + umulh x9,x15,x17 + umulh x10,x16,x17 + ldr x17,[x2,8*1] + + str x19,[x0] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,xzr, x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(1+1)] + adc x25,xzr,xzr + + str x19,[x0,8*1] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(2+1)] + adc x25,xzr,xzr + + str x19,[x0,8*2] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(3+1)] + adc x25,xzr,xzr + + str x19,[x0,8*3] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(4+1)] + adc x25,xzr,xzr + + str x19,[x0,8*4] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + adc x25,xzr,xzr + + str x19,[x0,8*5] + adds x19,x20,x5 + adcs x20,x21,x6 + adcs x21,x22,x7 + adcs x22,x23,x8 + adcs x23,x24,x9 + adc x24,x25,x10 + + stp x19,x20,[x0,#48] + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ret + + +.globl _mul_382x +.private_extern _mul_382x + +.align 5 +_mul_382x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for two 384-bit vectors + + ldp x11,x12,[x1] + mov x26,x0 // save r_ptr + ldp x19,x20,[x1,#48] + mov x27,x1 // save a_ptr + ldp x13,x14,[x1,#16] + mov x28,x2 // save b_ptr + ldp x21,x22,[x1,#64] + ldp x15,x16,[x1,#32] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x23,x24,[x1,#80] + adcs x6,x12,x20 + ldp x11,x12,[x2] + adcs x7,x13,x21 + ldp x19,x20,[x2,#48] + adcs x8,x14,x22 + ldp x13,x14,[x2,#16] + adcs x9,x15,x23 + ldp x21,x22,[x2,#64] + adc x10,x16,x24 + ldp x15,x16,[x2,#32] + + stp x5,x6,[sp] + adds x5,x11,x19 // t1 = b->re + b->im + ldp x23,x24,[x2,#80] + adcs x6,x12,x20 + stp x7,x8,[sp,#16] + adcs x7,x13,x21 + adcs x8,x14,x22 + stp x9,x10,[sp,#32] + adcs x9,x15,x23 + stp x5,x6,[sp,#48] + adc x10,x16,x24 + stp x7,x8,[sp,#64] + stp x9,x10,[sp,#80] + + bl __mul_384 // _mul_384(ret->re, a->re, b->re) + + add x1,sp,#0 // _mul_384(ret->im, t0, t1) + add x2,sp,#48 + add x0,x26,#96 + bl __mul_384 + + add x1,x27,#48 // _mul_384(tx, a->im, b->im) + add x2,x28,#48 + add x0,sp,#0 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + add x1,x26,#96 // ret->im -= tx + add x2,sp,#0 + add x0,x26,#96 + bl __sub_mod_384x384 + + add x2,x26,#0 // ret->im -= ret->re + bl __sub_mod_384x384 + + add x1,x26,#0 // ret->re -= tx + add x2,sp,#0 + add x0,x26,#0 + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _sqr_382x +.private_extern _sqr_382x + +.align 5 +_sqr_382x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x19,x20,[x1,#48] + ldp x13,x14,[x1,#16] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x21,x22,[x1,#64] + adcs x6,x12,x20 + ldp x15,x16,[x1,#32] + adcs x7,x13,x21 + ldp x23,x24,[x1,#80] + adcs x8,x14,x22 + stp x5,x6,[x0] + adcs x9,x15,x23 + ldp x5,x6,[x2] + adc x10,x16,x24 + stp x7,x8,[x0,#16] + + subs x11,x11,x19 // t1 = a->re - a->im + ldp x7,x8,[x2,#16] + sbcs x12,x12,x20 + stp x9,x10,[x0,#32] + sbcs x13,x13,x21 + ldp x9,x10,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + adds x11,x11,x19 + and x21,x7,x25 + adcs x12,x12,x20 + and x22,x8,x25 + adcs x13,x13,x21 + and x23,x9,x25 + adcs x14,x14,x22 + and x24,x10,x25 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + mov x4,x1 // save a_ptr + add x1,x0,#0 // _mul_384(ret->re, t0, t1) + add x2,x0,#48 + bl __mul_384 + + add x1,x4,#0 // _mul_384(ret->im, a->re, a->im) + add x2,x4,#48 + add x0,x0,#96 + bl __mul_384 + ldr x30,[x29,#8] + + ldp x11,x12,[x0] + ldp x13,x14,[x0,#16] + adds x11,x11,x11 // add with itself + ldp x15,x16,[x0,#32] + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adcs x19,x19,x19 + adcs x20,x20,x20 + stp x11,x12,[x0] + adcs x21,x21,x21 + stp x13,x14,[x0,#16] + adcs x22,x22,x22 + stp x15,x16,[x0,#32] + adcs x23,x23,x23 + stp x19,x20,[x0,#48] + adc x24,x24,x24 + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _sqr_mont_382x +.private_extern _sqr_mont_382x + +.align 5 +_sqr_mont_382x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#112 // space for two 384-bit vectors + word + mov x4,x3 // adjust for missing b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x17,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x5,x11,x17 // t0 = a->re + a->im + adcs x6,x12,x20 + adcs x7,x13,x21 + adcs x8,x14,x22 + adcs x9,x15,x23 + adc x10,x16,x24 + + subs x19,x11,x17 // t1 = a->re - a->im + sbcs x20,x12,x20 + sbcs x21,x13,x21 + sbcs x22,x14,x22 + sbcs x23,x15,x23 + sbcs x24,x16,x24 + sbc x25,xzr,xzr // borrow flag as mask + + stp x5,x6,[sp] + stp x7,x8,[sp,#16] + stp x9,x10,[sp,#32] + stp x19,x20,[sp,#48] + stp x21,x22,[sp,#64] + stp x23,x24,[sp,#80] + str x25,[sp,#96] + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + bl __mul_mont_383_nonred // _mul_mont_384(ret->im, a->re, a->im) + + adds x19,x11,x11 // add with itself + adcs x20,x12,x12 + adcs x21,x13,x13 + adcs x22,x14,x14 + adcs x23,x15,x15 + adc x24,x16,x16 + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + ldp x11,x12,[sp] + ldr x17,[sp,#48] + ldp x13,x14,[sp,#16] + ldp x15,x16,[sp,#32] + + add x2,sp,#48 + bl __mul_mont_383_nonred // _mul_mont_384(ret->im, t0, t1) + ldr x30,[x29,#8] + + ldr x25,[sp,#96] // account for sign from a->re - a->im + ldp x19,x20,[sp] + ldp x21,x22,[sp,#16] + ldp x23,x24,[sp,#32] + + and x19,x19,x25 + and x20,x20,x25 + and x21,x21,x25 + and x22,x22,x25 + and x23,x23,x25 + and x24,x24,x25 + + subs x11,x11,x19 + sbcs x12,x12,x20 + sbcs x13,x13,x21 + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + and x21,x7,x25 + and x22,x8,x25 + and x23,x9,x25 + and x24,x10,x25 + + adds x11,x11,x19 + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#112 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + + +.align 5 +__mul_mont_383_nonred: + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + ldr x17,[x2,8*1] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*2] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*3] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*4] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*5] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + + adds x11,x20,x26 + adcs x12,x21,x27 + adcs x13,x22,x28 + adcs x14,x23,x0 + adcs x15,x24,x1 + adcs x16,x25,x3 + + ret + + +.globl _sgn0_pty_mont_384 +.private_extern _sgn0_pty_mont_384 + +.align 5 +_sgn0_pty_mont_384: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + adds x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + + +.globl _sgn0_pty_mont_384x +.private_extern _sgn0_pty_mont_384x + +.align 5 +_sgn0_pty_mont_384x: +.long 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + add x1,x1,#48 + + and x2,x11,#1 + orr x3,x11,x12 + adds x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + orr x3,x3,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x2,x2,x17 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + orr x1,x11,x12 + adds x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + orr x1,x1,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + cmp x3,#0 + csel x3,x0,x2,eq // a->re==0? prty(a->im) : prty(a->re) + + cmp x1,#0 + csel x1,x0,x2,ne // a->im!=0? sgn0(a->im) : sgn0(a->re) + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 +.long 3573752767 + ret + diff --git a/crypto/blst_src/build/mach-o/mulq_mont_256-x86_64.s b/crypto/blst_src/build/mach-o/mulq_mont_256-x86_64.s new file mode 100644 index 00000000000..842c39225b6 --- /dev/null +++ b/crypto/blst_src/build/mach-o/mulq_mont_256-x86_64.s @@ -0,0 +1,723 @@ +.comm ___blst_platform_cap,4 +.text + +.globl _mul_mont_sparse_256 +.private_extern _mul_mont_sparse_256 + +.p2align 5 +_mul_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz mul_mont_sparse_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rdx),%rax + movq 0(%rsi),%r13 + movq 8(%rsi),%r14 + movq 16(%rsi),%r12 + movq 24(%rsi),%rbp + movq %rdx,%rbx + + movq %rax,%r15 + mulq %r13 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + call __mulq_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sqr_mont_sparse_256 +.private_extern _sqr_mont_sparse_256 + +.p2align 5 +_sqr_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_mont_sparse_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + movq 0(%rsi),%rax + movq %rcx,%r8 + movq 8(%rsi),%r14 + movq %rdx,%rcx + movq 16(%rsi),%r12 + leaq (%rsi),%rbx + movq 24(%rsi),%rbp + + movq %rax,%r15 + mulq %rax + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + call __mulq_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulq_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + mulq %r14 + addq %rax,%r10 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq %rbp + addq %rax,%r12 + movq 8(%rbx),%rax + adcq $0,%rdx + xorq %r14,%r14 + movq %rdx,%r13 + + movq %r9,%rdi + imulq %r8,%r9 + + + movq %rax,%r15 + mulq 0(%rsi) + addq %rax,%r10 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r12 + movq %r15,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq %rdx,%r14 + xorq %r15,%r15 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r9,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %rdi,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r12 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + addq %rdx,%r13 + adcq $0,%r14 + adcq $0,%r15 + movq %r10,%rdi + imulq %r8,%r10 + + + movq %rax,%r9 + mulq 0(%rsi) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq %rdx,%r15 + xorq %r9,%r9 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r10,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %rdi,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r13 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + addq %rdx,%r14 + adcq $0,%r15 + adcq $0,%r9 + movq %r11,%rdi + imulq %r8,%r11 + + + movq %rax,%r10 + mulq 0(%rsi) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rsi) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq %rdx,%r9 + xorq %r10,%r10 + + + mulq 0(%rcx) + addq %rax,%rdi + movq %r11,%rax + adcq %rdx,%rdi + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %rdi,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + addq %rdx,%r15 + adcq $0,%r9 + adcq $0,%r10 + imulq %r8,%rax + movq 8(%rsp),%rsi + + + movq %rax,%r11 + mulq 0(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq %rdx,%r12 + + mulq 8(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r12,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + movq %r14,%rbx + addq %rbp,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %rdx,%r9 + adcq $0,%r10 + + + + + movq %r15,%r12 + subq 0(%rcx),%r13 + sbbq 8(%rcx),%r14 + sbbq 16(%rcx),%r15 + movq %r9,%rbp + sbbq 24(%rcx),%r9 + sbbq $0,%r10 + + cmovcq %rax,%r13 + cmovcq %rbx,%r14 + cmovcq %r12,%r15 + movq %r13,0(%rsi) + cmovcq %rbp,%r9 + movq %r14,8(%rsi) + movq %r15,16(%rsi) + movq %r9,24(%rsi) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _from_mont_256 +.private_extern _from_mont_256 + +.p2align 5 +_from_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz from_mont_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_256 + + + + + + movq %r14,%r10 + movq %r15,%r11 + movq %r9,%r12 + + subq 0(%rbx),%r13 + sbbq 8(%rbx),%r14 + sbbq 16(%rbx),%r15 + sbbq 24(%rbx),%r9 + + cmovncq %r13,%rax + cmovncq %r14,%r10 + cmovncq %r15,%r11 + movq %rax,0(%rdi) + cmovncq %r9,%r12 + movq %r10,8(%rdi) + movq %r11,16(%rdi) + movq %r12,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _redc_mont_256 +.private_extern _redc_mont_256 + +.p2align 5 +_redc_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz redc_mont_256$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_256 + + addq 32(%rsi),%r13 + adcq 40(%rsi),%r14 + movq %r13,%rax + adcq 48(%rsi),%r15 + movq %r14,%r10 + adcq 56(%rsi),%r9 + sbbq %rsi,%rsi + + + + + movq %r15,%r11 + subq 0(%rbx),%r13 + sbbq 8(%rbx),%r14 + sbbq 16(%rbx),%r15 + movq %r9,%r12 + sbbq 24(%rbx),%r9 + sbbq $0,%rsi + + cmovncq %r13,%rax + cmovncq %r14,%r10 + cmovncq %r15,%r11 + movq %rax,0(%rdi) + cmovncq %r9,%r12 + movq %r10,8(%rdi) + movq %r11,16(%rdi) + movq %r12,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulq_by_1_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + movq %rax,%r13 + imulq %rcx,%rax + movq %rax,%r9 + + mulq 0(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r13 + + mulq 8(%rbx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r13,%r10 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 16(%rbx) + movq %r10,%r14 + imulq %rcx,%r10 + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r13,%r11 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 24(%rbx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r13,%r12 + adcq $0,%rdx + movq %rdx,%r13 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + movq %r11,%r15 + imulq %rcx,%r11 + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + movq %r12,%r9 + imulq %rcx,%r12 + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r9 + movq %r12,%rax + adcq %rdx,%r9 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/mulq_mont_384-x86_64.s b/crypto/blst_src/build/mach-o/mulq_mont_384-x86_64.s new file mode 100644 index 00000000000..7052343d0ac --- /dev/null +++ b/crypto/blst_src/build/mach-o/mulq_mont_384-x86_64.s @@ -0,0 +1,3673 @@ +.comm ___blst_platform_cap,4 +.text + + + + + + + + +.p2align 5 +__subq_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__addq_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__subq_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__subq_mod_384_a_is_loaded: + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _mul_mont_384x +.private_extern _mul_mont_384x + +.p2align 5 +_mul_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz mul_mont_384x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $328,%rsp +.cfi_adjust_cfa_offset 328 + + + movq %rdx,%rbx + movq %rdi,32(%rsp) + movq %rsi,24(%rsp) + movq %rdx,16(%rsp) + movq %rcx,8(%rsp) + movq %r8,0(%rsp) + + + + + leaq 40(%rsp),%rdi + call __mulq_384 + + + leaq 48(%rbx),%rbx + leaq 48(%rsi),%rsi + leaq 40+96(%rsp),%rdi + call __mulq_384 + + + movq 8(%rsp),%rcx + leaq -48(%rsi),%rdx + leaq 40+192+48(%rsp),%rdi + call __addq_mod_384 + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq -48(%rdi),%rdi + call __addq_mod_384 + + leaq (%rdi),%rbx + leaq 48(%rdi),%rsi + call __mulq_384 + + + leaq (%rdi),%rsi + leaq 40(%rsp),%rdx + movq 8(%rsp),%rcx + call __subq_mod_384x384 + + leaq (%rdi),%rsi + leaq -96(%rdi),%rdx + call __subq_mod_384x384 + + + leaq 40(%rsp),%rsi + leaq 40+96(%rsp),%rdx + leaq 40(%rsp),%rdi + call __subq_mod_384x384 + + movq %rcx,%rbx + + + leaq 40(%rsp),%rsi + movq 0(%rsp),%rcx + movq 32(%rsp),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + + leaq 40+192(%rsp),%rsi + movq 0(%rsp),%rcx + leaq 48(%rdi),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + leaq 328(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -328-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqr_mont_384x +.private_extern _sqr_mont_384x + +.p2align 5 +_sqr_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_mont_384x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + + + leaq 48(%rsi),%rdx + leaq 32(%rsp),%rdi + call __addq_mod_384 + + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq 32+48(%rsp),%rdi + call __subq_mod_384 + + + movq 16(%rsp),%rsi + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + call __mulq_mont_384 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + movq %r14,%r12 + adcq %r9,%r9 + movq %r15,%r13 + adcq %r10,%r10 + movq %r8,%rax + adcq %r11,%r11 + movq %r9,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %r10,%rbp + sbbq 16(%rcx),%r8 + sbbq 24(%rcx),%r9 + sbbq 32(%rcx),%r10 + movq %r11,%rsi + sbbq 40(%rcx),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r14 + cmovcq %r13,%r15 + cmovcq %rax,%r8 + movq %r14,48(%rdi) + cmovcq %rbx,%r9 + movq %r15,56(%rdi) + cmovcq %rbp,%r10 + movq %r8,64(%rdi) + cmovcq %rsi,%r11 + movq %r9,72(%rdi) + movq %r10,80(%rdi) + movq %r11,88(%rdi) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rax + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%r12 + movq 32+24(%rsp),%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _mul_382x +.private_extern _mul_382x + +.p2align 5 +_mul_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz mul_382x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + leaq 96(%rdi),%rdi + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + movq %rdi,16(%rsp) + movq %rcx,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 48(%rsi),%r8 + adcq 56(%rsi),%r9 + adcq 64(%rsi),%r10 + adcq 72(%rsi),%r11 + adcq 80(%rsi),%r12 + adcq 88(%rsi),%r13 + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + + addq 48(%rdx),%r8 + adcq 56(%rdx),%r9 + adcq 64(%rdx),%r10 + adcq 72(%rdx),%r11 + adcq 80(%rdx),%r12 + adcq 88(%rdx),%r13 + + movq %r8,32+48(%rsp) + movq %r9,32+56(%rsp) + movq %r10,32+64(%rsp) + movq %r11,32+72(%rsp) + movq %r12,32+80(%rsp) + movq %r13,32+88(%rsp) + + + leaq 32+0(%rsp),%rsi + leaq 32+48(%rsp),%rbx + call __mulq_384 + + + movq 0(%rsp),%rsi + movq 8(%rsp),%rbx + leaq -96(%rdi),%rdi + call __mulq_384 + + + leaq 48(%rsi),%rsi + leaq 48(%rbx),%rbx + leaq 32(%rsp),%rdi + call __mulq_384 + + + movq 16(%rsp),%rsi + leaq 32(%rsp),%rdx + movq 24(%rsp),%rcx + movq %rsi,%rdi + call __subq_mod_384x384 + + + leaq 0(%rdi),%rsi + leaq -96(%rdi),%rdx + call __subq_mod_384x384 + + + leaq -96(%rdi),%rsi + leaq 32(%rsp),%rdx + leaq -96(%rdi),%rdi + call __subq_mod_384x384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqr_382x +.private_extern _sqr_382x + +.p2align 5 +_sqr_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_382x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rcx + + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%rbx + movq 32(%rsi),%rbp + movq 40(%rsi),%rdx + + movq %r14,%r8 + addq 48(%rsi),%r14 + movq %r15,%r9 + adcq 56(%rsi),%r15 + movq %rax,%r10 + adcq 64(%rsi),%rax + movq %rbx,%r11 + adcq 72(%rsi),%rbx + movq %rbp,%r12 + adcq 80(%rsi),%rbp + movq %rdx,%r13 + adcq 88(%rsi),%rdx + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rdx,40(%rdi) + + + leaq 48(%rsi),%rdx + leaq 48(%rdi),%rdi + call __subq_mod_384_a_is_loaded + + + leaq (%rdi),%rsi + leaq -48(%rdi),%rbx + leaq -48(%rdi),%rdi + call __mulq_384 + + + movq (%rsp),%rsi + leaq 48(%rsi),%rbx + leaq 96(%rdi),%rdi + call __mulq_384 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq 64(%rdi),%rax + movq 72(%rdi),%rbx + movq 80(%rdi),%rbp + addq %r8,%r8 + movq 88(%rdi),%rdx + adcq %r9,%r9 + movq %r8,0(%rdi) + adcq %r10,%r10 + movq %r9,8(%rdi) + adcq %r11,%r11 + movq %r10,16(%rdi) + adcq %r12,%r12 + movq %r11,24(%rdi) + adcq %r13,%r13 + movq %r12,32(%rdi) + adcq %r14,%r14 + movq %r13,40(%rdi) + adcq %r15,%r15 + movq %r14,48(%rdi) + adcq %rax,%rax + movq %r15,56(%rdi) + adcq %rbx,%rbx + movq %rax,64(%rdi) + adcq %rbp,%rbp + movq %rbx,72(%rdi) + adcq %rdx,%rdx + movq %rbp,80(%rdi) + movq %rdx,88(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -8*7 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _mul_384 +.private_extern _mul_384 + +.p2align 5 +_mul_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz mul_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + + + movq %rdx,%rbx + call __mulq_384 + + movq 0(%rsp),%r12 +.cfi_restore %r12 + movq 8(%rsp),%rbx +.cfi_restore %rbx + movq 16(%rsp),%rbp +.cfi_restore %rbp + leaq 24(%rsp),%rsp +.cfi_adjust_cfa_offset -24 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__mulq_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rbx),%rax + + movq %rax,%rbp + mulq 0(%rsi) + movq %rax,0(%rdi) + movq %rbp,%rax + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r11 + movq 8(%rbx),%rax + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,8(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,16(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,24(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,32(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%rcx + movq %rbp,%rax + adcq $0,%rdx + movq %rcx,40(%rdi) + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %rax,%r12 + movq %rax,%rax + adcq $0,%rdx + addq %r12,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %rcx,48(%rdi) + movq %r8,56(%rdi) + movq %r9,64(%rdi) + movq %r10,72(%rdi) + movq %r11,80(%rdi) + movq %r12,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqr_384 +.private_extern _sqr_384 + +.p2align 5 +_sqr_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + call __sqrq_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__sqrq_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r15 + movq 16(%rsi),%rcx + movq 24(%rsi),%rbx + + + movq %rax,%r14 + mulq %r15 + movq %rax,%r9 + movq %r14,%rax + movq 32(%rsi),%rbp + movq %rdx,%r10 + + mulq %rcx + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + movq 40(%rsi),%rsi + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r11 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq %rbp + addq %rax,%r12 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq %rsi + addq %rax,%r13 + movq %r14,%rax + adcq $0,%rdx + movq %rdx,%r14 + + mulq %rax + xorq %r8,%r8 + movq %rax,0(%rdi) + movq %r15,%rax + addq %r9,%r9 + adcq $0,%r8 + addq %rdx,%r9 + adcq $0,%r8 + movq %r9,8(%rdi) + + mulq %rcx + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r12 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rbp + addq %rax,%r13 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq %rsi + addq %rax,%r14 + movq %r15,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rax + xorq %r9,%r9 + addq %rax,%r8 + movq %rcx,%rax + addq %r10,%r10 + adcq %r11,%r11 + adcq $0,%r9 + addq %r8,%r10 + adcq %rdx,%r11 + adcq $0,%r9 + movq %r10,16(%rdi) + + mulq %rbx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + movq %r11,24(%rdi) + movq %rdx,%r8 + + mulq %rbp + addq %rax,%r14 + movq %rcx,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq %rsi + addq %rax,%r15 + movq %rcx,%rax + adcq $0,%rdx + addq %r8,%r15 + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + xorq %r11,%r11 + addq %rax,%r9 + movq %rbx,%rax + addq %r12,%r12 + adcq %r13,%r13 + adcq $0,%r11 + addq %r9,%r12 + adcq %rdx,%r13 + adcq $0,%r11 + movq %r12,32(%rdi) + + + mulq %rbp + addq %rax,%r15 + movq %rbx,%rax + adcq $0,%rdx + movq %r13,40(%rdi) + movq %rdx,%r8 + + mulq %rsi + addq %rax,%rcx + movq %rbx,%rax + adcq $0,%rdx + addq %r8,%rcx + adcq $0,%rdx + movq %rdx,%rbx + + mulq %rax + xorq %r12,%r12 + addq %rax,%r11 + movq %rbp,%rax + addq %r14,%r14 + adcq %r15,%r15 + adcq $0,%r12 + addq %r11,%r14 + adcq %rdx,%r15 + movq %r14,48(%rdi) + adcq $0,%r12 + movq %r15,56(%rdi) + + + mulq %rsi + addq %rax,%rbx + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + xorq %r13,%r13 + addq %rax,%r12 + movq %rsi,%rax + addq %rcx,%rcx + adcq %rbx,%rbx + adcq $0,%r13 + addq %r12,%rcx + adcq %rdx,%rbx + movq %rcx,64(%rdi) + adcq $0,%r13 + movq %rbx,72(%rdi) + + + mulq %rax + addq %r13,%rax + addq %rbp,%rbp + adcq $0,%rdx + addq %rbp,%rax + adcq $0,%rdx + movq %rax,80(%rdi) + movq %rdx,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sqr_mont_384 +.private_extern _sqr_mont_384 + +.p2align 5 +_sqr_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $120,%rsp +.cfi_adjust_cfa_offset 8*15 + + + movq %rcx,96(%rsp) + movq %rdx,104(%rsp) + movq %rdi,112(%rsp) + + movq %rsp,%rdi + call __sqrq_384 + + leaq 0(%rsp),%rsi + movq 96(%rsp),%rcx + movq 104(%rsp),%rbx + movq 112(%rsp),%rdi + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + leaq 120(%rsp),%r8 + movq 120(%rsp),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -8*21 + + .byte 0xf3,0xc3 +.cfi_endproc + + + + +.globl _redc_mont_384 +.private_extern _redc_mont_384 + +.p2align 5 +_redc_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz redc_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + + + +.globl _from_mont_384 +.private_extern _from_mont_384 + +.p2align 5 +_from_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz from_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulq_by_1_mont_384 + + + + + + movq %r15,%rcx + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulq_by_1_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %rax,%r14 + imulq %rcx,%rax + movq %rax,%r8 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r9 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r10 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %r9,%r15 + imulq %rcx,%r9 + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 32(%rbx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 40(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r9,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r11 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %r10,%r8 + imulq %rcx,%r10 + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rbx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r8 + movq %r10,%rax + adcq %rdx,%r8 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rbx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %r11,%r9 + imulq %rcx,%r11 + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r8,%r15 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 0(%rbx) + addq %rax,%r9 + movq %r11,%rax + adcq %rdx,%r9 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %r12,%r10 + imulq %rcx,%r12 + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rbx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r9,%r8 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 0(%rbx) + addq %rax,%r10 + movq %r12,%rax + adcq %rdx,%r10 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %r13,%r11 + imulq %rcx,%r13 + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rbx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r8 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rbx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r10,%r9 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rbx) + addq %rax,%r11 + movq %r13,%rax + adcq %rdx,%r11 + + mulq 8(%rbx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rbx) + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rbx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r9 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rbx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__redq_tail_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq 48(%rsi),%r14 + movq %r14,%rax + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + movq %r15,%rcx + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + sbbq %r12,%r12 + + + + + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sgn0_pty_mont_384 +.private_extern _sgn0_pty_mont_384 + +.p2align 5 +_sgn0_pty_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sgn0_pty_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 0(%rdi),%rsi + movq %rdx,%rcx + call __mulq_by_1_mont_384 + + xorq %rax,%rax + movq %r14,%r13 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + notq %rax + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sgn0_pty_mont_384x +.private_extern _sgn0_pty_mont_384x + +.p2align 5 +_sgn0_pty_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sgn0_pty_mont_384x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 48(%rdi),%rsi + movq %rdx,%rcx + call __mulq_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + leaq 0(%rdi),%rsi + xorq %rdi,%rdi + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rdi + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rdi + + movq %r14,0(%rsp) + notq %rdi + andq $1,%r13 + andq $2,%rdi + orq %r13,%rdi + + call __mulq_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + xorq %rax,%rax + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + movq 0(%rsp),%r12 + + notq %rax + + testq %r14,%r14 + cmovzq %rdi,%r13 + + testq %r12,%r12 + cmovnzq %rdi,%rax + + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _mul_mont_384 +.private_extern _mul_mont_384 + +.p2align 5 +_mul_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz mul_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $24,%rsp +.cfi_adjust_cfa_offset 8*3 + + + movq 0(%rdx),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq %rdx,%rbx + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + + call __mulq_mont_384 + + movq 24(%rsp),%r15 +.cfi_restore %r15 + movq 32(%rsp),%r14 +.cfi_restore %r14 + movq 40(%rsp),%r13 +.cfi_restore %r13 + movq 48(%rsp),%r12 +.cfi_restore %r12 + movq 56(%rsp),%rbx +.cfi_restore %rbx + movq 64(%rsp),%rbp +.cfi_restore %rbp + leaq 72(%rsp),%rsp +.cfi_adjust_cfa_offset -72 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulq_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rax,%rdi + mulq %r14 + movq %rax,%r8 + movq %rdi,%rax + movq %rdx,%r9 + + mulq %r15 + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r12 + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r11 + + movq %r8,%rbp + imulq 8(%rsp),%r8 + + mulq %r13 + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq 40(%rsi) + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + xorq %r15,%r15 + movq %rdx,%r14 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r8,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r13 + movq 8(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 8(%rsi) + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r10 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + movq %r9,%rbp + imulq 8(%rsp),%r9 + + mulq 24(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rsi) + addq %r8,%r14 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r14 + movq %r9,%rax + adcq %rdx,%r15 + adcq $0,%r8 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r9,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r14 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq %rdx,%r15 + adcq $0,%r8 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r10 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r11 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + movq %r10,%rbp + imulq 8(%rsp),%r10 + + mulq 24(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rsi) + addq %r9,%r15 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r15 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r10,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r11 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r15 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r11 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 8(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + movq %r11,%rbp + imulq 8(%rsp),%r11 + + mulq 24(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rsi) + addq %r10,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r11,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r12 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r8 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r12 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 8(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + movq %r12,%rbp + imulq 8(%rsp),%r12 + + mulq 24(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rsi) + addq %rax,%r8 + movq %rdi,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %r11,%r9 + adcq $0,%rdx + xorq %r11,%r11 + addq %rax,%r9 + movq %r12,%rax + adcq %rdx,%r10 + adcq $0,%r11 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r12,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r13 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %rbp,%r8 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r9 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %rax,%rdi + mulq 0(%rsi) + addq %rax,%r13 + movq %rdi,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 8(%rsi) + addq %rax,%r14 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r14 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 16(%rsi) + addq %rax,%r15 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r15 + adcq $0,%rdx + movq %rdx,%r12 + + movq %r13,%rbp + imulq 8(%rsp),%r13 + + mulq 24(%rsi) + addq %rax,%r8 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r8 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r9 + movq %rdi,%rax + adcq $0,%rdx + addq %r12,%r9 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 40(%rsi) + addq %r12,%r10 + adcq $0,%rdx + xorq %r12,%r12 + addq %rax,%r10 + movq %r13,%rax + adcq %rdx,%r11 + adcq $0,%r12 + + mulq 0(%rcx) + addq %rax,%rbp + movq %r13,%rax + adcq %rdx,%rbp + + mulq 8(%rcx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r14 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rcx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r15 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 24(%rcx) + addq %rbp,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 32(%rcx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %rbp,%r9 + adcq $0,%rdx + movq %rdx,%rbp + + mulq 40(%rcx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %rbp,%r10 + adcq %rdx,%r11 + adcq $0,%r12 + + + + + movq 16(%rsp),%rdi + subq 0(%rcx),%r14 + movq %r15,%rdx + sbbq 8(%rcx),%r15 + movq %r8,%rbx + sbbq 16(%rcx),%r8 + movq %r9,%rsi + sbbq 24(%rcx),%r9 + movq %r10,%rbp + sbbq 32(%rcx),%r10 + movq %r11,%r13 + sbbq 40(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rdx,%r15 + cmovcq %rbx,%r8 + movq %r14,0(%rdi) + cmovcq %rsi,%r9 + movq %r15,8(%rdi) + cmovcq %rbp,%r10 + movq %r8,16(%rdi) + cmovcq %r13,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqr_n_mul_mont_384 +.private_extern _sqr_n_mul_mont_384 + +.p2align 5 +_sqr_n_mul_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_n_mul_mont_384$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 8*17 + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rcx,16(%rsp) + leaq 32(%rsp),%rdi + movq %r9,24(%rsp) + movq (%r9),%xmm2 + +L$oop_sqr_384: + movd %edx,%xmm1 + + call __sqrq_384 + + leaq 0(%rdi),%rsi + movq 0(%rsp),%rcx + movq 16(%rsp),%rbx + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + movd %xmm1,%edx + leaq 0(%rdi),%rsi + decl %edx + jnz L$oop_sqr_384 + +.byte 102,72,15,126,208 + movq %rbx,%rcx + movq 24(%rsp),%rbx + + + + + + + movq %r8,%r12 + movq %r9,%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 136(%rsp),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -8*23 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sqr_n_mul_mont_383 +.private_extern _sqr_n_mul_mont_383 + +.p2align 5 +_sqr_n_mul_mont_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_n_mul_mont_383$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 8*17 + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rcx,16(%rsp) + leaq 32(%rsp),%rdi + movq %r9,24(%rsp) + movq (%r9),%xmm2 + +L$oop_sqr_383: + movd %edx,%xmm1 + + call __sqrq_384 + + leaq 0(%rdi),%rsi + movq 0(%rsp),%rcx + movq 16(%rsp),%rbx + call __mulq_by_1_mont_384 + + movd %xmm1,%edx + addq 48(%rsi),%r14 + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + leaq 0(%rdi),%rsi + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + decl %edx + jnz L$oop_sqr_383 + +.byte 102,72,15,126,208 + movq %rbx,%rcx + movq 24(%rsp),%rbx + + + + + + + movq %r8,%r12 + movq %r9,%r13 + + call __mulq_mont_384 + + leaq 136(%rsp),%r8 + movq 136(%rsp),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -8*23 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulq_mont_383_nonred: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq %rax,%rbp + mulq %r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r12 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + movq %r8,%r15 + imulq 8(%rsp),%r8 + + mulq %r13 + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r13 + + mulq 40(%rsi) + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rcx) + addq %rax,%r15 + movq %r8,%rax + adcq %rdx,%r15 + + mulq 8(%rcx) + addq %rax,%r9 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r9 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rcx) + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rcx) + addq %r15,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rcx) + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rcx) + addq %rax,%r13 + movq 8(%rbx),%rax + adcq $0,%rdx + addq %r15,%r13 + adcq %rdx,%r14 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 8(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r10 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r11 + adcq $0,%rdx + movq %rdx,%r15 + + movq %r9,%r8 + imulq 8(%rsp),%r9 + + mulq 24(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 32(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 40(%rsi) + addq %r15,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rcx) + addq %rax,%r8 + movq %r9,%rax + adcq %rdx,%r8 + + mulq 8(%rcx) + addq %rax,%r10 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r10 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rcx) + addq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 24(%rcx) + addq %r8,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rcx) + addq %rax,%r13 + movq %r9,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rcx) + addq %rax,%r14 + movq 16(%rbx),%rax + adcq $0,%rdx + addq %r8,%r14 + adcq %rdx,%r15 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 8(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r11 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 16(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r12 + adcq $0,%rdx + movq %rdx,%r8 + + movq %r10,%r9 + imulq 8(%rsp),%r10 + + mulq 24(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r13 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 32(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r8,%r14 + adcq $0,%rdx + movq %rdx,%r8 + + mulq 40(%rsi) + addq %r8,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r8 + + mulq 0(%rcx) + addq %rax,%r9 + movq %r10,%rax + adcq %rdx,%r9 + + mulq 8(%rcx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r11 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rcx) + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 24(%rcx) + addq %r9,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rcx) + addq %rax,%r14 + movq %r10,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rcx) + addq %rax,%r15 + movq 24(%rbx),%rax + adcq $0,%rdx + addq %r9,%r15 + adcq %rdx,%r8 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r12 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 16(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r13 + adcq $0,%rdx + movq %rdx,%r9 + + movq %r11,%r10 + imulq 8(%rsp),%r11 + + mulq 24(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r14 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 32(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r9,%r15 + adcq $0,%rdx + movq %rdx,%r9 + + mulq 40(%rsi) + addq %r9,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r9 + + mulq 0(%rcx) + addq %rax,%r10 + movq %r11,%rax + adcq %rdx,%r10 + + mulq 8(%rcx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rcx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rcx) + addq %r10,%r14 + adcq $0,%rdx + addq %rax,%r14 + movq %r11,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rcx) + addq %rax,%r15 + movq %r11,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rcx) + addq %rax,%r8 + movq 32(%rbx),%rax + adcq $0,%rdx + addq %r10,%r8 + adcq %rdx,%r9 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 8(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + movq %r12,%r11 + imulq 8(%rsp),%r12 + + mulq 24(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 32(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r10,%r8 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 40(%rsi) + addq %r10,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rcx) + addq %rax,%r11 + movq %r12,%rax + adcq %rdx,%r11 + + mulq 8(%rcx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rcx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rcx) + addq %r11,%r15 + adcq $0,%rdx + addq %rax,%r15 + movq %r12,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rcx) + addq %rax,%r8 + movq %r12,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rcx) + addq %rax,%r9 + movq 40(%rbx),%rax + adcq $0,%rdx + addq %r11,%r9 + adcq %rdx,%r10 + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 8(%rsi) + addq %rax,%r14 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rsi) + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + movq %r13,%r12 + imulq 8(%rsp),%r13 + + mulq 24(%rsi) + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r8 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 32(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + addq %r11,%r9 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 40(%rsi) + addq %r11,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq 0(%rcx) + addq %rax,%r12 + movq %r13,%rax + adcq %rdx,%r12 + + mulq 8(%rcx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r14 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 16(%rcx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r15 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 24(%rcx) + addq %r12,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r13,%rax + adcq $0,%rdx + movq %rdx,%r12 + + mulq 32(%rcx) + addq %rax,%r9 + movq %r13,%rax + adcq $0,%rdx + addq %r12,%r9 + adcq $0,%rdx + movq %rdx,%r12 + + mulq 40(%rcx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r12,%r10 + adcq %rdx,%r11 + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqr_mont_382x +.private_extern _sqr_mont_382x + +.p2align 5 +_sqr_mont_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +#ifdef __BLST_PORTABLE__ + testl $1,___blst_platform_cap(%rip) + jnz sqr_mont_382x$1 +#endif + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rsi,16(%rsp) + movq %rdi,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rdx + adcq 72(%rsi),%r11 + movq %r12,%rbx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rdx + sbbq 80(%rsi),%rbx + sbbq 88(%rsi),%rbp + sbbq %rdi,%rdi + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + movq %r14,32+48(%rsp) + movq %r15,32+56(%rsp) + movq %rax,32+64(%rsp) + movq %rdx,32+72(%rsp) + movq %rbx,32+80(%rsp) + movq %rbp,32+88(%rsp) + movq %rdi,32+96(%rsp) + + + + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rax + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + movq 24(%rsp),%rdi + call __mulq_mont_383_nonred + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + + movq %r14,48(%rdi) + movq %r15,56(%rdi) + movq %r8,64(%rdi) + movq %r9,72(%rdi) + movq %r10,80(%rdi) + movq %r11,88(%rdi) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rax + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%r12 + movq 32+24(%rsp),%r13 + + call __mulq_mont_383_nonred + movq 32+96(%rsp),%rsi + movq 32+0(%rsp),%r12 + movq 32+8(%rsp),%r13 + andq %rsi,%r12 + movq 32+16(%rsp),%rax + andq %rsi,%r13 + movq 32+24(%rsp),%rbx + andq %rsi,%rax + movq 32+32(%rsp),%rbp + andq %rsi,%rbx + andq %rsi,%rbp + andq 32+40(%rsp),%rsi + + subq %r12,%r14 + movq 0(%rcx),%r12 + sbbq %r13,%r15 + movq 8(%rcx),%r13 + sbbq %rax,%r8 + movq 16(%rcx),%rax + sbbq %rbx,%r9 + movq 24(%rcx),%rbx + sbbq %rbp,%r10 + movq 32(%rcx),%rbp + sbbq %rsi,%r11 + sbbq %rsi,%rsi + + andq %rsi,%r12 + andq %rsi,%r13 + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rbp + andq 40(%rcx),%rsi + + addq %r12,%r14 + adcq %r13,%r15 + adcq %rax,%r8 + adcq %rbx,%r9 + adcq %rbp,%r10 + adcq %rsi,%r11 + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/mulx_mont_256-x86_64.s b/crypto/blst_src/build/mach-o/mulx_mont_256-x86_64.s new file mode 100644 index 00000000000..ae9a76b739c --- /dev/null +++ b/crypto/blst_src/build/mach-o/mulx_mont_256-x86_64.s @@ -0,0 +1,623 @@ +.text + +.globl _mulx_mont_sparse_256 +.private_extern _mulx_mont_sparse_256 + +.p2align 5 +_mulx_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_mont_sparse_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rbp + movq 24(%rsi),%r9 + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%rax,%r11 + call __mulx_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sqrx_mont_sparse_256 +.private_extern _sqrx_mont_sparse_256 + +.p2align 5 +_sqrx_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_sparse_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + movq %rcx,%r8 + movq %rdx,%rcx + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rbp + movq 24(%rsi),%r9 + leaq -128(%rbx),%rsi + leaq -128(%rcx),%rcx + + mulxq %rdx,%rax,%r11 + call __mulx_mont_sparse_256 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulx_mont_sparse_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + mulxq %r15,%r15,%r12 + mulxq %rbp,%rbp,%r13 + addq %r15,%r11 + mulxq %r9,%r9,%r14 + movq 8(%rbx),%rdx + adcq %rbp,%r12 + adcq %r9,%r13 + adcq $0,%r14 + + movq %rax,%r10 + imulq %r8,%rax + + + xorq %r15,%r15 + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r11 + adcxq %r9,%r12 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r12 + adcxq %r9,%r13 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r14 + adcxq %r15,%r9 + adoxq %r9,%r15 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r10 + adoxq %r11,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r12 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r12 + adoxq %r9,%r13 + + mulxq 24+128(%rcx),%rbp,%r9 + movq 16(%rbx),%rdx + adcxq %rbp,%r13 + adoxq %r9,%r14 + adcxq %r10,%r14 + adoxq %r10,%r15 + adcxq %r10,%r15 + adoxq %r10,%r10 + adcq $0,%r10 + movq %rax,%r11 + imulq %r8,%rax + + + xorq %rbp,%rbp + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r12 + adcxq %r9,%r13 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r14 + adcxq %r9,%r15 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r15 + adcxq %r10,%r9 + adoxq %r9,%r10 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r11 + adoxq %r12,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r13 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r13 + adoxq %r9,%r14 + + mulxq 24+128(%rcx),%rbp,%r9 + movq 24(%rbx),%rdx + adcxq %rbp,%r14 + adoxq %r9,%r15 + adcxq %r11,%r15 + adoxq %r11,%r10 + adcxq %r11,%r10 + adoxq %r11,%r11 + adcq $0,%r11 + movq %rax,%r12 + imulq %r8,%rax + + + xorq %rbp,%rbp + mulxq 0+128(%rsi),%rbp,%r9 + adoxq %rbp,%r13 + adcxq %r9,%r14 + + mulxq 8+128(%rsi),%rbp,%r9 + adoxq %rbp,%r14 + adcxq %r9,%r15 + + mulxq 16+128(%rsi),%rbp,%r9 + adoxq %rbp,%r15 + adcxq %r9,%r10 + + mulxq 24+128(%rsi),%rbp,%r9 + movq %rax,%rdx + adoxq %rbp,%r10 + adcxq %r11,%r9 + adoxq %r9,%r11 + + + mulxq 0+128(%rcx),%rbp,%rax + adcxq %rbp,%r12 + adoxq %r13,%rax + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%rax + adoxq %r9,%r14 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r14 + adoxq %r9,%r15 + + mulxq 24+128(%rcx),%rbp,%r9 + movq %rax,%rdx + adcxq %rbp,%r15 + adoxq %r9,%r10 + adcxq %r12,%r10 + adoxq %r12,%r11 + adcxq %r12,%r11 + adoxq %r12,%r12 + adcq $0,%r12 + imulq %r8,%rdx + + + xorq %rbp,%rbp + mulxq 0+128(%rcx),%r13,%r9 + adcxq %rax,%r13 + adoxq %r9,%r14 + + mulxq 8+128(%rcx),%rbp,%r9 + adcxq %rbp,%r14 + adoxq %r9,%r15 + + mulxq 16+128(%rcx),%rbp,%r9 + adcxq %rbp,%r15 + adoxq %r9,%r10 + + mulxq 24+128(%rcx),%rbp,%r9 + movq %r14,%rdx + leaq 128(%rcx),%rcx + adcxq %rbp,%r10 + adoxq %r9,%r11 + movq %r15,%rax + adcxq %r13,%r11 + adoxq %r13,%r12 + adcq $0,%r12 + + + + + movq %r10,%rbp + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + sbbq 16(%rcx),%r10 + movq %r11,%r9 + sbbq 24(%rcx),%r11 + sbbq $0,%r12 + + cmovcq %rdx,%r14 + cmovcq %rax,%r15 + cmovcq %rbp,%r10 + movq %r14,0(%rdi) + cmovcq %r9,%r11 + movq %r15,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _fromx_mont_256 +.private_extern _fromx_mont_256 + +.p2align 5 +_fromx_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +from_mont_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_256 + + + + + + movq %r15,%rdx + movq %r10,%r12 + movq %r11,%r13 + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r10 + sbbq 24(%rbx),%r11 + + cmovncq %r14,%rax + cmovncq %r15,%rdx + cmovncq %r10,%r12 + movq %rax,0(%rdi) + cmovncq %r11,%r13 + movq %rdx,8(%rdi) + movq %r12,16(%rdi) + movq %r13,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _redcx_mont_256 +.private_extern _redcx_mont_256 + +.p2align 5 +_redcx_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +redc_mont_256$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_256 + + addq 32(%rsi),%r14 + adcq 40(%rsi),%r15 + movq %r14,%rax + adcq 48(%rsi),%r10 + movq %r15,%rdx + adcq 56(%rsi),%r11 + sbbq %rsi,%rsi + + + + + movq %r10,%r12 + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r10 + movq %r11,%r13 + sbbq 24(%rbx),%r11 + sbbq $0,%rsi + + cmovncq %r14,%rax + cmovncq %r15,%rdx + cmovncq %r10,%r12 + movq %rax,0(%rdi) + cmovncq %r11,%r13 + movq %rdx,8(%rdi) + movq %r12,16(%rdi) + movq %r13,24(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulx_by_1_mont_256: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rax + movq 8(%rsi),%r11 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + + movq %rax,%r14 + imulq %rcx,%rax + movq %rax,%r10 + + mulq 0(%rbx) + addq %rax,%r14 + movq %r10,%rax + adcq %rdx,%r14 + + mulq 8(%rbx) + addq %rax,%r11 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r11 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 16(%rbx) + movq %r11,%r15 + imulq %rcx,%r11 + addq %rax,%r12 + movq %r10,%rax + adcq $0,%rdx + addq %r14,%r12 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 24(%rbx) + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r14,%r13 + adcq $0,%rdx + movq %rdx,%r14 + + mulq 0(%rbx) + addq %rax,%r15 + movq %r11,%rax + adcq %rdx,%r15 + + mulq 8(%rbx) + addq %rax,%r12 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r12 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 16(%rbx) + movq %r12,%r10 + imulq %rcx,%r12 + addq %rax,%r13 + movq %r11,%rax + adcq $0,%rdx + addq %r15,%r13 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 24(%rbx) + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r15,%r14 + adcq $0,%rdx + movq %rdx,%r15 + + mulq 0(%rbx) + addq %rax,%r10 + movq %r12,%rax + adcq %rdx,%r10 + + mulq 8(%rbx) + addq %rax,%r13 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rbx) + movq %r13,%r11 + imulq %rcx,%r13 + addq %rax,%r14 + movq %r12,%rax + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 24(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + movq %rdx,%r10 + + mulq 0(%rbx) + addq %rax,%r11 + movq %r13,%rax + adcq %rdx,%r11 + + mulq 8(%rbx) + addq %rax,%r14 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 16(%rbx) + addq %rax,%r15 + movq %r13,%rax + adcq $0,%rdx + addq %r11,%r15 + adcq $0,%rdx + movq %rdx,%r11 + + mulq 24(%rbx) + addq %rax,%r10 + movq %r14,%rax + adcq $0,%rdx + addq %r11,%r10 + adcq $0,%rdx + movq %rdx,%r11 + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/mulx_mont_384-x86_64.s b/crypto/blst_src/build/mach-o/mulx_mont_384-x86_64.s new file mode 100644 index 00000000000..c5afeec8a51 --- /dev/null +++ b/crypto/blst_src/build/mach-o/mulx_mont_384-x86_64.s @@ -0,0 +1,2975 @@ +.text + + + + + + + + +.p2align 5 +__subx_mod_384x384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + + subq 0(%rdx),%r8 + movq 56(%rsi),%r15 + sbbq 8(%rdx),%r9 + movq 64(%rsi),%rax + sbbq 16(%rdx),%r10 + movq 72(%rsi),%rbx + sbbq 24(%rdx),%r11 + movq 80(%rsi),%rbp + sbbq 32(%rdx),%r12 + movq 88(%rsi),%rsi + sbbq 40(%rdx),%r13 + movq %r8,0(%rdi) + sbbq 48(%rdx),%r14 + movq 0(%rcx),%r8 + movq %r9,8(%rdi) + sbbq 56(%rdx),%r15 + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + sbbq 64(%rdx),%rax + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + sbbq 72(%rdx),%rbx + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + sbbq 80(%rdx),%rbp + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + sbbq 88(%rdx),%rsi + movq 40(%rcx),%r13 + sbbq %rdx,%rdx + + andq %rdx,%r8 + andq %rdx,%r9 + andq %rdx,%r10 + andq %rdx,%r11 + andq %rdx,%r12 + andq %rdx,%r13 + + addq %r8,%r14 + adcq %r9,%r15 + movq %r14,48(%rdi) + adcq %r10,%rax + movq %r15,56(%rdi) + adcq %r11,%rbx + movq %rax,64(%rdi) + adcq %r12,%rbp + movq %rbx,72(%rdi) + adcq %r13,%rsi + movq %rbp,80(%rdi) + movq %rsi,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__addx_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + movq %r8,%r14 + adcq 24(%rdx),%r11 + movq %r9,%r15 + adcq 32(%rdx),%r12 + movq %r10,%rax + adcq 40(%rdx),%r13 + movq %r11,%rbx + sbbq %rdx,%rdx + + subq 0(%rcx),%r8 + sbbq 8(%rcx),%r9 + movq %r12,%rbp + sbbq 16(%rcx),%r10 + sbbq 24(%rcx),%r11 + sbbq 32(%rcx),%r12 + movq %r13,%rsi + sbbq 40(%rcx),%r13 + sbbq $0,%rdx + + cmovcq %r14,%r8 + cmovcq %r15,%r9 + cmovcq %rax,%r10 + movq %r8,0(%rdi) + cmovcq %rbx,%r11 + movq %r9,8(%rdi) + cmovcq %rbp,%r12 + movq %r10,16(%rdi) + cmovcq %rsi,%r13 + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__subx_mod_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + +__subx_mod_384_a_is_loaded: + subq 0(%rdx),%r8 + movq 0(%rcx),%r14 + sbbq 8(%rdx),%r9 + movq 8(%rcx),%r15 + sbbq 16(%rdx),%r10 + movq 16(%rcx),%rax + sbbq 24(%rdx),%r11 + movq 24(%rcx),%rbx + sbbq 32(%rdx),%r12 + movq 32(%rcx),%rbp + sbbq 40(%rdx),%r13 + movq 40(%rcx),%rsi + sbbq %rdx,%rdx + + andq %rdx,%r14 + andq %rdx,%r15 + andq %rdx,%rax + andq %rdx,%rbx + andq %rdx,%rbp + andq %rdx,%rsi + + addq %r14,%r8 + adcq %r15,%r9 + movq %r8,0(%rdi) + adcq %rax,%r10 + movq %r9,8(%rdi) + adcq %rbx,%r11 + movq %r10,16(%rdi) + adcq %rbp,%r12 + movq %r11,24(%rdi) + adcq %rsi,%r13 + movq %r12,32(%rdi) + movq %r13,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _mulx_mont_384x +.private_extern _mulx_mont_384x + +.p2align 5 +_mulx_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_mont_384x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $328,%rsp +.cfi_adjust_cfa_offset 328 + + + movq %rdx,%rbx + movq %rdi,32(%rsp) + movq %rsi,24(%rsp) + movq %rdx,16(%rsp) + movq %rcx,8(%rsp) + movq %r8,0(%rsp) + + + + + leaq 40(%rsp),%rdi + call __mulx_384 + + + leaq 48(%rbx),%rbx + leaq 128+48(%rsi),%rsi + leaq 96(%rdi),%rdi + call __mulx_384 + + + movq 8(%rsp),%rcx + leaq (%rbx),%rsi + leaq -48(%rbx),%rdx + leaq 40+192+48(%rsp),%rdi + call __addx_mod_384 + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq -48(%rdi),%rdi + call __addx_mod_384 + + leaq (%rdi),%rbx + leaq 48(%rdi),%rsi + call __mulx_384 + + + leaq (%rdi),%rsi + leaq 40(%rsp),%rdx + movq 8(%rsp),%rcx + call __subx_mod_384x384 + + leaq (%rdi),%rsi + leaq -96(%rdi),%rdx + call __subx_mod_384x384 + + + leaq 40(%rsp),%rsi + leaq 40+96(%rsp),%rdx + leaq 40(%rsp),%rdi + call __subx_mod_384x384 + + leaq (%rcx),%rbx + + + leaq 40(%rsp),%rsi + movq 0(%rsp),%rcx + movq 32(%rsp),%rdi + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + + leaq 40+192(%rsp),%rsi + movq 0(%rsp),%rcx + leaq 48(%rdi),%rdi + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + leaq 328(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -328-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqrx_mont_384x +.private_extern _sqrx_mont_384x + +.p2align 5 +_sqrx_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_384x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + + movq %rdi,16(%rsp) + movq %rsi,24(%rsp) + + + leaq 48(%rsi),%rdx + leaq 32(%rsp),%rdi + call __addx_mod_384 + + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rdx + leaq 32+48(%rsp),%rdi + call __subx_mod_384 + + + movq 24(%rsp),%rsi + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + addq %rdx,%rdx + adcq %r15,%r15 + adcq %rax,%rax + movq %rdx,%r8 + adcq %r12,%r12 + movq %r15,%r9 + adcq %rdi,%rdi + movq %rax,%r10 + adcq %rbp,%rbp + movq %r12,%r11 + sbbq %rsi,%rsi + + subq 0(%rcx),%rdx + sbbq 8(%rcx),%r15 + movq %rdi,%r13 + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%r12 + sbbq 32(%rcx),%rdi + movq %rbp,%r14 + sbbq 40(%rcx),%rbp + sbbq $0,%rsi + + cmovcq %r8,%rdx + cmovcq %r9,%r15 + cmovcq %r10,%rax + movq %rdx,48(%rbx) + cmovcq %r11,%r12 + movq %r15,56(%rbx) + cmovcq %r13,%rdi + movq %rax,64(%rbx) + cmovcq %r14,%rbp + movq %r12,72(%rbx) + movq %rdi,80(%rbx) + movq %rbp,88(%rbx) + + leaq 32(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rdx + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%rax + movq 32+24(%rsp),%r12 + movq 32+32(%rsp),%rdi + movq 32+40(%rsp),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _mulx_382x +.private_extern _mulx_382x + +.p2align 5 +_mulx_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_382x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + leaq 96(%rdi),%rdi + movq %rsi,0(%rsp) + movq %rdx,8(%rsp) + movq %rdi,16(%rsp) + movq %rcx,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + addq 48(%rsi),%r8 + adcq 56(%rsi),%r9 + adcq 64(%rsi),%r10 + adcq 72(%rsi),%r11 + adcq 80(%rsi),%r12 + adcq 88(%rsi),%r13 + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + + addq 48(%rdx),%r8 + adcq 56(%rdx),%r9 + adcq 64(%rdx),%r10 + adcq 72(%rdx),%r11 + adcq 80(%rdx),%r12 + adcq 88(%rdx),%r13 + + movq %r8,32+48(%rsp) + movq %r9,32+56(%rsp) + movq %r10,32+64(%rsp) + movq %r11,32+72(%rsp) + movq %r12,32+80(%rsp) + movq %r13,32+88(%rsp) + + + leaq 32+0(%rsp),%rsi + leaq 32+48(%rsp),%rbx + call __mulx_384 + + + movq 0(%rsp),%rsi + movq 8(%rsp),%rbx + leaq -96(%rdi),%rdi + call __mulx_384 + + + leaq 48+128(%rsi),%rsi + leaq 48(%rbx),%rbx + leaq 32(%rsp),%rdi + call __mulx_384 + + + movq 16(%rsp),%rsi + leaq 32(%rsp),%rdx + movq 24(%rsp),%rcx + movq %rsi,%rdi + call __subx_mod_384x384 + + + leaq 0(%rdi),%rsi + leaq -96(%rdi),%rdx + call __subx_mod_384x384 + + + leaq -96(%rdi),%rsi + leaq 32(%rsp),%rdx + leaq -96(%rdi),%rdi + call __subx_mod_384x384 + + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqrx_382x +.private_extern _sqrx_382x + +.p2align 5 +_sqrx_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_382x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rsi +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rcx + + + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%rbx + movq 32(%rsi),%rbp + movq 40(%rsi),%rdx + + movq %r14,%r8 + addq 48(%rsi),%r14 + movq %r15,%r9 + adcq 56(%rsi),%r15 + movq %rax,%r10 + adcq 64(%rsi),%rax + movq %rbx,%r11 + adcq 72(%rsi),%rbx + movq %rbp,%r12 + adcq 80(%rsi),%rbp + movq %rdx,%r13 + adcq 88(%rsi),%rdx + + movq %r14,0(%rdi) + movq %r15,8(%rdi) + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + movq %rbp,32(%rdi) + movq %rdx,40(%rdi) + + + leaq 48(%rsi),%rdx + leaq 48(%rdi),%rdi + call __subx_mod_384_a_is_loaded + + + leaq (%rdi),%rsi + leaq -48(%rdi),%rbx + leaq -48(%rdi),%rdi + call __mulx_384 + + + movq (%rsp),%rsi + leaq 48(%rsi),%rbx + leaq 96(%rdi),%rdi + call __mulx_384 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq 64(%rdi),%rax + movq 72(%rdi),%rbx + movq 80(%rdi),%rbp + addq %r8,%r8 + movq 88(%rdi),%rdx + adcq %r9,%r9 + movq %r8,0(%rdi) + adcq %r10,%r10 + movq %r9,8(%rdi) + adcq %r11,%r11 + movq %r10,16(%rdi) + adcq %r12,%r12 + movq %r11,24(%rdi) + adcq %r13,%r13 + movq %r12,32(%rdi) + adcq %r14,%r14 + movq %r13,40(%rdi) + adcq %r15,%r15 + movq %r14,48(%rdi) + adcq %rax,%rax + movq %r15,56(%rdi) + adcq %rbx,%rbx + movq %rax,64(%rdi) + adcq %rbp,%rbp + movq %rbx,72(%rdi) + adcq %rdx,%rdx + movq %rbp,80(%rdi) + movq %rdx,88(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -8*7 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _mulx_384 +.private_extern _mulx_384 + +.p2align 5 +_mulx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + + movq %rdx,%rbx + call __mulx_384 + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 + + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__mulx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rbx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + leaq -128(%rsi),%rsi + + mulxq %r14,%r9,%rcx + xorq %rbp,%rbp + + mulxq %r15,%r8,%rax + adcxq %rcx,%r8 + movq %r9,0(%rdi) + + mulxq %r10,%r9,%rcx + adcxq %rax,%r9 + + mulxq %r11,%r10,%rax + adcxq %rcx,%r10 + + mulxq %r12,%r11,%rcx + adcxq %rax,%r11 + + mulxq %r13,%r12,%r13 + movq 8(%rbx),%rdx + adcxq %rcx,%r12 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,8(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 16(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,16(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 24(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,24(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 32(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,32(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq 40(%rbx),%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + mulxq %r14,%rax,%rcx + adcxq %r8,%rax + adoxq %rcx,%r9 + movq %rax,40(%rdi) + + mulxq %r15,%r8,%rcx + adcxq %r9,%r8 + adoxq %rcx,%r10 + + mulxq 128+16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 + + mulxq 128+24(%rsi),%r10,%rcx + adcxq %r11,%r10 + adoxq %rcx,%r12 + + mulxq 128+32(%rsi),%r11,%rax + adcxq %r12,%r11 + adoxq %r13,%rax + + mulxq 128+40(%rsi),%r12,%r13 + movq %rax,%rdx + adcxq %rax,%r12 + adoxq %rbp,%r13 + adcxq %rbp,%r13 + movq %r8,48(%rdi) + movq %r9,56(%rdi) + movq %r10,64(%rdi) + movq %r11,72(%rdi) + movq %r12,80(%rdi) + movq %r13,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqrx_384 +.private_extern _sqrx_384 + +.p2align 5 +_sqrx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 + + + call __sqrx_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__sqrx_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%rcx + movq 32(%rsi),%rbx + + + mulxq %r14,%r8,%rdi + movq 40(%rsi),%rbp + mulxq %r15,%r9,%rax + addq %rdi,%r9 + mulxq %rcx,%r10,%rdi + adcq %rax,%r10 + mulxq %rbx,%r11,%rax + adcq %rdi,%r11 + mulxq %rbp,%r12,%r13 + movq %r14,%rdx + adcq %rax,%r12 + adcq $0,%r13 + + + xorq %r14,%r14 + mulxq %r15,%rdi,%rax + adcxq %rdi,%r10 + adoxq %rax,%r11 + + mulxq %rcx,%rdi,%rax + adcxq %rdi,%r11 + adoxq %rax,%r12 + + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r12 + adoxq %rax,%r13 + + mulxq %rbp,%rdi,%rax + movq %r15,%rdx + adcxq %rdi,%r13 + adoxq %r14,%rax + adcxq %rax,%r14 + + + xorq %r15,%r15 + mulxq %rcx,%rdi,%rax + adcxq %rdi,%r12 + adoxq %rax,%r13 + + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r13 + adoxq %rax,%r14 + + mulxq %rbp,%rdi,%rax + movq %rcx,%rdx + adcxq %rdi,%r14 + adoxq %r15,%rax + adcxq %rax,%r15 + + + xorq %rcx,%rcx + mulxq %rbx,%rdi,%rax + adcxq %rdi,%r14 + adoxq %rax,%r15 + + mulxq %rbp,%rdi,%rax + movq %rbx,%rdx + adcxq %rdi,%r15 + adoxq %rcx,%rax + adcxq %rax,%rcx + + + mulxq %rbp,%rdi,%rbx + movq 0(%rsi),%rdx + addq %rdi,%rcx + movq 8(%rsp),%rdi + adcq $0,%rbx + + + xorq %rbp,%rbp + adcxq %r8,%r8 + adcxq %r9,%r9 + adcxq %r10,%r10 + adcxq %r11,%r11 + adcxq %r12,%r12 + + + mulxq %rdx,%rdx,%rax + movq %rdx,0(%rdi) + movq 8(%rsi),%rdx + adoxq %rax,%r8 + movq %r8,8(%rdi) + + mulxq %rdx,%r8,%rax + movq 16(%rsi),%rdx + adoxq %r8,%r9 + adoxq %rax,%r10 + movq %r9,16(%rdi) + movq %r10,24(%rdi) + + mulxq %rdx,%r8,%r9 + movq 24(%rsi),%rdx + adoxq %r8,%r11 + adoxq %r9,%r12 + adcxq %r13,%r13 + adcxq %r14,%r14 + movq %r11,32(%rdi) + movq %r12,40(%rdi) + + mulxq %rdx,%r8,%r9 + movq 32(%rsi),%rdx + adoxq %r8,%r13 + adoxq %r9,%r14 + adcxq %r15,%r15 + adcxq %rcx,%rcx + movq %r13,48(%rdi) + movq %r14,56(%rdi) + + mulxq %rdx,%r8,%r9 + movq 40(%rsi),%rdx + adoxq %r8,%r15 + adoxq %r9,%rcx + adcxq %rbx,%rbx + adcxq %rbp,%rbp + movq %r15,64(%rdi) + movq %rcx,72(%rdi) + + mulxq %rdx,%r8,%r9 + adoxq %r8,%rbx + adoxq %r9,%rbp + + movq %rbx,80(%rdi) + movq %rbp,88(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + + + +.globl _redcx_mont_384 +.private_extern _redcx_mont_384 + +.p2align 5 +_redcx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +redc_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + + + + +.globl _fromx_mont_384 +.private_extern _fromx_mont_384 + +.p2align 5 +_fromx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +from_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rdx,%rbx + call __mulx_by_1_mont_384 + + + + + movq %r14,%rax + movq %r15,%rcx + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulx_by_1_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq %rcx,%rdx + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + imulq %r8,%rdx + + + xorq %r14,%r14 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r13 + adoxq %r14,%rbp + adcxq %rbp,%r14 + imulq %r9,%rdx + + + xorq %r15,%r15 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r14 + adoxq %r15,%rbp + adcxq %rbp,%r15 + imulq %r10,%rdx + + + xorq %r8,%r8 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r10 + adoxq %rbp,%r11 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r15 + adoxq %r8,%rbp + adcxq %rbp,%r8 + imulq %r11,%rdx + + + xorq %r9,%r9 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r11 + adoxq %rbp,%r12 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r8 + adoxq %r9,%rbp + adcxq %rbp,%r9 + imulq %r12,%rdx + + + xorq %r10,%r10 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r9 + adoxq %r10,%rbp + adcxq %rbp,%r10 + imulq %r13,%rdx + + + xorq %r11,%r11 + mulxq 0(%rbx),%rax,%rbp + adcxq %rax,%r13 + adoxq %rbp,%r14 + + mulxq 8(%rbx),%rax,%rbp + adcxq %rax,%r14 + adoxq %rbp,%r15 + + mulxq 16(%rbx),%rax,%rbp + adcxq %rax,%r15 + adoxq %rbp,%r8 + + mulxq 24(%rbx),%rax,%rbp + adcxq %rax,%r8 + adoxq %rbp,%r9 + + mulxq 32(%rbx),%rax,%rbp + adcxq %rax,%r9 + adoxq %rbp,%r10 + + mulxq 40(%rbx),%rax,%rbp + movq %rcx,%rdx + adcxq %rax,%r10 + adoxq %r11,%rbp + adcxq %rbp,%r11 + .byte 0xf3,0xc3 +.cfi_endproc + + + +.p2align 5 +__redx_tail_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + addq 48(%rsi),%r14 + movq %r14,%rax + adcq 56(%rsi),%r15 + adcq 64(%rsi),%r8 + adcq 72(%rsi),%r9 + movq %r15,%rcx + adcq 80(%rsi),%r10 + adcq 88(%rsi),%r11 + sbbq %r12,%r12 + + + + + movq %r8,%rdx + movq %r9,%rbp + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + movq %r10,%r13 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + movq %r11,%rsi + sbbq 40(%rbx),%r11 + sbbq $0,%r12 + + cmovcq %rax,%r14 + cmovcq %rcx,%r15 + cmovcq %rdx,%r8 + movq %r14,0(%rdi) + cmovcq %rbp,%r9 + movq %r15,8(%rdi) + cmovcq %r13,%r10 + movq %r8,16(%rdi) + cmovcq %rsi,%r11 + movq %r9,24(%rdi) + movq %r10,32(%rdi) + movq %r11,40(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sgn0x_pty_mont_384 +.private_extern _sgn0x_pty_mont_384 + +.p2align 5 +_sgn0x_pty_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sgn0_pty_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 0(%rdi),%rsi + movq %rdx,%rcx + call __mulx_by_1_mont_384 + + xorq %rax,%rax + movq %r14,%r13 + addq %r14,%r14 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r14 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + notq %rax + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sgn0x_pty_mont_384x +.private_extern _sgn0x_pty_mont_384x + +.p2align 5 +_sgn0x_pty_mont_384x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sgn0_pty_mont_384x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 + + + movq %rsi,%rbx + leaq 48(%rdi),%rsi + movq %rdx,%rcx + call __mulx_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + leaq 0(%rdi),%rsi + xorq %rdi,%rdi + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rdi + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rdi + + movq %r14,0(%rsp) + notq %rdi + andq $1,%r13 + andq $2,%rdi + orq %r13,%rdi + + call __mulx_by_1_mont_384 + + movq %r14,%r12 + orq %r15,%r14 + orq %r8,%r14 + orq %r9,%r14 + orq %r10,%r14 + orq %r11,%r14 + + xorq %rax,%rax + movq %r12,%r13 + addq %r12,%r12 + adcq %r15,%r15 + adcq %r8,%r8 + adcq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq $0,%rax + + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r15 + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + sbbq 32(%rbx),%r10 + sbbq 40(%rbx),%r11 + sbbq $0,%rax + + movq 0(%rsp),%r12 + + notq %rax + + testq %r14,%r14 + cmovzq %rdi,%r13 + + testq %r12,%r12 + cmovnzq %rdi,%rax + + andq $1,%r13 + andq $2,%rax + orq %r13,%rax + + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbx +.cfi_restore %rbx + movq 48(%rsp),%rbp +.cfi_restore %rbp + leaq 56(%rsp),%rsp +.cfi_adjust_cfa_offset -56 + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _mulx_mont_384 +.private_extern _mulx_mont_384 + +.p2align 5 +_mulx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +mul_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -24(%rsp),%rsp +.cfi_adjust_cfa_offset 8*3 + + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + movq %r8,(%rsp) + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 24(%rsp),%r15 +.cfi_restore %r15 + movq 32(%rsp),%r14 +.cfi_restore %r14 + movq 40(%rsp),%r13 +.cfi_restore %r13 + movq 48(%rsp),%r12 +.cfi_restore %r12 + movq 56(%rsp),%rbx +.cfi_restore %rbx + movq 64(%rsp),%rbp +.cfi_restore %rbp + leaq 72(%rsp),%rsp +.cfi_adjust_cfa_offset -8*9 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + mulxq %r15,%r14,%r10 + mulxq %rax,%r15,%r11 + addq %r14,%r9 + mulxq %r12,%rax,%r12 + adcq %r15,%r10 + mulxq %rdi,%rdi,%r13 + adcq %rax,%r11 + mulxq %rbp,%rbp,%r14 + movq 8(%rbx),%rdx + adcq %rdi,%r12 + adcq %rbp,%r13 + adcq $0,%r14 + xorq %r15,%r15 + + movq %r8,16(%rsp) + imulq 8(%rsp),%r8 + + + xorq %rax,%rax + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r9 + adcxq %rbp,%r10 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r8,%rdx + adoxq %rdi,%r14 + adcxq %rbp,%r15 + adoxq %rax,%r15 + adoxq %rax,%rax + + + xorq %r8,%r8 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r9 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 16(%rbx),%rdx + adcxq %rdi,%r13 + adoxq %rbp,%r14 + adcxq %r8,%r14 + adoxq %r8,%r15 + adcxq %r8,%r15 + adoxq %r8,%rax + adcxq %r8,%rax + movq %r9,16(%rsp) + imulq 8(%rsp),%r9 + + + xorq %r8,%r8 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r9,%rdx + adoxq %rdi,%r15 + adcxq %rbp,%rax + adoxq %r8,%rax + adoxq %r8,%r8 + + + xorq %r9,%r9 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r10 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 24(%rbx),%rdx + adcxq %rdi,%r14 + adoxq %rbp,%r15 + adcxq %r9,%r15 + adoxq %r9,%rax + adcxq %r9,%rax + adoxq %r9,%r8 + adcxq %r9,%r8 + movq %r10,16(%rsp) + imulq 8(%rsp),%r10 + + + xorq %r9,%r9 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r10,%rdx + adoxq %rdi,%rax + adcxq %rbp,%r8 + adoxq %r9,%r8 + adoxq %r9,%r9 + + + xorq %r10,%r10 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r11 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 32(%rbx),%rdx + adcxq %rdi,%r15 + adoxq %rbp,%rax + adcxq %r10,%rax + adoxq %r10,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcxq %r10,%r9 + movq %r11,16(%rsp) + imulq 8(%rsp),%r11 + + + xorq %r10,%r10 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r11,%rdx + adoxq %rdi,%r8 + adcxq %rbp,%r9 + adoxq %r10,%r9 + adoxq %r10,%r10 + + + xorq %r11,%r11 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r12 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 40+128(%rcx),%rdi,%rbp + movq 40(%rbx),%rdx + adcxq %rdi,%rax + adoxq %rbp,%r8 + adcxq %r11,%r8 + adoxq %r11,%r9 + adcxq %r11,%r9 + adoxq %r11,%r10 + adcxq %r11,%r10 + movq %r12,16(%rsp) + imulq 8(%rsp),%r12 + + + xorq %r11,%r11 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r8 + adcxq %rbp,%r9 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r12,%rdx + adoxq %rdi,%r9 + adcxq %rbp,%r10 + adoxq %r11,%r10 + adoxq %r11,%r11 + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq 16(%rsp),%rdi + adoxq %rbp,%r13 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r13,%rdx + adcxq %rdi,%r8 + adoxq %rbp,%r9 + adcxq %r12,%r9 + adoxq %r12,%r10 + adcxq %r12,%r10 + adoxq %r12,%r11 + adcxq %r12,%r11 + imulq 8(%rsp),%rdx + movq 24(%rsp),%rbx + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + movq %r15,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r9 + movq %rax,%rsi + + mulxq 40+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + movq %r14,%rdx + adcxq %r12,%r10 + adoxq %r12,%r11 + leaq 128(%rcx),%rcx + movq %r8,%r12 + adcq $0,%r11 + + + + + subq 0(%rcx),%r14 + sbbq 8(%rcx),%r15 + movq %r9,%rdi + sbbq 16(%rcx),%rax + sbbq 24(%rcx),%r8 + sbbq 32(%rcx),%r9 + movq %r10,%rbp + sbbq 40(%rcx),%r10 + sbbq $0,%r11 + + cmovncq %r14,%rdx + cmovcq %r13,%r15 + cmovcq %rsi,%rax + cmovncq %r8,%r12 + movq %rdx,0(%rbx) + cmovncq %r9,%rdi + movq %r15,8(%rbx) + cmovncq %r10,%rbp + movq %rax,16(%rbx) + movq %r12,24(%rbx) + movq %rdi,32(%rbx) + movq %rbp,40(%rbx) + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqrx_mont_384 +.private_extern _sqrx_mont_384 + +.p2align 5 +_sqrx_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -24(%rsp),%rsp +.cfi_adjust_cfa_offset 8*3 + + + movq %rcx,%r8 + leaq -128(%rdx),%rcx + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + leaq (%rsi),%rbx + movq %r8,(%rsp) + leaq -128(%rsi),%rsi + + mulxq %rdx,%r8,%r9 + call __mulx_mont_384 + + movq 24(%rsp),%r15 +.cfi_restore %r15 + movq 32(%rsp),%r14 +.cfi_restore %r14 + movq 40(%rsp),%r13 +.cfi_restore %r13 + movq 48(%rsp),%r12 +.cfi_restore %r12 + movq 56(%rsp),%rbx +.cfi_restore %rbx + movq 64(%rsp),%rbp +.cfi_restore %rbp + leaq 72(%rsp),%rsp +.cfi_adjust_cfa_offset -8*9 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sqrx_n_mul_mont_384 +.private_extern _sqrx_n_mul_mont_384 + +.p2align 5 +_sqrx_n_mul_mont_384: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_n_mul_mont_384$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -40(%rsp),%rsp +.cfi_adjust_cfa_offset 8*5 + + + movq %rdx,%r10 + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq %rsi,%rbx + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + movq %r8,(%rsp) + movq %r9,24(%rsp) + movq 0(%r9),%xmm2 + +L$oop_sqrx_384: + movd %r10d,%xmm1 + leaq -128(%rbx),%rsi + leaq -128(%rcx),%rcx + + mulxq %rdx,%r8,%r9 + call __mulx_mont_384 + + movd %xmm1,%r10d + decl %r10d + jnz L$oop_sqrx_384 + + movq %rdx,%r14 +.byte 102,72,15,126,210 + leaq -128(%rbx),%rsi + movq 24(%rsp),%rbx + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 40(%rsp),%r15 +.cfi_restore %r15 + movq 48(%rsp),%r14 +.cfi_restore %r14 + movq 56(%rsp),%r13 +.cfi_restore %r13 + movq 64(%rsp),%r12 +.cfi_restore %r12 + movq 72(%rsp),%rbx +.cfi_restore %rbx + movq 80(%rsp),%rbp +.cfi_restore %rbp + leaq 88(%rsp),%rsp +.cfi_adjust_cfa_offset -8*11 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _sqrx_n_mul_mont_383 +.private_extern _sqrx_n_mul_mont_383 + +.p2align 5 +_sqrx_n_mul_mont_383: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_n_mul_mont_383$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -40(%rsp),%rsp +.cfi_adjust_cfa_offset 8*5 + + + movq %rdx,%r10 + movq 0(%rsi),%rdx + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq %rsi,%rbx + movq 24(%rsi),%r12 + movq %rdi,16(%rsp) + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + + movq %r8,(%rsp) + movq %r9,24(%rsp) + movq 0(%r9),%xmm2 + leaq -128(%rcx),%rcx + +L$oop_sqrx_383: + movd %r10d,%xmm1 + leaq -128(%rbx),%rsi + + mulxq %rdx,%r8,%r9 + call __mulx_mont_383_nonred + + movd %xmm1,%r10d + decl %r10d + jnz L$oop_sqrx_383 + + movq %rdx,%r14 +.byte 102,72,15,126,210 + leaq -128(%rbx),%rsi + movq 24(%rsp),%rbx + + mulxq %r14,%r8,%r9 + call __mulx_mont_384 + + movq 40(%rsp),%r15 +.cfi_restore %r15 + movq 48(%rsp),%r14 +.cfi_restore %r14 + movq 56(%rsp),%r13 +.cfi_restore %r13 + movq 64(%rsp),%r12 +.cfi_restore %r12 + movq 72(%rsp),%rbx +.cfi_restore %rbx + movq 80(%rsp),%rbp +.cfi_restore %rbp + leaq 88(%rsp),%rsp +.cfi_adjust_cfa_offset -8*11 + + .byte 0xf3,0xc3 +.cfi_endproc + + +.p2align 5 +__mulx_mont_383_nonred: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + mulxq %r15,%r14,%r10 + mulxq %rax,%r15,%r11 + addq %r14,%r9 + mulxq %r12,%rax,%r12 + adcq %r15,%r10 + mulxq %rdi,%rdi,%r13 + adcq %rax,%r11 + mulxq %rbp,%rbp,%r14 + movq 8(%rbx),%rdx + adcq %rdi,%r12 + adcq %rbp,%r13 + adcq $0,%r14 + movq %r8,%rax + imulq 8(%rsp),%r8 + + + xorq %r15,%r15 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r9 + adcxq %rbp,%r10 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r8,%rdx + adoxq %rdi,%r14 + adcxq %r15,%rbp + adoxq %rbp,%r15 + + + xorq %r8,%r8 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r9 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 16(%rbx),%rdx + adcxq %rdi,%r13 + adoxq %rbp,%r14 + adcxq %rax,%r14 + adoxq %rax,%r15 + adcxq %rax,%r15 + movq %r9,%r8 + imulq 8(%rsp),%r9 + + + xorq %rax,%rax + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r10 + adcxq %rbp,%r11 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r9,%rdx + adoxq %rdi,%r15 + adcxq %rax,%rbp + adoxq %rbp,%rax + + + xorq %r9,%r9 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r10 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 24(%rbx),%rdx + adcxq %rdi,%r14 + adoxq %rbp,%r15 + adcxq %r8,%r15 + adoxq %r8,%rax + adcxq %r8,%rax + movq %r10,%r9 + imulq 8(%rsp),%r10 + + + xorq %r8,%r8 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r11 + adcxq %rbp,%r12 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r10,%rdx + adoxq %rdi,%rax + adcxq %r8,%rbp + adoxq %rbp,%r8 + + + xorq %r10,%r10 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r9 + adoxq %rbp,%r11 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 40+128(%rcx),%rdi,%rbp + movq 32(%rbx),%rdx + adcxq %rdi,%r15 + adoxq %rbp,%rax + adcxq %r9,%rax + adoxq %r9,%r8 + adcxq %r9,%r8 + movq %r11,%r10 + imulq 8(%rsp),%r11 + + + xorq %r9,%r9 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r12 + adcxq %rbp,%r13 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r11,%rdx + adoxq %rdi,%r8 + adcxq %r9,%rbp + adoxq %rbp,%r9 + + + xorq %r11,%r11 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r10 + adoxq %rbp,%r12 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 40+128(%rcx),%rdi,%rbp + movq 40(%rbx),%rdx + adcxq %rdi,%rax + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcxq %r10,%r9 + movq %r12,%r11 + imulq 8(%rsp),%r12 + + + xorq %r10,%r10 + mulxq 0+128(%rsi),%rdi,%rbp + adoxq %rdi,%r13 + adcxq %rbp,%r14 + + mulxq 8+128(%rsi),%rdi,%rbp + adoxq %rdi,%r14 + adcxq %rbp,%r15 + + mulxq 16+128(%rsi),%rdi,%rbp + adoxq %rdi,%r15 + adcxq %rbp,%rax + + mulxq 24+128(%rsi),%rdi,%rbp + adoxq %rdi,%rax + adcxq %rbp,%r8 + + mulxq 32+128(%rsi),%rdi,%rbp + adoxq %rdi,%r8 + adcxq %rbp,%r9 + + mulxq 40+128(%rsi),%rdi,%rbp + movq %r12,%rdx + adoxq %rdi,%r9 + adcxq %r10,%rbp + adoxq %rbp,%r10 + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r11 + adoxq %rbp,%r13 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r13,%rdx + adcxq %rdi,%r8 + adoxq %rbp,%r9 + adcxq %r11,%r9 + adoxq %r11,%r10 + adcxq %r11,%r10 + imulq 8(%rsp),%rdx + movq 24(%rsp),%rbx + + + xorq %r12,%r12 + mulxq 0+128(%rcx),%rdi,%rbp + adcxq %rdi,%r13 + adoxq %rbp,%r14 + + mulxq 8+128(%rcx),%rdi,%rbp + adcxq %rdi,%r14 + adoxq %rbp,%r15 + + mulxq 16+128(%rcx),%rdi,%rbp + adcxq %rdi,%r15 + adoxq %rbp,%rax + + mulxq 24+128(%rcx),%rdi,%rbp + adcxq %rdi,%rax + adoxq %rbp,%r8 + + mulxq 32+128(%rcx),%rdi,%rbp + adcxq %rdi,%r8 + adoxq %rbp,%r9 + + mulxq 40+128(%rcx),%rdi,%rbp + movq %r14,%rdx + adcxq %rdi,%r9 + adoxq %rbp,%r10 + adcq $0,%r10 + movq %r8,%r12 + + movq %r14,0(%rbx) + movq %r15,8(%rbx) + movq %rax,16(%rbx) + movq %r9,%rdi + movq %r8,24(%rbx) + movq %r9,32(%rbx) + movq %r10,40(%rbx) + movq %r10,%rbp + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _sqrx_mont_382x +.private_extern _sqrx_mont_382x + +.p2align 5 +_sqrx_mont_382x: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + +sqr_mont_382x$1: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $136,%rsp +.cfi_adjust_cfa_offset 136 + + + movq %rcx,0(%rsp) + movq %rdx,%rcx + movq %rdi,16(%rsp) + movq %rsi,24(%rsp) + + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + + movq %r8,%r14 + addq 48(%rsi),%r8 + movq %r9,%r15 + adcq 56(%rsi),%r9 + movq %r10,%rax + adcq 64(%rsi),%r10 + movq %r11,%rdx + adcq 72(%rsi),%r11 + movq %r12,%rbx + adcq 80(%rsi),%r12 + movq %r13,%rbp + adcq 88(%rsi),%r13 + + subq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq 64(%rsi),%rax + sbbq 72(%rsi),%rdx + sbbq 80(%rsi),%rbx + sbbq 88(%rsi),%rbp + sbbq %rdi,%rdi + + movq %r8,32+0(%rsp) + movq %r9,32+8(%rsp) + movq %r10,32+16(%rsp) + movq %r11,32+24(%rsp) + movq %r12,32+32(%rsp) + movq %r13,32+40(%rsp) + + movq %r14,32+48(%rsp) + movq %r15,32+56(%rsp) + movq %rax,32+64(%rsp) + movq %rdx,32+72(%rsp) + movq %rbx,32+80(%rsp) + movq %rbp,32+88(%rsp) + movq %rdi,32+96(%rsp) + + + + leaq 48(%rsi),%rbx + + movq 48(%rsi),%rdx + movq 0(%rsi),%r14 + movq 8(%rsi),%r15 + movq 16(%rsi),%rax + movq 24(%rsi),%r12 + movq 32(%rsi),%rdi + movq 40(%rsi),%rbp + leaq -128(%rsi),%rsi + leaq -128(%rcx),%rcx + + mulxq %r14,%r8,%r9 + call __mulx_mont_383_nonred + addq %rdx,%rdx + adcq %r15,%r15 + adcq %rax,%rax + adcq %r12,%r12 + adcq %rdi,%rdi + adcq %rbp,%rbp + + movq %rdx,48(%rbx) + movq %r15,56(%rbx) + movq %rax,64(%rbx) + movq %r12,72(%rbx) + movq %rdi,80(%rbx) + movq %rbp,88(%rbx) + + leaq 32-128(%rsp),%rsi + leaq 32+48(%rsp),%rbx + + movq 32+48(%rsp),%rdx + movq 32+0(%rsp),%r14 + movq 32+8(%rsp),%r15 + movq 32+16(%rsp),%rax + movq 32+24(%rsp),%r12 + movq 32+32(%rsp),%rdi + movq 32+40(%rsp),%rbp + + + + mulxq %r14,%r8,%r9 + call __mulx_mont_383_nonred + movq 32+96(%rsp),%r14 + leaq 128(%rcx),%rcx + movq 32+0(%rsp),%r8 + andq %r14,%r8 + movq 32+8(%rsp),%r9 + andq %r14,%r9 + movq 32+16(%rsp),%r10 + andq %r14,%r10 + movq 32+24(%rsp),%r11 + andq %r14,%r11 + movq 32+32(%rsp),%r13 + andq %r14,%r13 + andq 32+40(%rsp),%r14 + + subq %r8,%rdx + movq 0(%rcx),%r8 + sbbq %r9,%r15 + movq 8(%rcx),%r9 + sbbq %r10,%rax + movq 16(%rcx),%r10 + sbbq %r11,%r12 + movq 24(%rcx),%r11 + sbbq %r13,%rdi + movq 32(%rcx),%r13 + sbbq %r14,%rbp + sbbq %r14,%r14 + + andq %r14,%r8 + andq %r14,%r9 + andq %r14,%r10 + andq %r14,%r11 + andq %r14,%r13 + andq 40(%rcx),%r14 + + addq %r8,%rdx + adcq %r9,%r15 + adcq %r10,%rax + adcq %r11,%r12 + adcq %r13,%rdi + adcq %r14,%rbp + + movq %rdx,0(%rbx) + movq %r15,8(%rbx) + movq %rax,16(%rbx) + movq %r12,24(%rbx) + movq %rdi,32(%rbx) + movq %rbp,40(%rbx) + leaq 136(%rsp),%r8 + movq 0(%r8),%r15 +.cfi_restore %r15 + movq 8(%r8),%r14 +.cfi_restore %r14 + movq 16(%r8),%r13 +.cfi_restore %r13 + movq 24(%r8),%r12 +.cfi_restore %r12 + movq 32(%r8),%rbx +.cfi_restore %rbx + movq 40(%r8),%rbp +.cfi_restore %rbp + leaq 48(%r8),%rsp +.cfi_adjust_cfa_offset -136-8*6 + + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/mach-o/sha256-armv8.S b/crypto/blst_src/build/mach-o/sha256-armv8.S new file mode 100644 index 00000000000..3f3c1266dcd --- /dev/null +++ b/crypto/blst_src/build/mach-o/sha256-armv8.S @@ -0,0 +1,1083 @@ +// +// Copyright Supranational LLC +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// ==================================================================== +// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +// project. +// ==================================================================== +// +// sha256_block procedure for ARMv8. +// +// This module is stripped of scalar code paths, with rationale that all +// known processors are NEON-capable. +// +// See original module at CRYPTOGAMS for further details. + +.comm ___blst_platform_cap,4 +.text + +.align 6 + +LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator + +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.align 2 +.align 2 +.globl _blst_sha256_block_armv8 + +.align 6 +_blst_sha256_block_armv8: +Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,LK256 + +Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.long 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret + +.globl _blst_sha256_block_data_order + +.align 4 +_blst_sha256_block_data_order: + adrp x16,___blst_platform_cap@PAGE + ldr w16,[x16,___blst_platform_cap@PAGEOFF] + tst w16,#1 + b.ne Lv8_entry + + stp x29, x30, [sp, #-16]! + mov x29, sp + sub sp,sp,#16*4 + + adr x16,LK256 + add x2,x1,x2,lsl#6 // len to point at the end of inp + + ld1 {v0.16b},[x1], #16 + ld1 {v1.16b},[x1], #16 + ld1 {v2.16b},[x1], #16 + ld1 {v3.16b},[x1], #16 + ld1 {v4.4s},[x16], #16 + ld1 {v5.4s},[x16], #16 + ld1 {v6.4s},[x16], #16 + ld1 {v7.4s},[x16], #16 + rev32 v0.16b,v0.16b // yes, even on + rev32 v1.16b,v1.16b // big-endian + rev32 v2.16b,v2.16b + rev32 v3.16b,v3.16b + mov x17,sp + add v4.4s,v4.4s,v0.4s + add v5.4s,v5.4s,v1.4s + add v6.4s,v6.4s,v2.4s + st1 {v4.4s,v5.4s},[x17], #32 + add v7.4s,v7.4s,v3.4s + st1 {v6.4s,v7.4s},[x17] + sub x17,x17,#32 + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#8] + ldp w7,w8,[x0,#16] + ldp w9,w10,[x0,#24] + ldr w12,[sp,#0] + mov w13,wzr + eor w14,w4,w5 + mov w15,wzr + b L_00_48 + +.align 4 +L_00_48: + ext v4.16b,v0.16b,v1.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v2.16b,v3.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v3.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v0.4s,v0.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v0.4s,v0.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v0.4s,v0.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v0.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v0.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v0.4s,#15 + add w8,w8,w12 + ushr v17.4s,v0.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v0.4s,#13 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v0.4s,v0.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v0.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v1.16b,v2.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v3.16b,v0.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v0.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v1.4s,v1.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v1.4s,v1.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v1.4s,v1.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v1.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v1.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v1.4s,#15 + add w4,w4,w12 + ushr v17.4s,v1.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v1.4s,#13 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v1.4s,v1.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v1.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + ext v4.16b,v2.16b,v3.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v0.16b,v1.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v1.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v2.4s,v2.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v2.4s,v2.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v2.4s,v2.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v2.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v2.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v2.4s,#15 + add w8,w8,w12 + ushr v17.4s,v2.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v2.4s,#13 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v2.4s,v2.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v2.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v3.16b,v0.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v1.16b,v2.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v2.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v3.4s,v3.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v3.4s,v3.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v3.4s,v3.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v3.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v3.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v3.4s,#15 + add w4,w4,w12 + ushr v17.4s,v3.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v3.4s,#13 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v3.4s,v3.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v3.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[x16] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + cmp w12,#0 // check for K256 terminator + ldr w12,[sp,#0] + sub x17,x17,#64 + bne L_00_48 + + sub x16,x16,#256 // rewind x16 + cmp x1,x2 + mov x17, #64 + csel x17, x17, xzr, eq + sub x1,x1,x17 // avoid SEGV + mov x17,sp + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v0.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v0.16b,v0.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v0.4s + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v1.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v1.16b,v1.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v1.4s + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v2.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v2.16b,v2.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v2.4s + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v3.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v3.16b,v3.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v3.4s + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w3,w3,w15 // h+=Sigma0(a) from the past + ldp w11,w12,[x0,#0] + add w3,w3,w13 // h+=Maj(a,b,c) from the past + ldp w13,w14,[x0,#8] + add w3,w3,w11 // accumulate + add w4,w4,w12 + ldp w11,w12,[x0,#16] + add w5,w5,w13 + add w6,w6,w14 + ldp w13,w14,[x0,#24] + add w7,w7,w11 + add w8,w8,w12 + ldr w12,[sp,#0] + stp w3,w4,[x0,#0] + add w9,w9,w13 + mov w13,wzr + stp w5,w6,[x0,#8] + add w10,w10,w14 + stp w7,w8,[x0,#16] + eor w14,w4,w5 + stp w9,w10,[x0,#24] + mov w15,wzr + mov x17,sp + b.ne L_00_48 + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret + +.globl _blst_sha256_emit +.private_extern _blst_sha256_emit + +.align 4 +_blst_sha256_emit: + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] +#ifndef __AARCH64EB__ + rev x4,x4 + rev x5,x5 + rev x6,x6 + rev x7,x7 +#endif + str w4,[x0,#4] + lsr x4,x4,#32 + str w5,[x0,#12] + lsr x5,x5,#32 + str w6,[x0,#20] + lsr x6,x6,#32 + str w7,[x0,#28] + lsr x7,x7,#32 + str w4,[x0,#0] + str w5,[x0,#8] + str w6,[x0,#16] + str w7,[x0,#24] + ret + + +.globl _blst_sha256_bcopy +.private_extern _blst_sha256_bcopy + +.align 4 +_blst_sha256_bcopy: +Loop_bcopy: + ldrb w3,[x1],#1 + sub x2,x2,#1 + strb w3,[x0],#1 + cbnz x2,Loop_bcopy + ret + + +.globl _blst_sha256_hcopy +.private_extern _blst_sha256_hcopy + +.align 4 +_blst_sha256_hcopy: + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + ret + diff --git a/crypto/blst_src/build/mach-o/sha256-portable-x86_64.s b/crypto/blst_src/build/mach-o/sha256-portable-x86_64.s new file mode 100644 index 00000000000..9f0a4f84ff0 --- /dev/null +++ b/crypto/blst_src/build/mach-o/sha256-portable-x86_64.s @@ -0,0 +1,1750 @@ +.comm ___blst_platform_cap,4 +.text + +.globl _blst_sha256_block_data_order + +.p2align 4 +_blst_sha256_block_data_order: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp +#ifdef __BLST_PORTABLE__ + testl $2,___blst_platform_cap(%rip) + jnz L$blst_sha256_block_data_order$2 +#endif + pushq %rbx +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $64+24,%rsp + +.cfi_def_cfa %rsp,144 + + leaq (%rsi,%rdx,4),%rdx + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp L$loop + +.p2align 4 +L$loop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 0(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 4(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 8(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 12(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 16(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 20(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 24(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 28(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 32(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 36(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 40(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 44(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 48(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 52(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 56(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 60(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 64(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 68(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 72(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 76(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 80(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 84(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 88(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 92(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl 96(%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl 100(%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl 104(%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl 108(%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 112(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl 116(%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl 120(%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl 124(%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + leaq 64(%rbp),%rbp + cmpb $0x19,3(%rbp) + jnz L$rounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop + + leaq 64+24+48(%rsp),%r11 +.cfi_def_cfa %r11,8 + movq 64+24(%rsp),%r15 + movq -40(%r11),%r14 + movq -32(%r11),%r13 + movq -24(%r11),%r12 + movq -16(%r11),%rbx + movq -8(%r11),%rbp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbp +.cfi_restore %rbx + leaq (%r11),%rsp + .byte 0xf3,0xc3 +.cfi_endproc + + +#ifndef __BLST_PORTABLE__ +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.globl _blst_sha256_emit +.private_extern _blst_sha256_emit + +.p2align 4 +_blst_sha256_emit: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + bswapq %r8 + movq 24(%rsi),%r11 + bswapq %r9 + movl %r8d,4(%rdi) + bswapq %r10 + movl %r9d,12(%rdi) + bswapq %r11 + movl %r10d,20(%rdi) + shrq $32,%r8 + movl %r11d,28(%rdi) + shrq $32,%r9 + movl %r8d,0(%rdi) + shrq $32,%r10 + movl %r9d,8(%rdi) + shrq $32,%r11 + movl %r10d,16(%rdi) + movl %r11d,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _blst_sha256_bcopy +.private_extern _blst_sha256_bcopy + +.p2align 4 +_blst_sha256_bcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + subq %rsi,%rdi +L$oop_bcopy: + movzbl (%rsi),%eax + leaq 1(%rsi),%rsi + movb %al,-1(%rdi,%rsi,1) + decq %rdx + jnz L$oop_bcopy + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _blst_sha256_hcopy +.private_extern _blst_sha256_hcopy + +.p2align 4 +_blst_sha256_hcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + +#endif diff --git a/crypto/blst_src/build/mach-o/sha256-x86_64.s b/crypto/blst_src/build/mach-o/sha256-x86_64.s new file mode 100644 index 00000000000..cff024eed4f --- /dev/null +++ b/crypto/blst_src/build/mach-o/sha256-x86_64.s @@ -0,0 +1,1447 @@ +.comm ___blst_platform_cap,4 +.text + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.globl _blst_sha256_block_data_order_shaext +.private_extern _blst_sha256_block_data_order_shaext + +.p2align 6 +_blst_sha256_block_data_order_shaext: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp +L$blst_sha256_block_data_order$2: + + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 256-128(%rcx),%xmm7 + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 16-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 48-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 96-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 112-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 144-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 176-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 208-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 224-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 240-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz L$oop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm7 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) +.cfi_def_cfa_register %rsp + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _blst_sha256_block_data_order +.private_extern _blst_sha256_block_data_order + +.p2align 6 +_blst_sha256_block_data_order: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + testl $2,___blst_platform_cap(%rip) + jnz L$blst_sha256_block_data_order$2 + pushq %rbx +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $24,%rsp + + leaq (%rsi,%rdx,4),%rdx + movq %rdi,-64(%rbp) + + movq %rdx,-48(%rbp) + + + leaq -64(%rsp),%rsp + movl 0(%rdi),%eax + andq $-64,%rsp + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp L$loop_ssse3 +.p2align 4 +L$loop_ssse3: + movdqa K256+256(%rip),%xmm7 + movq %rsi,-56(%rbp) + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rsi +.byte 102,15,56,0,207 + movdqa 0(%rsi),%xmm4 + movdqa 16(%rsi),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 32(%rsi),%xmm6 +.byte 102,15,56,0,223 + movdqa 48(%rsi),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$ssse3_00_47 + +.p2align 4 +L$ssse3_00_47: + subq $-64,%rsi + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 16(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 32(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 48(%rsi),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,67(%rsi) + jne L$ssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq -64(%rbp),%rdi + movl %r14d,%eax + movq -56(%rbp),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + leaq 64(%rsi),%rsi + cmpq -48(%rbp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_ssse3 + + xorps %xmm0,%xmm0 + movaps %xmm0,0(%rsp) + movaps %xmm0,16(%rsp) + movaps %xmm0,32(%rsp) + movaps %xmm0,48(%rsp) + movq -40(%rbp),%r15 + movq -32(%rbp),%r14 + movq -24(%rbp),%r13 + movq -16(%rbp),%r12 + movq -8(%rbp),%rbx + movq %rbp,%rsp +.cfi_def_cfa_register %rsp + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp +.cfi_restore %r12 +.cfi_restore %r13 +.cfi_restore %r14 +.cfi_restore %r15 +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc + +.globl _blst_sha256_emit +.private_extern _blst_sha256_emit + +.p2align 4 +_blst_sha256_emit: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + bswapq %r8 + movq 24(%rsi),%r11 + bswapq %r9 + movl %r8d,4(%rdi) + bswapq %r10 + movl %r9d,12(%rdi) + bswapq %r11 + movl %r10d,20(%rdi) + shrq $32,%r8 + movl %r11d,28(%rdi) + shrq $32,%r9 + movl %r8d,0(%rdi) + shrq $32,%r10 + movl %r9d,8(%rdi) + shrq $32,%r11 + movl %r10d,16(%rdi) + movl %r11d,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _blst_sha256_bcopy +.private_extern _blst_sha256_bcopy + +.p2align 4 +_blst_sha256_bcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + subq %rsi,%rdi +L$oop_bcopy: + movzbl (%rsi),%eax + leaq 1(%rsi),%rsi + movb %al,-1(%rdi,%rsi,1) + decq %rdx + jnz L$oop_bcopy + .byte 0xf3,0xc3 +.cfi_endproc + + +.globl _blst_sha256_hcopy +.private_extern _blst_sha256_hcopy + +.p2align 4 +_blst_sha256_hcopy: +.cfi_startproc + .byte 0xf3,0x0f,0x1e,0xfa + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc + diff --git a/crypto/blst_src/build/refresh.sh b/crypto/blst_src/build/refresh.sh new file mode 100755 index 00000000000..56b0b279c69 --- /dev/null +++ b/crypto/blst_src/build/refresh.sh @@ -0,0 +1,48 @@ +#!/bin/sh + +HERE=`dirname $0` +cd "${HERE}" + +PERL=${PERL:-perl} + +for pl in ../src/asm/*-x86_64.pl; do + s=`basename $pl .pl`.asm + expr $s : '.*portable' > /dev/null || (set -x; ${PERL} $pl masm > win64/$s) + s=`basename $pl .pl`.s + (set -x; ${PERL} $pl elf > elf/$s) + (set -x; ${PERL} $pl mingw64 > coff/$s) + (set -x; ${PERL} $pl macosx > mach-o/$s) +done + +for pl in ../src/asm/*-armv8.pl; do + s=`basename $pl .pl`.asm + (set -x; ${PERL} $pl win64 > win64/$s) + s=`basename $pl .pl`.S + (set -x; ${PERL} $pl linux64 > elf/$s) + (set -x; ${PERL} $pl coff64 > coff/$s) + (set -x; ${PERL} $pl ios64 > mach-o/$s) +done + +( cd ../bindings; + echo "LIBRARY blst" + echo + echo "EXPORTS" + cc -E blst.h | \ + ${PERL} -ne '{ (/(blst_[\w]+)\s*\(/ || /(BLS12_[\w]+);/) && print "\t$1\n" }' + echo +) > win64/blst.def + +if which bindgen > /dev/null 2>&1; then + ( cd ../bindings; set -x; + bindgen --opaque-type blst_pairing \ + --opaque-type blst_uniq \ + --with-derive-default \ + --with-derive-eq \ + --rustified-enum BLST.\* \ + blst.h -- -D__BLST_RUST_BINDGEN__ \ + | ${PERL} ../build/bindings_trim.pl > rust/src/bindings.rs + ) +else + echo "Install Rust bindgen with 'cargo install bindgen-cli'" 1>&2 + exit 1 +fi diff --git a/crypto/blst_src/build/win64/add_mod_256-armv8.asm b/crypto/blst_src/build/win64/add_mod_256-armv8.asm new file mode 100644 index 00000000000..8d6975185a6 --- /dev/null +++ b/crypto/blst_src/build/win64/add_mod_256-armv8.asm @@ -0,0 +1,380 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + + EXPORT |add_mod_256|[FUNC] + ALIGN 32 +|add_mod_256| PROC + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + adds x8,x8,x12 + ldp x14,x15,[x2,#16] + adcs x9,x9,x13 + ldp x4,x5,[x3] + adcs x10,x10,x14 + ldp x6,x7,[x3,#16] + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csello x8,x8,x16 + csello x9,x9,x17 + csello x10,x10,x1 + stp x8,x9,[x0] + csello x11,x11,x2 + stp x10,x11,[x0,#16] + + ret + ENDP + + + + EXPORT |mul_by_3_mod_256|[FUNC] + ALIGN 32 +|mul_by_3_mod_256| PROC + ldp x12,x13,[x1] + ldp x14,x15,[x1,#16] + + adds x8,x12,x12 + ldp x4,x5,[x2] + adcs x9,x13,x13 + ldp x6,x7,[x2,#16] + adcs x10,x14,x14 + adcs x11,x15,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csello x8,x8,x16 + csello x9,x9,x17 + csello x10,x10,x1 + csello x11,x11,x2 + + adds x8,x8,x12 + adcs x9,x9,x13 + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csello x8,x8,x16 + csello x9,x9,x17 + csello x10,x10,x1 + stp x8,x9,[x0] + csello x11,x11,x2 + stp x10,x11,[x0,#16] + + ret + ENDP + + + + EXPORT |lshift_mod_256|[FUNC] + ALIGN 32 +|lshift_mod_256| PROC + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +|$Loop_lshift_mod_256| + adds x8,x8,x8 + sub x2,x2,#1 + adcs x9,x9,x9 + adcs x10,x10,x10 + adcs x11,x11,x11 + adc x3,xzr,xzr + + subs x12,x8,x4 + sbcs x13,x9,x5 + sbcs x14,x10,x6 + sbcs x15,x11,x7 + sbcs xzr,x3,xzr + + csello x8,x8,x12 + csello x9,x9,x13 + csello x10,x10,x14 + csello x11,x11,x15 + + cbnz x2,|$Loop_lshift_mod_256| + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret + ENDP + + + + EXPORT |rshift_mod_256|[FUNC] + ALIGN 32 +|rshift_mod_256| PROC + ldp x8,x9,[x1] + ldp x10,x11,[x1,#16] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + +|$Loop_rshift| + adds x12,x8,x4 + sub x2,x2,#1 + adcs x13,x9,x5 + adcs x14,x10,x6 + adcs x15,x11,x7 + adc x3,xzr,xzr + tst x8,#1 + + cselne x12,x12,x8 + cselne x13,x13,x9 + cselne x14,x14,x10 + cselne x15,x15,x11 + cselne x3,x3,xzr + + extr x8,x13,x12,#1 + extr x9,x14,x13,#1 + extr x10,x15,x14,#1 + extr x11,x3,x15,#1 + + cbnz x2,|$Loop_rshift| + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + ret + ENDP + + + + EXPORT |cneg_mod_256|[FUNC] + ALIGN 32 +|cneg_mod_256| PROC + ldp x8,x9,[x1] + ldp x4,x5,[x3] + + ldp x10,x11,[x1,#16] + subs x12,x4,x8 + ldp x6,x7,[x3,#16] + orr x4,x8,x9 + sbcs x13,x5,x9 + orr x5,x10,x11 + sbcs x14,x6,x10 + orr x3,x4,x5 + sbc x15,x7,x11 + + cmp x3,#0 + csetmne x3 + ands x2,x2,x3 + + cseleq x8,x8,x12 + cseleq x9,x9,x13 + cseleq x10,x10,x14 + stp x8,x9,[x0] + cseleq x11,x11,x15 + stp x10,x11,[x0,#16] + + ret + ENDP + + + + EXPORT |sub_mod_256|[FUNC] + ALIGN 32 +|sub_mod_256| PROC + ldp x8,x9,[x1] + ldp x12,x13,[x2] + + ldp x10,x11,[x1,#16] + subs x8,x8,x12 + ldp x14,x15,[x2,#16] + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + stp x8,x9,[x0] + adc x11,x11,x7 + stp x10,x11,[x0,#16] + + ret + ENDP + + + + EXPORT |check_mod_256|[FUNC] + ALIGN 32 +|check_mod_256| PROC + ldp x8,x9,[x0] + ldp x10,x11,[x0,#16] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + subs xzr,x8,x4 + sbcs xzr,x9,x5 + orr x8,x8,x9 + sbcs xzr,x10,x6 + orr x8,x8,x10 + sbcs xzr,x11,x7 + orr x8,x8,x11 + sbc x1,xzr,xzr + + cmp x8,#0 + mov x0,#1 + cselne x0,x0,xzr + and x0,x0,x1 + + ret + ENDP + + + + EXPORT |add_n_check_mod_256|[FUNC] + ALIGN 32 +|add_n_check_mod_256| PROC + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + adds x8,x8,x12 + ldp x4,x5,[x3] + adcs x9,x9,x13 + ldp x6,x7,[x3,#16] + adcs x10,x10,x14 + adcs x11,x11,x15 + adc x3,xzr,xzr + + subs x16,x8,x4 + sbcs x17,x9,x5 + sbcs x1,x10,x6 + sbcs x2,x11,x7 + sbcs xzr,x3,xzr + + csello x8,x8,x16 + csello x9,x9,x17 + csello x10,x10,x1 + csello x11,x11,x2 + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + cselne x0,x17,xzr + + ret + ENDP + + + + EXPORT |sub_n_check_mod_256|[FUNC] + ALIGN 32 +|sub_n_check_mod_256| PROC + ldp x8,x9,[x1] + ldp x12,x13,[x2] + ldp x10,x11,[x1,#16] + ldp x14,x15,[x2,#16] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 + rev x10,x10 + rev x14,x14 + rev x11,x11 + rev x15,x15 +#endif + + subs x8,x8,x12 + sbcs x9,x9,x13 + ldp x4,x5,[x3] + sbcs x10,x10,x14 + ldp x6,x7,[x3,#16] + sbcs x11,x11,x15 + sbc x3,xzr,xzr + + and x4,x4,x3 + and x5,x5,x3 + adds x8,x8,x4 + and x6,x6,x3 + adcs x9,x9,x5 + and x7,x7,x3 + adcs x10,x10,x6 + adc x11,x11,x7 + + orr x16, x8, x9 + orr x17, x10, x11 + orr x16, x16, x17 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x9,x9 + rev x10,x10 + rev x11,x11 +#endif + + stp x8,x9,[x0] + stp x10,x11,[x0,#16] + + mov x17, #1 + cmp x16, #0 + cselne x0,x17,xzr + + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/add_mod_256-x86_64.asm b/crypto/blst_src/build/win64/add_mod_256-x86_64.asm new file mode 100644 index 00000000000..d5308b8f809 --- /dev/null +++ b/crypto/blst_src/build/win64/add_mod_256-x86_64.asm @@ -0,0 +1,939 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC add_mod_256 + + +ALIGN 32 +add_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_add_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + sub rsp,8 + +$L$SEH_body_add_mod_256:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + +$L$oaded_a_add_mod_256:: + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + mov rax,r8 + adc r10,QWORD PTR[16+rdx] + mov rsi,r9 + adc r11,QWORD PTR[24+rdx] + sbb rdx,rdx + + mov rbx,r10 + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rcx] + mov rbp,r11 + sbb r11,QWORD PTR[24+rcx] + sbb rdx,0 + + cmovc r8,rax + cmovc r9,rsi + mov QWORD PTR[rdi],r8 + cmovc r10,rbx + mov QWORD PTR[8+rdi],r9 + cmovc r11,rbp + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_add_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_add_mod_256:: +add_mod_256 ENDP + + +PUBLIC mul_by_3_mod_256 + + +ALIGN 32 +mul_by_3_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_by_3_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + push rbx + + push r12 + +$L$SEH_body_mul_by_3_mod_256:: + + + mov rcx,rdx + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov rdx,rsi + mov r11,QWORD PTR[24+rsi] + + call __lshift_mod_256 + mov r12,QWORD PTR[rsp] + + jmp $L$oaded_a_add_mod_256 + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_mul_by_3_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_by_3_mod_256:: +mul_by_3_mod_256 ENDP + + +ALIGN 32 +__lshift_mod_256 PROC PRIVATE + DB 243,15,30,250 + + add r8,r8 + adc r9,r9 + mov rax,r8 + adc r10,r10 + mov rsi,r9 + adc r11,r11 + sbb r12,r12 + + mov rbx,r10 + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rcx] + mov rbp,r11 + sbb r11,QWORD PTR[24+rcx] + sbb r12,0 + + cmovc r8,rax + cmovc r9,rsi + cmovc r10,rbx + cmovc r11,rbp + + DB 0F3h,0C3h ;repret +__lshift_mod_256 ENDP + + +PUBLIC lshift_mod_256 + + +ALIGN 32 +lshift_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_lshift_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + +$L$SEH_body_lshift_mod_256:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + +$L$oop_lshift_mod_256:: + call __lshift_mod_256 + dec edx + jnz $L$oop_lshift_mod_256 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + mov r12,QWORD PTR[rsp] + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_lshift_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_lshift_mod_256:: +lshift_mod_256 ENDP + + +PUBLIC rshift_mod_256 + + +ALIGN 32 +rshift_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_rshift_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + sub rsp,8 + +$L$SEH_body_rshift_mod_256:: + + + mov rbp,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + +$L$oop_rshift_mod_256:: + mov r8,rbp + and rbp,1 + mov rax,QWORD PTR[rcx] + neg rbp + mov rsi,QWORD PTR[8+rcx] + mov rbx,QWORD PTR[16+rcx] + + and rax,rbp + and rsi,rbp + and rbx,rbp + and rbp,QWORD PTR[24+rcx] + + add r8,rax + adc r9,rsi + adc r10,rbx + adc r11,rbp + sbb rax,rax + + shr r8,1 + mov rbp,r9 + shr r9,1 + mov rbx,r10 + shr r10,1 + mov rsi,r11 + shr r11,1 + + shl rbp,63 + shl rbx,63 + or rbp,r8 + shl rsi,63 + or r9,rbx + shl rax,63 + or r10,rsi + or r11,rax + + dec edx + jnz $L$oop_rshift_mod_256 + + mov QWORD PTR[rdi],rbp + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_rshift_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_rshift_mod_256:: +rshift_mod_256 ENDP + + +PUBLIC cneg_mod_256 + + +ALIGN 32 +cneg_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_cneg_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + +$L$SEH_body_cneg_mod_256:: + + + mov r12,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r8,r12 + mov r11,QWORD PTR[24+rsi] + or r12,r9 + or r12,r10 + or r12,r11 + mov rbp,-1 + + mov rax,QWORD PTR[rcx] + cmovnz r12,rbp + mov rsi,QWORD PTR[8+rcx] + mov rbx,QWORD PTR[16+rcx] + and rax,r12 + mov rbp,QWORD PTR[24+rcx] + and rsi,r12 + and rbx,r12 + and rbp,r12 + + sub rax,r8 + sbb rsi,r9 + sbb rbx,r10 + sbb rbp,r11 + + or rdx,rdx + + cmovz rax,r8 + cmovz rsi,r9 + mov QWORD PTR[rdi],rax + cmovz rbx,r10 + mov QWORD PTR[8+rdi],rsi + cmovz rbp,r11 + mov QWORD PTR[16+rdi],rbx + mov QWORD PTR[24+rdi],rbp + + mov r12,QWORD PTR[rsp] + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_cneg_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_cneg_mod_256:: +cneg_mod_256 ENDP + + +PUBLIC sub_mod_256 + + +ALIGN 32 +sub_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sub_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + sub rsp,8 + +$L$SEH_body_sub_mod_256:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + + sub r8,QWORD PTR[rdx] + mov rax,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rdx] + mov rsi,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rdx] + mov rbx,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rdx] + mov rbp,QWORD PTR[24+rcx] + sbb rdx,rdx + + and rax,rdx + and rsi,rdx + and rbx,rdx + and rbp,rdx + + add r8,rax + adc r9,rsi + mov QWORD PTR[rdi],r8 + adc r10,rbx + mov QWORD PTR[8+rdi],r9 + adc r11,rbp + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_sub_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sub_mod_256:: +sub_mod_256 ENDP + + +PUBLIC check_mod_256 + + +ALIGN 32 +check_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_check_mod_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rax,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + + mov r8,rax + or rax,r9 + or rax,r10 + or rax,r11 + + sub r8,QWORD PTR[rsi] + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + sbb r11,QWORD PTR[24+rsi] + sbb rsi,rsi + + mov rdx,1 + cmp rax,0 + cmovne rax,rdx + and rax,rsi +$L$SEH_epilogue_check_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_check_mod_256:: +check_mod_256 ENDP + + +PUBLIC add_n_check_mod_256 + + +ALIGN 32 +add_n_check_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_add_n_check_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + sub rsp,8 + +$L$SEH_body_add_n_check_mod_256:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + mov rax,r8 + adc r10,QWORD PTR[16+rdx] + mov rsi,r9 + adc r11,QWORD PTR[24+rdx] + sbb rdx,rdx + + mov rbx,r10 + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rcx] + mov rbp,r11 + sbb r11,QWORD PTR[24+rcx] + sbb rdx,0 + + cmovc r8,rax + cmovc r9,rsi + mov QWORD PTR[rdi],r8 + cmovc r10,rbx + mov QWORD PTR[8+rdi],r9 + cmovc r11,rbp + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + or r8,r9 + or r10,r11 + or r8,r10 + mov rax,1 + cmovz rax,r8 + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_add_n_check_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_add_n_check_mod_256:: +add_n_check_mod_256 ENDP + + +PUBLIC sub_n_check_mod_256 + + +ALIGN 32 +sub_n_check_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sub_n_check_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + sub rsp,8 + +$L$SEH_body_sub_n_check_mod_256:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + + sub r8,QWORD PTR[rdx] + mov rax,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rdx] + mov rsi,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rdx] + mov rbx,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rdx] + mov rbp,QWORD PTR[24+rcx] + sbb rdx,rdx + + and rax,rdx + and rsi,rdx + and rbx,rdx + and rbp,rdx + + add r8,rax + adc r9,rsi + mov QWORD PTR[rdi],r8 + adc r10,rbx + mov QWORD PTR[8+rdi],r9 + adc r11,rbp + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + or r8,r9 + or r10,r11 + or r8,r10 + mov rax,1 + cmovz rax,r8 + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_sub_n_check_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sub_n_check_mod_256:: +sub_n_check_mod_256 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_add_mod_256 + DD imagerel $L$SEH_body_add_mod_256 + DD imagerel $L$SEH_info_add_mod_256_prologue + + DD imagerel $L$SEH_body_add_mod_256 + DD imagerel $L$SEH_epilogue_add_mod_256 + DD imagerel $L$SEH_info_add_mod_256_body + + DD imagerel $L$SEH_epilogue_add_mod_256 + DD imagerel $L$SEH_end_add_mod_256 + DD imagerel $L$SEH_info_add_mod_256_epilogue + + DD imagerel $L$SEH_begin_mul_by_3_mod_256 + DD imagerel $L$SEH_body_mul_by_3_mod_256 + DD imagerel $L$SEH_info_mul_by_3_mod_256_prologue + + DD imagerel $L$SEH_body_mul_by_3_mod_256 + DD imagerel $L$SEH_epilogue_mul_by_3_mod_256 + DD imagerel $L$SEH_info_mul_by_3_mod_256_body + + DD imagerel $L$SEH_epilogue_mul_by_3_mod_256 + DD imagerel $L$SEH_end_mul_by_3_mod_256 + DD imagerel $L$SEH_info_mul_by_3_mod_256_epilogue + + DD imagerel $L$SEH_begin_lshift_mod_256 + DD imagerel $L$SEH_body_lshift_mod_256 + DD imagerel $L$SEH_info_lshift_mod_256_prologue + + DD imagerel $L$SEH_body_lshift_mod_256 + DD imagerel $L$SEH_epilogue_lshift_mod_256 + DD imagerel $L$SEH_info_lshift_mod_256_body + + DD imagerel $L$SEH_epilogue_lshift_mod_256 + DD imagerel $L$SEH_end_lshift_mod_256 + DD imagerel $L$SEH_info_lshift_mod_256_epilogue + + DD imagerel $L$SEH_begin_rshift_mod_256 + DD imagerel $L$SEH_body_rshift_mod_256 + DD imagerel $L$SEH_info_rshift_mod_256_prologue + + DD imagerel $L$SEH_body_rshift_mod_256 + DD imagerel $L$SEH_epilogue_rshift_mod_256 + DD imagerel $L$SEH_info_rshift_mod_256_body + + DD imagerel $L$SEH_epilogue_rshift_mod_256 + DD imagerel $L$SEH_end_rshift_mod_256 + DD imagerel $L$SEH_info_rshift_mod_256_epilogue + + DD imagerel $L$SEH_begin_cneg_mod_256 + DD imagerel $L$SEH_body_cneg_mod_256 + DD imagerel $L$SEH_info_cneg_mod_256_prologue + + DD imagerel $L$SEH_body_cneg_mod_256 + DD imagerel $L$SEH_epilogue_cneg_mod_256 + DD imagerel $L$SEH_info_cneg_mod_256_body + + DD imagerel $L$SEH_epilogue_cneg_mod_256 + DD imagerel $L$SEH_end_cneg_mod_256 + DD imagerel $L$SEH_info_cneg_mod_256_epilogue + + DD imagerel $L$SEH_begin_sub_mod_256 + DD imagerel $L$SEH_body_sub_mod_256 + DD imagerel $L$SEH_info_sub_mod_256_prologue + + DD imagerel $L$SEH_body_sub_mod_256 + DD imagerel $L$SEH_epilogue_sub_mod_256 + DD imagerel $L$SEH_info_sub_mod_256_body + + DD imagerel $L$SEH_epilogue_sub_mod_256 + DD imagerel $L$SEH_end_sub_mod_256 + DD imagerel $L$SEH_info_sub_mod_256_epilogue + + DD imagerel $L$SEH_epilogue_check_mod_256 + DD imagerel $L$SEH_end_check_mod_256 + DD imagerel $L$SEH_info_check_mod_256_epilogue + + DD imagerel $L$SEH_begin_add_n_check_mod_256 + DD imagerel $L$SEH_body_add_n_check_mod_256 + DD imagerel $L$SEH_info_add_n_check_mod_256_prologue + + DD imagerel $L$SEH_body_add_n_check_mod_256 + DD imagerel $L$SEH_epilogue_add_n_check_mod_256 + DD imagerel $L$SEH_info_add_n_check_mod_256_body + + DD imagerel $L$SEH_epilogue_add_n_check_mod_256 + DD imagerel $L$SEH_end_add_n_check_mod_256 + DD imagerel $L$SEH_info_add_n_check_mod_256_epilogue + + DD imagerel $L$SEH_begin_sub_n_check_mod_256 + DD imagerel $L$SEH_body_sub_n_check_mod_256 + DD imagerel $L$SEH_info_sub_n_check_mod_256_prologue + + DD imagerel $L$SEH_body_sub_n_check_mod_256 + DD imagerel $L$SEH_epilogue_sub_n_check_mod_256 + DD imagerel $L$SEH_info_sub_n_check_mod_256_body + + DD imagerel $L$SEH_epilogue_sub_n_check_mod_256 + DD imagerel $L$SEH_end_sub_n_check_mod_256 + DD imagerel $L$SEH_info_sub_n_check_mod_256_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_add_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_add_mod_256_body:: +DB 1,0,9,0 +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_add_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_by_3_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_by_3_mod_256_body:: +DB 1,0,11,0 +DB 000h,0c4h,000h,000h +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +$L$SEH_info_mul_by_3_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_lshift_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_lshift_mod_256_body:: +DB 1,0,11,0 +DB 000h,0c4h,000h,000h +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +$L$SEH_info_lshift_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_rshift_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_rshift_mod_256_body:: +DB 1,0,9,0 +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_rshift_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_cneg_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_cneg_mod_256_body:: +DB 1,0,11,0 +DB 000h,0c4h,000h,000h +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +$L$SEH_info_cneg_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sub_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sub_mod_256_body:: +DB 1,0,9,0 +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sub_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_check_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_add_n_check_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_add_n_check_mod_256_body:: +DB 1,0,9,0 +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_add_n_check_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sub_n_check_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sub_n_check_mod_256_body:: +DB 1,0,9,0 +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sub_n_check_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/add_mod_384-armv8.asm b/crypto/blst_src/build/win64/add_mod_384-armv8.asm new file mode 100644 index 00000000000..4bf703a6da0 --- /dev/null +++ b/crypto/blst_src/build/win64/add_mod_384-armv8.asm @@ -0,0 +1,1001 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + + EXPORT |add_mod_384|[FUNC] + ALIGN 32 +|add_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__add_mod_384| PROC + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + +|__add_mod_384_ab_are_loaded| + adds x10,x10,x16 + adcs x11,x11,x17 + adcs x12,x12,x19 + adcs x13,x13,x20 + adcs x14,x14,x21 + adcs x15,x15,x22 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csello x10,x10,x16 + csello x11,x11,x17 + csello x12,x12,x19 + csello x13,x13,x20 + csello x14,x14,x21 + csello x15,x15,x22 + + ret + ENDP + + + + EXPORT |add_mod_384x|[FUNC] + ALIGN 32 +|add_mod_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __add_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __add_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |rshift_mod_384|[FUNC] + ALIGN 32 +|rshift_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +|$Loop_rshift_mod_384| + sub x2,x2,#1 + bl __rshift_mod_384 + cbnz x2,|$Loop_rshift_mod_384| + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__rshift_mod_384| PROC + sbfx x22,x10,#0,#1 + and x16,x22,x4 + and x17,x22,x5 + adds x10,x10,x16 + and x19,x22,x6 + adcs x11,x11,x17 + and x20,x22,x7 + adcs x12,x12,x19 + and x21,x22,x8 + adcs x13,x13,x20 + and x22,x22,x9 + adcs x14,x14,x21 + extr x10,x11,x10,#1 // a[0:5] >>= 1 + adcs x15,x15,x22 + extr x11,x12,x11,#1 + adc x22,xzr,xzr + extr x12,x13,x12,#1 + extr x13,x14,x13,#1 + extr x14,x15,x14,#1 + extr x15,x22,x15,#1 + ret + ENDP + + + + EXPORT |div_by_2_mod_384|[FUNC] + ALIGN 32 +|div_by_2_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __rshift_mod_384 + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |lshift_mod_384|[FUNC] + ALIGN 32 +|lshift_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + +|$Loop_lshift_mod_384| + sub x2,x2,#1 + bl __lshift_mod_384 + cbnz x2,|$Loop_lshift_mod_384| + + ldr x30,[sp,#8] + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__lshift_mod_384| PROC + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x16,x10,x4 + sbcs x17,x11,x5 + sbcs x19,x12,x6 + sbcs x20,x13,x7 + sbcs x21,x14,x8 + sbcs x22,x15,x9 + sbcs xzr,x3,xzr + + csello x10,x10,x16 + csello x11,x11,x17 + csello x12,x12,x19 + csello x13,x13,x20 + csello x14,x14,x21 + csello x15,x15,x22 + + ret + ENDP + + + + EXPORT |mul_by_3_mod_384|[FUNC] + ALIGN 32 +|mul_by_3_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |mul_by_8_mod_384|[FUNC] + ALIGN 32 +|mul_by_8_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |mul_by_3_mod_384x|[FUNC] + ALIGN 32 +|mul_by_3_mod_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + + bl __add_mod_384_ab_are_loaded + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + + ldp x16,x17,[x1,#48] + ldp x19,x20,[x1,#64] + ldp x21,x22,[x1,#80] + + bl __add_mod_384_ab_are_loaded + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |mul_by_8_mod_384x|[FUNC] + ALIGN 32 +|mul_by_8_mod_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + ldp x14,x15,[x1,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __lshift_mod_384 + bl __lshift_mod_384 + bl __lshift_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |cneg_mod_384|[FUNC] + ALIGN 32 +|cneg_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x10,x11,[x1] + ldp x4,x5,[x3] + ldp x12,x13,[x1,#16] + ldp x6,x7,[x3,#16] + + subs x16,x4,x10 + ldp x14,x15,[x1,#32] + ldp x8,x9,[x3,#32] + orr x3,x10,x11 + sbcs x17,x5,x11 + orr x3,x3,x12 + sbcs x19,x6,x12 + orr x3,x3,x13 + sbcs x20,x7,x13 + orr x3,x3,x14 + sbcs x21,x8,x14 + orr x3,x3,x15 + sbc x22,x9,x15 + + cmp x3,#0 + csetmne x3 + ands x2,x2,x3 + + cseleq x10,x10,x16 + cseleq x11,x11,x17 + cseleq x12,x12,x19 + cseleq x13,x13,x20 + stp x10,x11,[x0] + cseleq x14,x14,x21 + stp x12,x13,[x0,#16] + cseleq x15,x15,x22 + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |sub_mod_384|[FUNC] + ALIGN 32 +|sub_mod_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + stp x14,x15,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__sub_mod_384| PROC + ldp x10,x11,[x1] + ldp x16,x17,[x2] + ldp x12,x13,[x1,#16] + ldp x19,x20,[x2,#16] + ldp x14,x15,[x1,#32] + ldp x21,x22,[x2,#32] + + subs x10,x10,x16 + sbcs x11,x11,x17 + sbcs x12,x12,x19 + sbcs x13,x13,x20 + sbcs x14,x14,x21 + sbcs x15,x15,x22 + sbc x3,xzr,xzr + + and x16,x4,x3 + and x17,x5,x3 + adds x10,x10,x16 + and x19,x6,x3 + adcs x11,x11,x17 + and x20,x7,x3 + adcs x12,x12,x19 + and x21,x8,x3 + adcs x13,x13,x20 + and x22,x9,x3 + adcs x14,x14,x21 + adc x15,x15,x22 + + ret + ENDP + + + + EXPORT |sub_mod_384x|[FUNC] + ALIGN 32 +|sub_mod_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x3] + ldp x6,x7,[x3,#16] + ldp x8,x9,[x3,#32] + + bl __sub_mod_384 + + stp x10,x11,[x0] + add x1,x1,#48 + stp x12,x13,[x0,#16] + add x2,x2,#48 + stp x14,x15,[x0,#32] + + bl __sub_mod_384 + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |mul_by_1_plus_i_mod_384x|[FUNC] + ALIGN 32 +|mul_by_1_plus_i_mod_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x4,x5,[x2] + ldp x6,x7,[x2,#16] + ldp x8,x9,[x2,#32] + add x2,x1,#48 + + bl __sub_mod_384 // a->re - a->im + + ldp x16,x17,[x1] + ldp x19,x20,[x1,#16] + ldp x21,x22,[x1,#32] + stp x10,x11,[x0] + ldp x10,x11,[x1,#48] + stp x12,x13,[x0,#16] + ldp x12,x13,[x1,#64] + stp x14,x15,[x0,#32] + ldp x14,x15,[x1,#80] + + bl __add_mod_384_ab_are_loaded // a->re + a->im + ldr x30,[sp,#8] + + stp x10,x11,[x0,#48] + stp x12,x13,[x0,#64] + stp x14,x15,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |sgn0_pty_mod_384|[FUNC] + ALIGN 32 +|sgn0_pty_mod_384| PROC + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x0,x10,#1 + adds x10,x10,x10 + adcs x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x3,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x3,x3,xzr + + mvn x3,x3 + and x3,x3,#2 + orr x0,x0,x3 + + ret + ENDP + + + + EXPORT |sgn0_pty_mod_384x|[FUNC] + ALIGN 32 +|sgn0_pty_mod_384x| PROC + ldp x10,x11,[x0] + ldp x12,x13,[x0,#16] + ldp x14,x15,[x0,#32] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + ldp x8,x9,[x1,#32] + + and x2,x10,#1 + orr x3,x10,x11 + adds x10,x10,x10 + orr x3,x3,x12 + adcs x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + ldp x10,x11,[x0,#48] + ldp x12,x13,[x0,#64] + ldp x14,x15,[x0,#80] + + mvn x16,x16 + and x16,x16,#2 + orr x2,x2,x16 + + and x0,x10,#1 + orr x1,x10,x11 + adds x10,x10,x10 + orr x1,x1,x12 + adcs x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + adcs x15,x15,x15 + adc x16,xzr,xzr + + subs x10,x10,x4 + sbcs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbc x16,x16,xzr + + mvn x16,x16 + and x16,x16,#2 + orr x0,x0,x16 + + cmp x3,#0 + cseleq x3,x0,x2 + + cmp x1,#0 + cselne x1,x0,x2 + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ret + ENDP + + + EXPORT |vec_select_32|[FUNC] + ALIGN 32 +|vec_select_32| PROC + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + ENDP + + + EXPORT |vec_select_48|[FUNC] + ALIGN 32 +|vec_select_48| PROC + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + ENDP + + + EXPORT |vec_select_96|[FUNC] + ALIGN 32 +|vec_select_96| PROC + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + ENDP + + + EXPORT |vec_select_192|[FUNC] + ALIGN 32 +|vec_select_192| PROC + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + ENDP + + + EXPORT |vec_select_144|[FUNC] + ALIGN 32 +|vec_select_144| PROC + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + bit v1.16b, v4.16b, v6.16b + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0] + ret + ENDP + + + EXPORT |vec_select_288|[FUNC] + ALIGN 32 +|vec_select_288| PROC + dup v6.2d, x3 + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + cmeq v6.2d, v6.2d, #0 + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + ld1 {v0.2d, v1.2d, v2.2d}, [x1],#48 + bit v17.16b, v20.16b, v6.16b + ld1 {v3.2d, v4.2d, v5.2d}, [x2],#48 + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0],#48 + bit v0.16b, v3.16b, v6.16b + ld1 {v16.2d, v17.2d, v18.2d}, [x1],#48 + bit v1.16b, v4.16b, v6.16b + ld1 {v19.2d, v20.2d, v21.2d}, [x2],#48 + bit v2.16b, v5.16b, v6.16b + st1 {v0.2d, v1.2d, v2.2d}, [x0],#48 + bit v16.16b, v19.16b, v6.16b + bit v17.16b, v20.16b, v6.16b + bit v18.16b, v21.16b, v6.16b + st1 {v16.2d, v17.2d, v18.2d}, [x0] + ret + ENDP + + + EXPORT |vec_prefetch|[FUNC] + ALIGN 32 +|vec_prefetch| PROC + add x1, x1, x0 + sub x1, x1, #1 + mov x2, #64 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + cselhi x0,x1,x0 + cselhi x2,xzr,x2 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + cselhi x0,x1,x0 + cselhi x2,xzr,x2 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + cselhi x0,x1,x0 + cselhi x2,xzr,x2 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + cselhi x0,x1,x0 + cselhi x2,xzr,x2 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + cselhi x0,x1,x0 + cselhi x2,xzr,x2 + prfm pldl1keep, [x0] + add x0, x0, x2 + cmp x0, x1 + cselhi x0,x1,x0 + prfm pldl1keep, [x0] + ret + ENDP + + + EXPORT |vec_is_zero_16x|[FUNC] + ALIGN 32 +|vec_is_zero_16x| PROC + ld1 {v0.2d}, [x0], #16 + lsr x1, x1, #4 + sub x1, x1, #1 + cbz x1, |$Loop_is_zero_done| + +|$Loop_is_zero| + ld1 {v1.2d}, [x0], #16 + orr v0.16b, v0.16b, v1.16b + sub x1, x1, #1 + cbnz x1, |$Loop_is_zero| + +|$Loop_is_zero_done| + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + cseleq x0,x0,xzr + ret + ENDP + + + EXPORT |vec_is_equal_16x|[FUNC] + ALIGN 32 +|vec_is_equal_16x| PROC + ld1 {v0.2d}, [x0], #16 + ld1 {v1.2d}, [x1], #16 + lsr x2, x2, #4 + eor v0.16b, v0.16b, v1.16b + +|$Loop_is_equal| + sub x2, x2, #1 + cbz x2, |$Loop_is_equal_done| + ld1 {v1.2d}, [x0], #16 + ld1 {v2.2d}, [x1], #16 + eor v1.16b, v1.16b, v2.16b + orr v0.16b, v0.16b, v1.16b + b |$Loop_is_equal| + nop + +|$Loop_is_equal_done| + dup v1.2d, v0.d[1] + orr v0.16b, v0.16b, v1.16b + mov x1, v0.d[0] + mov x0, #1 + cmp x1, #0 + cseleq x0,x0,xzr + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/add_mod_384-x86_64.asm b/crypto/blst_src/build/win64/add_mod_384-x86_64.asm new file mode 100644 index 00000000000..560e02ee105 --- /dev/null +++ b/crypto/blst_src/build/win64/add_mod_384-x86_64.asm @@ -0,0 +1,2531 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC add_mod_384 + + +ALIGN 32 +add_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_add_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_add_mod_384:: + + + call __add_mod_384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_add_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_add_mod_384:: +add_mod_384 ENDP + + +ALIGN 32 +__add_mod_384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + +__add_mod_384_a_is_loaded:: + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + adc r10,QWORD PTR[16+rdx] + mov r14,r8 + adc r11,QWORD PTR[24+rdx] + mov r15,r9 + adc r12,QWORD PTR[32+rdx] + mov rax,r10 + adc r13,QWORD PTR[40+rdx] + mov rbx,r11 + sbb rdx,rdx + + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + mov rbp,r12 + sbb r10,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rcx] + mov rsi,r13 + sbb r13,QWORD PTR[40+rcx] + sbb rdx,0 + + cmovc r8,r14 + cmovc r9,r15 + cmovc r10,rax + mov QWORD PTR[rdi],r8 + cmovc r11,rbx + mov QWORD PTR[8+rdi],r9 + cmovc r12,rbp + mov QWORD PTR[16+rdi],r10 + cmovc r13,rsi + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__add_mod_384 ENDP + +PUBLIC add_mod_384x + + +ALIGN 32 +add_mod_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_add_mod_384x:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,24 + +$L$SEH_body_add_mod_384x:: + + + mov QWORD PTR[rsp],rsi + mov QWORD PTR[8+rsp],rdx + lea rsi,QWORD PTR[48+rsi] + lea rdx,QWORD PTR[48+rdx] + lea rdi,QWORD PTR[48+rdi] + call __add_mod_384 + + mov rsi,QWORD PTR[rsp] + mov rdx,QWORD PTR[8+rsp] + lea rdi,QWORD PTR[((-48))+rdi] + call __add_mod_384 + + mov r15,QWORD PTR[((24+0))+rsp] + + mov r14,QWORD PTR[((24+8))+rsp] + + mov r13,QWORD PTR[((24+16))+rsp] + + mov r12,QWORD PTR[((24+24))+rsp] + + mov rbx,QWORD PTR[((24+32))+rsp] + + mov rbp,QWORD PTR[((24+40))+rsp] + + lea rsp,QWORD PTR[((24+48))+rsp] + +$L$SEH_epilogue_add_mod_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_add_mod_384x:: +add_mod_384x ENDP + + +PUBLIC rshift_mod_384 + + +ALIGN 32 +rshift_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_rshift_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + +$L$SEH_body_rshift_mod_384:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + +$L$oop_rshift_mod_384:: + call __rshift_mod_384 + dec edx + jnz $L$oop_rshift_mod_384 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_rshift_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_rshift_mod_384:: +rshift_mod_384 ENDP + + +ALIGN 32 +__rshift_mod_384 PROC PRIVATE + DB 243,15,30,250 + + mov rsi,1 + mov r14,QWORD PTR[rcx] + and rsi,r8 + mov r15,QWORD PTR[8+rcx] + neg rsi + mov rax,QWORD PTR[16+rcx] + and r14,rsi + mov rbx,QWORD PTR[24+rcx] + and r15,rsi + mov rbp,QWORD PTR[32+rcx] + and rax,rsi + and rbx,rsi + and rbp,rsi + and rsi,QWORD PTR[40+rcx] + + add r14,r8 + adc r15,r9 + adc rax,r10 + adc rbx,r11 + adc rbp,r12 + adc rsi,r13 + sbb r13,r13 + + shr r14,1 + mov r8,r15 + shr r15,1 + mov r9,rax + shr rax,1 + mov r10,rbx + shr rbx,1 + mov r11,rbp + shr rbp,1 + mov r12,rsi + shr rsi,1 + shl r8,63 + shl r9,63 + or r8,r14 + shl r10,63 + or r9,r15 + shl r11,63 + or r10,rax + shl r12,63 + or r11,rbx + shl r13,63 + or r12,rbp + or r13,rsi + + DB 0F3h,0C3h ;repret +__rshift_mod_384 ENDP + +PUBLIC div_by_2_mod_384 + + +ALIGN 32 +div_by_2_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_div_by_2_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + +$L$SEH_body_div_by_2_mod_384:: + + + mov r8,QWORD PTR[rsi] + mov rcx,rdx + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + call __rshift_mod_384 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_div_by_2_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_div_by_2_mod_384:: +div_by_2_mod_384 ENDP + + +PUBLIC lshift_mod_384 + + +ALIGN 32 +lshift_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_lshift_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + +$L$SEH_body_lshift_mod_384:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + +$L$oop_lshift_mod_384:: + add r8,r8 + adc r9,r9 + adc r10,r10 + mov r14,r8 + adc r11,r11 + mov r15,r9 + adc r12,r12 + mov rax,r10 + adc r13,r13 + mov rbx,r11 + sbb rdi,rdi + + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + mov rbp,r12 + sbb r10,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rcx] + mov rsi,r13 + sbb r13,QWORD PTR[40+rcx] + sbb rdi,0 + + mov rdi,QWORD PTR[rsp] + cmovc r8,r14 + cmovc r9,r15 + cmovc r10,rax + cmovc r11,rbx + cmovc r12,rbp + cmovc r13,rsi + + dec edx + jnz $L$oop_lshift_mod_384 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_lshift_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_lshift_mod_384:: +lshift_mod_384 ENDP + + +ALIGN 32 +__lshift_mod_384 PROC PRIVATE + DB 243,15,30,250 + + add r8,r8 + adc r9,r9 + adc r10,r10 + mov r14,r8 + adc r11,r11 + mov r15,r9 + adc r12,r12 + mov rax,r10 + adc r13,r13 + mov rbx,r11 + sbb rdx,rdx + + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + mov rbp,r12 + sbb r10,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rcx] + mov rsi,r13 + sbb r13,QWORD PTR[40+rcx] + sbb rdx,0 + + cmovc r8,r14 + cmovc r9,r15 + cmovc r10,rax + cmovc r11,rbx + cmovc r12,rbp + cmovc r13,rsi + + DB 0F3h,0C3h ;repret +__lshift_mod_384 ENDP + + +PUBLIC mul_by_3_mod_384 + + +ALIGN 32 +mul_by_3_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_by_3_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rsi + +$L$SEH_body_mul_by_3_mod_384:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov rcx,rdx + + call __lshift_mod_384 + + mov rdx,QWORD PTR[rsp] + call __add_mod_384_a_is_loaded + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_mul_by_3_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_by_3_mod_384:: +mul_by_3_mod_384 ENDP + +PUBLIC mul_by_8_mod_384 + + +ALIGN 32 +mul_by_8_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_by_8_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_mul_by_8_mod_384:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov rcx,rdx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_mul_by_8_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_by_8_mod_384:: +mul_by_8_mod_384 ENDP + + +PUBLIC mul_by_3_mod_384x + + +ALIGN 32 +mul_by_3_mod_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_by_3_mod_384x:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rsi + +$L$SEH_body_mul_by_3_mod_384x:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov rcx,rdx + + call __lshift_mod_384 + + mov rdx,QWORD PTR[rsp] + call __add_mod_384_a_is_loaded + + mov rsi,QWORD PTR[rsp] + lea rdi,QWORD PTR[48+rdi] + + mov r8,QWORD PTR[48+rsi] + mov r9,QWORD PTR[56+rsi] + mov r10,QWORD PTR[64+rsi] + mov r11,QWORD PTR[72+rsi] + mov r12,QWORD PTR[80+rsi] + mov r13,QWORD PTR[88+rsi] + + call __lshift_mod_384 + + mov rdx,8*6 + add rdx,QWORD PTR[rsp] + call __add_mod_384_a_is_loaded + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_mul_by_3_mod_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_by_3_mod_384x:: +mul_by_3_mod_384x ENDP + +PUBLIC mul_by_8_mod_384x + + +ALIGN 32 +mul_by_8_mod_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_by_8_mod_384x:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rsi + +$L$SEH_body_mul_by_8_mod_384x:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov rcx,rdx + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + mov rsi,QWORD PTR[rsp] + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + mov r8,QWORD PTR[((48+0))+rsi] + mov r9,QWORD PTR[((48+8))+rsi] + mov r10,QWORD PTR[((48+16))+rsi] + mov r11,QWORD PTR[((48+24))+rsi] + mov r12,QWORD PTR[((48+32))+rsi] + mov r13,QWORD PTR[((48+40))+rsi] + + call __lshift_mod_384 + call __lshift_mod_384 + call __lshift_mod_384 + + mov QWORD PTR[((48+0))+rdi],r8 + mov QWORD PTR[((48+8))+rdi],r9 + mov QWORD PTR[((48+16))+rdi],r10 + mov QWORD PTR[((48+24))+rdi],r11 + mov QWORD PTR[((48+32))+rdi],r12 + mov QWORD PTR[((48+40))+rdi],r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_mul_by_8_mod_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_by_8_mod_384x:: +mul_by_8_mod_384x ENDP + + +PUBLIC cneg_mod_384 + + +ALIGN 32 +cneg_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_cneg_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdx + +$L$SEH_body_cneg_mod_384:: + + + mov rdx,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r8,rdx + mov r11,QWORD PTR[24+rsi] + or rdx,r9 + mov r12,QWORD PTR[32+rsi] + or rdx,r10 + mov r13,QWORD PTR[40+rsi] + or rdx,r11 + mov rsi,-1 + or rdx,r12 + or rdx,r13 + + mov r14,QWORD PTR[rcx] + cmovnz rdx,rsi + mov r15,QWORD PTR[8+rcx] + mov rax,QWORD PTR[16+rcx] + and r14,rdx + mov rbx,QWORD PTR[24+rcx] + and r15,rdx + mov rbp,QWORD PTR[32+rcx] + and rax,rdx + mov rsi,QWORD PTR[40+rcx] + and rbx,rdx + mov rcx,QWORD PTR[rsp] + and rbp,rdx + and rsi,rdx + + sub r14,r8 + sbb r15,r9 + sbb rax,r10 + sbb rbx,r11 + sbb rbp,r12 + sbb rsi,r13 + + or rcx,rcx + + cmovz r14,r8 + cmovz r15,r9 + cmovz rax,r10 + mov QWORD PTR[rdi],r14 + cmovz rbx,r11 + mov QWORD PTR[8+rdi],r15 + cmovz rbp,r12 + mov QWORD PTR[16+rdi],rax + cmovz rsi,r13 + mov QWORD PTR[24+rdi],rbx + mov QWORD PTR[32+rdi],rbp + mov QWORD PTR[40+rdi],rsi + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_cneg_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_cneg_mod_384:: +cneg_mod_384 ENDP + + +PUBLIC sub_mod_384 + + +ALIGN 32 +sub_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sub_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sub_mod_384:: + + + call __sub_mod_384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sub_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sub_mod_384:: +sub_mod_384 ENDP + + +ALIGN 32 +__sub_mod_384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + sub r8,QWORD PTR[rdx] + mov r14,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rdx] + mov r15,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rdx] + mov rax,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rdx] + mov rbx,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rdx] + mov rbp,QWORD PTR[32+rcx] + sbb r13,QWORD PTR[40+rdx] + mov rsi,QWORD PTR[40+rcx] + sbb rdx,rdx + + and r14,rdx + and r15,rdx + and rax,rdx + and rbx,rdx + and rbp,rdx + and rsi,rdx + + add r8,r14 + adc r9,r15 + mov QWORD PTR[rdi],r8 + adc r10,rax + mov QWORD PTR[8+rdi],r9 + adc r11,rbx + mov QWORD PTR[16+rdi],r10 + adc r12,rbp + mov QWORD PTR[24+rdi],r11 + adc r13,rsi + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__sub_mod_384 ENDP + +PUBLIC sub_mod_384x + + +ALIGN 32 +sub_mod_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sub_mod_384x:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,24 + +$L$SEH_body_sub_mod_384x:: + + + mov QWORD PTR[rsp],rsi + mov QWORD PTR[8+rsp],rdx + lea rsi,QWORD PTR[48+rsi] + lea rdx,QWORD PTR[48+rdx] + lea rdi,QWORD PTR[48+rdi] + call __sub_mod_384 + + mov rsi,QWORD PTR[rsp] + mov rdx,QWORD PTR[8+rsp] + lea rdi,QWORD PTR[((-48))+rdi] + call __sub_mod_384 + + mov r15,QWORD PTR[((24+0))+rsp] + + mov r14,QWORD PTR[((24+8))+rsp] + + mov r13,QWORD PTR[((24+16))+rsp] + + mov r12,QWORD PTR[((24+24))+rsp] + + mov rbx,QWORD PTR[((24+32))+rsp] + + mov rbp,QWORD PTR[((24+40))+rsp] + + lea rsp,QWORD PTR[((24+48))+rsp] + +$L$SEH_epilogue_sub_mod_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sub_mod_384x:: +sub_mod_384x ENDP +PUBLIC mul_by_1_plus_i_mod_384x + + +ALIGN 32 +mul_by_1_plus_i_mod_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_by_1_plus_i_mod_384x:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,56 + +$L$SEH_body_mul_by_1_plus_i_mod_384x:: + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov r14,r8 + add r8,QWORD PTR[48+rsi] + mov r15,r9 + adc r9,QWORD PTR[56+rsi] + mov rax,r10 + adc r10,QWORD PTR[64+rsi] + mov rbx,r11 + adc r11,QWORD PTR[72+rsi] + mov rcx,r12 + adc r12,QWORD PTR[80+rsi] + mov rbp,r13 + adc r13,QWORD PTR[88+rsi] + mov QWORD PTR[48+rsp],rdi + sbb rdi,rdi + + sub r14,QWORD PTR[48+rsi] + sbb r15,QWORD PTR[56+rsi] + sbb rax,QWORD PTR[64+rsi] + sbb rbx,QWORD PTR[72+rsi] + sbb rcx,QWORD PTR[80+rsi] + sbb rbp,QWORD PTR[88+rsi] + sbb rsi,rsi + + mov QWORD PTR[rsp],r8 + mov r8,QWORD PTR[rdx] + mov QWORD PTR[8+rsp],r9 + mov r9,QWORD PTR[8+rdx] + mov QWORD PTR[16+rsp],r10 + mov r10,QWORD PTR[16+rdx] + mov QWORD PTR[24+rsp],r11 + mov r11,QWORD PTR[24+rdx] + mov QWORD PTR[32+rsp],r12 + and r8,rsi + mov r12,QWORD PTR[32+rdx] + mov QWORD PTR[40+rsp],r13 + and r9,rsi + mov r13,QWORD PTR[40+rdx] + and r10,rsi + and r11,rsi + and r12,rsi + and r13,rsi + mov rsi,QWORD PTR[48+rsp] + + add r14,r8 + mov r8,QWORD PTR[rsp] + adc r15,r9 + mov r9,QWORD PTR[8+rsp] + adc rax,r10 + mov r10,QWORD PTR[16+rsp] + adc rbx,r11 + mov r11,QWORD PTR[24+rsp] + adc rcx,r12 + mov r12,QWORD PTR[32+rsp] + adc rbp,r13 + mov r13,QWORD PTR[40+rsp] + + mov QWORD PTR[rsi],r14 + mov r14,r8 + mov QWORD PTR[8+rsi],r15 + mov QWORD PTR[16+rsi],rax + mov r15,r9 + mov QWORD PTR[24+rsi],rbx + mov QWORD PTR[32+rsi],rcx + mov rax,r10 + mov QWORD PTR[40+rsi],rbp + + sub r8,QWORD PTR[rdx] + mov rbx,r11 + sbb r9,QWORD PTR[8+rdx] + sbb r10,QWORD PTR[16+rdx] + mov rcx,r12 + sbb r11,QWORD PTR[24+rdx] + sbb r12,QWORD PTR[32+rdx] + mov rbp,r13 + sbb r13,QWORD PTR[40+rdx] + sbb rdi,0 + + cmovc r8,r14 + cmovc r9,r15 + cmovc r10,rax + mov QWORD PTR[48+rsi],r8 + cmovc r11,rbx + mov QWORD PTR[56+rsi],r9 + cmovc r12,rcx + mov QWORD PTR[64+rsi],r10 + cmovc r13,rbp + mov QWORD PTR[72+rsi],r11 + mov QWORD PTR[80+rsi],r12 + mov QWORD PTR[88+rsi],r13 + + mov r15,QWORD PTR[((56+0))+rsp] + + mov r14,QWORD PTR[((56+8))+rsp] + + mov r13,QWORD PTR[((56+16))+rsp] + + mov r12,QWORD PTR[((56+24))+rsp] + + mov rbx,QWORD PTR[((56+32))+rsp] + + mov rbp,QWORD PTR[((56+40))+rsp] + + lea rsp,QWORD PTR[((56+48))+rsp] + +$L$SEH_epilogue_mul_by_1_plus_i_mod_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_by_1_plus_i_mod_384x:: +mul_by_1_plus_i_mod_384x ENDP +PUBLIC sgn0_pty_mod_384 + + +ALIGN 32 +sgn0_pty_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sgn0_pty_mod_384:: + + + mov rdi,rcx + mov rsi,rdx +$L$SEH_body_sgn0_pty_mod_384:: + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov rcx,QWORD PTR[32+rdi] + mov rdx,QWORD PTR[40+rdi] + + xor rax,rax + mov rdi,r8 + add r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rcx,rcx + adc rdx,rdx + adc rax,0 + + sub r8,QWORD PTR[rsi] + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + sbb r11,QWORD PTR[24+rsi] + sbb rcx,QWORD PTR[32+rsi] + sbb rdx,QWORD PTR[40+rsi] + sbb rax,0 + + not rax + and rdi,1 + and rax,2 + or rax,rdi + +$L$SEH_epilogue_sgn0_pty_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sgn0_pty_mod_384:: +sgn0_pty_mod_384 ENDP + +PUBLIC sgn0_pty_mod_384x + + +ALIGN 32 +sgn0_pty_mod_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sgn0_pty_mod_384x:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + push rbx + + sub rsp,8 + +$L$SEH_body_sgn0_pty_mod_384x:: + + + mov r8,QWORD PTR[48+rdi] + mov r9,QWORD PTR[56+rdi] + mov r10,QWORD PTR[64+rdi] + mov r11,QWORD PTR[72+rdi] + mov rcx,QWORD PTR[80+rdi] + mov rdx,QWORD PTR[88+rdi] + + mov rbx,r8 + or r8,r9 + or r8,r10 + or r8,r11 + or r8,rcx + or r8,rdx + + lea rax,QWORD PTR[rdi] + xor rdi,rdi + mov rbp,rbx + add rbx,rbx + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rcx,rcx + adc rdx,rdx + adc rdi,0 + + sub rbx,QWORD PTR[rsi] + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + sbb r11,QWORD PTR[24+rsi] + sbb rcx,QWORD PTR[32+rsi] + sbb rdx,QWORD PTR[40+rsi] + sbb rdi,0 + + mov QWORD PTR[rsp],r8 + not rdi + and rbp,1 + and rdi,2 + or rdi,rbp + + mov r8,QWORD PTR[rax] + mov r9,QWORD PTR[8+rax] + mov r10,QWORD PTR[16+rax] + mov r11,QWORD PTR[24+rax] + mov rcx,QWORD PTR[32+rax] + mov rdx,QWORD PTR[40+rax] + + mov rbx,r8 + or r8,r9 + or r8,r10 + or r8,r11 + or r8,rcx + or r8,rdx + + xor rax,rax + mov rbp,rbx + add rbx,rbx + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rcx,rcx + adc rdx,rdx + adc rax,0 + + sub rbx,QWORD PTR[rsi] + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + sbb r11,QWORD PTR[24+rsi] + sbb rcx,QWORD PTR[32+rsi] + sbb rdx,QWORD PTR[40+rsi] + sbb rax,0 + + mov rbx,QWORD PTR[rsp] + + not rax + + test r8,r8 + cmovz rbp,rdi + + test rbx,rbx + cmovnz rax,rdi + + and rbp,1 + and rax,2 + or rax,rbp + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_sgn0_pty_mod_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sgn0_pty_mod_384x:: +sgn0_pty_mod_384x ENDP +PUBLIC vec_select_32 + + +ALIGN 32 +vec_select_32 PROC PUBLIC + DB 243,15,30,250 + + movd xmm5,r9d + pxor xmm4,xmm4 + pshufd xmm5,xmm5,0 + movdqu xmm0,XMMWORD PTR[rdx] + lea rdx,QWORD PTR[16+rdx] + pcmpeqd xmm5,xmm4 + movdqu xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + pcmpeqd xmm4,xmm5 + lea rcx,QWORD PTR[16+rcx] + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((0+16-16))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((0+16-16))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(0-16)+rcx],xmm0 + pand xmm2,xmm4 + pand xmm3,xmm5 + por xmm2,xmm3 + movdqu XMMWORD PTR[(16-16)+rcx],xmm2 + DB 0F3h,0C3h ;repret +vec_select_32 ENDP +PUBLIC vec_select_48 + + +ALIGN 32 +vec_select_48 PROC PUBLIC + DB 243,15,30,250 + + movd xmm5,r9d + pxor xmm4,xmm4 + pshufd xmm5,xmm5,0 + movdqu xmm0,XMMWORD PTR[rdx] + lea rdx,QWORD PTR[24+rdx] + pcmpeqd xmm5,xmm4 + movdqu xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[24+r8] + pcmpeqd xmm4,xmm5 + lea rcx,QWORD PTR[24+rcx] + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((0+16-24))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((0+16-24))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(0-24)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((16+16-24))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((16+16-24))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(16-24)+rcx],xmm2 + pand xmm0,xmm4 + pand xmm1,xmm5 + por xmm0,xmm1 + movdqu XMMWORD PTR[(32-24)+rcx],xmm0 + DB 0F3h,0C3h ;repret +vec_select_48 ENDP +PUBLIC vec_select_96 + + +ALIGN 32 +vec_select_96 PROC PUBLIC + DB 243,15,30,250 + + movd xmm5,r9d + pxor xmm4,xmm4 + pshufd xmm5,xmm5,0 + movdqu xmm0,XMMWORD PTR[rdx] + lea rdx,QWORD PTR[48+rdx] + pcmpeqd xmm5,xmm4 + movdqu xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[48+r8] + pcmpeqd xmm4,xmm5 + lea rcx,QWORD PTR[48+rcx] + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((0+16-48))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((0+16-48))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(0-48)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((16+16-48))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((16+16-48))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(16-48)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((32+16-48))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((32+16-48))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(32-48)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((48+16-48))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((48+16-48))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(48-48)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((64+16-48))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((64+16-48))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(64-48)+rcx],xmm0 + pand xmm2,xmm4 + pand xmm3,xmm5 + por xmm2,xmm3 + movdqu XMMWORD PTR[(80-48)+rcx],xmm2 + DB 0F3h,0C3h ;repret +vec_select_96 ENDP +PUBLIC vec_select_192 + + +ALIGN 32 +vec_select_192 PROC PUBLIC + DB 243,15,30,250 + + movd xmm5,r9d + pxor xmm4,xmm4 + pshufd xmm5,xmm5,0 + movdqu xmm0,XMMWORD PTR[rdx] + lea rdx,QWORD PTR[96+rdx] + pcmpeqd xmm5,xmm4 + movdqu xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[96+r8] + pcmpeqd xmm4,xmm5 + lea rcx,QWORD PTR[96+rcx] + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((0+16-96))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((0+16-96))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(0-96)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((16+16-96))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((16+16-96))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(16-96)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((32+16-96))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((32+16-96))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(32-96)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((48+16-96))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((48+16-96))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(48-96)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((64+16-96))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((64+16-96))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(64-96)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((80+16-96))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((80+16-96))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(80-96)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((96+16-96))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((96+16-96))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(96-96)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((112+16-96))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((112+16-96))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(112-96)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((128+16-96))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((128+16-96))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(128-96)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((144+16-96))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((144+16-96))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(144-96)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((160+16-96))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((160+16-96))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(160-96)+rcx],xmm0 + pand xmm2,xmm4 + pand xmm3,xmm5 + por xmm2,xmm3 + movdqu XMMWORD PTR[(176-96)+rcx],xmm2 + DB 0F3h,0C3h ;repret +vec_select_192 ENDP +PUBLIC vec_select_144 + + +ALIGN 32 +vec_select_144 PROC PUBLIC + DB 243,15,30,250 + + movd xmm5,r9d + pxor xmm4,xmm4 + pshufd xmm5,xmm5,0 + movdqu xmm0,XMMWORD PTR[rdx] + lea rdx,QWORD PTR[72+rdx] + pcmpeqd xmm5,xmm4 + movdqu xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[72+r8] + pcmpeqd xmm4,xmm5 + lea rcx,QWORD PTR[72+rcx] + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((0+16-72))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((0+16-72))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(0-72)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((16+16-72))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((16+16-72))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(16-72)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((32+16-72))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((32+16-72))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(32-72)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((48+16-72))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((48+16-72))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(48-72)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((64+16-72))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((64+16-72))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(64-72)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((80+16-72))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((80+16-72))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(80-72)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((96+16-72))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((96+16-72))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(96-72)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((112+16-72))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((112+16-72))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(112-72)+rcx],xmm2 + pand xmm0,xmm4 + pand xmm1,xmm5 + por xmm0,xmm1 + movdqu XMMWORD PTR[(128-72)+rcx],xmm0 + DB 0F3h,0C3h ;repret +vec_select_144 ENDP +PUBLIC vec_select_288 + + +ALIGN 32 +vec_select_288 PROC PUBLIC + DB 243,15,30,250 + + movd xmm5,r9d + pxor xmm4,xmm4 + pshufd xmm5,xmm5,0 + movdqu xmm0,XMMWORD PTR[rdx] + lea rdx,QWORD PTR[144+rdx] + pcmpeqd xmm5,xmm4 + movdqu xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[144+r8] + pcmpeqd xmm4,xmm5 + lea rcx,QWORD PTR[144+rcx] + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((0+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((0+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(0-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((16+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((16+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(16-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((32+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((32+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(32-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((48+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((48+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(48-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((64+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((64+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(64-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((80+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((80+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(80-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((96+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((96+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(96-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((112+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((112+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(112-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((128+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((128+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(128-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((144+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((144+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(144-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((160+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((160+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(160-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((176+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((176+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(176-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((192+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((192+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(192-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((208+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((208+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(208-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((224+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((224+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(224-144)+rcx],xmm0 + pand xmm2,xmm4 + movdqu xmm0,XMMWORD PTR[((240+16-144))+rdx] + pand xmm3,xmm5 + movdqu xmm1,XMMWORD PTR[((240+16-144))+r8] + por xmm2,xmm3 + movdqu XMMWORD PTR[(240-144)+rcx],xmm2 + pand xmm0,xmm4 + movdqu xmm2,XMMWORD PTR[((256+16-144))+rdx] + pand xmm1,xmm5 + movdqu xmm3,XMMWORD PTR[((256+16-144))+r8] + por xmm0,xmm1 + movdqu XMMWORD PTR[(256-144)+rcx],xmm0 + pand xmm2,xmm4 + pand xmm3,xmm5 + por xmm2,xmm3 + movdqu XMMWORD PTR[(272-144)+rcx],xmm2 + DB 0F3h,0C3h ;repret +vec_select_288 ENDP +PUBLIC vec_prefetch + + +ALIGN 32 +vec_prefetch PROC PUBLIC + DB 243,15,30,250 + + lea rdx,QWORD PTR[((-1))+rdx*1+rcx] + mov rax,64 + xor r8,r8 + prefetchnta [rcx] + lea rcx,QWORD PTR[rax*1+rcx] + cmp rcx,rdx + cmova rcx,rdx + cmova rax,r8 + prefetchnta [rcx] + lea rcx,QWORD PTR[rax*1+rcx] + cmp rcx,rdx + cmova rcx,rdx + cmova rax,r8 + prefetchnta [rcx] + lea rcx,QWORD PTR[rax*1+rcx] + cmp rcx,rdx + cmova rcx,rdx + cmova rax,r8 + prefetchnta [rcx] + lea rcx,QWORD PTR[rax*1+rcx] + cmp rcx,rdx + cmova rcx,rdx + cmova rax,r8 + prefetchnta [rcx] + lea rcx,QWORD PTR[rax*1+rcx] + cmp rcx,rdx + cmova rcx,rdx + cmova rax,r8 + prefetchnta [rcx] + lea rcx,QWORD PTR[rax*1+rcx] + cmp rcx,rdx + cmova rcx,rdx + prefetchnta [rcx] + DB 0F3h,0C3h ;repret +vec_prefetch ENDP +PUBLIC vec_is_zero_16x + + +ALIGN 32 +vec_is_zero_16x PROC PUBLIC + DB 243,15,30,250 + + shr edx,4 + movdqu xmm0,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + +$L$oop_is_zero:: + dec edx + jz $L$oop_is_zero_done + movdqu xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + por xmm0,xmm1 + jmp $L$oop_is_zero + +$L$oop_is_zero_done:: + pshufd xmm1,xmm0,04eh + por xmm0,xmm1 +DB 102,72,15,126,192 + inc edx + test rax,rax + cmovnz eax,edx + xor eax,1 + DB 0F3h,0C3h ;repret +vec_is_zero_16x ENDP +PUBLIC vec_is_equal_16x + + +ALIGN 32 +vec_is_equal_16x PROC PUBLIC + DB 243,15,30,250 + + shr r8d,4 + movdqu xmm0,XMMWORD PTR[rcx] + movdqu xmm1,XMMWORD PTR[rdx] + sub rdx,rcx + lea rcx,QWORD PTR[16+rcx] + pxor xmm0,xmm1 + +$L$oop_is_equal:: + dec r8d + jz $L$oop_is_equal_done + movdqu xmm1,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdx*1+rcx] + lea rcx,QWORD PTR[16+rcx] + pxor xmm1,xmm2 + por xmm0,xmm1 + jmp $L$oop_is_equal + +$L$oop_is_equal_done:: + pshufd xmm1,xmm0,04eh + por xmm0,xmm1 +DB 102,72,15,126,192 + inc r8d + test rax,rax + cmovnz eax,r8d + xor eax,1 + DB 0F3h,0C3h ;repret +vec_is_equal_16x ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_add_mod_384 + DD imagerel $L$SEH_body_add_mod_384 + DD imagerel $L$SEH_info_add_mod_384_prologue + + DD imagerel $L$SEH_body_add_mod_384 + DD imagerel $L$SEH_epilogue_add_mod_384 + DD imagerel $L$SEH_info_add_mod_384_body + + DD imagerel $L$SEH_epilogue_add_mod_384 + DD imagerel $L$SEH_end_add_mod_384 + DD imagerel $L$SEH_info_add_mod_384_epilogue + + DD imagerel $L$SEH_begin_add_mod_384x + DD imagerel $L$SEH_body_add_mod_384x + DD imagerel $L$SEH_info_add_mod_384x_prologue + + DD imagerel $L$SEH_body_add_mod_384x + DD imagerel $L$SEH_epilogue_add_mod_384x + DD imagerel $L$SEH_info_add_mod_384x_body + + DD imagerel $L$SEH_epilogue_add_mod_384x + DD imagerel $L$SEH_end_add_mod_384x + DD imagerel $L$SEH_info_add_mod_384x_epilogue + + DD imagerel $L$SEH_begin_rshift_mod_384 + DD imagerel $L$SEH_body_rshift_mod_384 + DD imagerel $L$SEH_info_rshift_mod_384_prologue + + DD imagerel $L$SEH_body_rshift_mod_384 + DD imagerel $L$SEH_epilogue_rshift_mod_384 + DD imagerel $L$SEH_info_rshift_mod_384_body + + DD imagerel $L$SEH_epilogue_rshift_mod_384 + DD imagerel $L$SEH_end_rshift_mod_384 + DD imagerel $L$SEH_info_rshift_mod_384_epilogue + + DD imagerel $L$SEH_begin_div_by_2_mod_384 + DD imagerel $L$SEH_body_div_by_2_mod_384 + DD imagerel $L$SEH_info_div_by_2_mod_384_prologue + + DD imagerel $L$SEH_body_div_by_2_mod_384 + DD imagerel $L$SEH_epilogue_div_by_2_mod_384 + DD imagerel $L$SEH_info_div_by_2_mod_384_body + + DD imagerel $L$SEH_epilogue_div_by_2_mod_384 + DD imagerel $L$SEH_end_div_by_2_mod_384 + DD imagerel $L$SEH_info_div_by_2_mod_384_epilogue + + DD imagerel $L$SEH_begin_lshift_mod_384 + DD imagerel $L$SEH_body_lshift_mod_384 + DD imagerel $L$SEH_info_lshift_mod_384_prologue + + DD imagerel $L$SEH_body_lshift_mod_384 + DD imagerel $L$SEH_epilogue_lshift_mod_384 + DD imagerel $L$SEH_info_lshift_mod_384_body + + DD imagerel $L$SEH_epilogue_lshift_mod_384 + DD imagerel $L$SEH_end_lshift_mod_384 + DD imagerel $L$SEH_info_lshift_mod_384_epilogue + + DD imagerel $L$SEH_begin_mul_by_3_mod_384 + DD imagerel $L$SEH_body_mul_by_3_mod_384 + DD imagerel $L$SEH_info_mul_by_3_mod_384_prologue + + DD imagerel $L$SEH_body_mul_by_3_mod_384 + DD imagerel $L$SEH_epilogue_mul_by_3_mod_384 + DD imagerel $L$SEH_info_mul_by_3_mod_384_body + + DD imagerel $L$SEH_epilogue_mul_by_3_mod_384 + DD imagerel $L$SEH_end_mul_by_3_mod_384 + DD imagerel $L$SEH_info_mul_by_3_mod_384_epilogue + + DD imagerel $L$SEH_begin_mul_by_8_mod_384 + DD imagerel $L$SEH_body_mul_by_8_mod_384 + DD imagerel $L$SEH_info_mul_by_8_mod_384_prologue + + DD imagerel $L$SEH_body_mul_by_8_mod_384 + DD imagerel $L$SEH_epilogue_mul_by_8_mod_384 + DD imagerel $L$SEH_info_mul_by_8_mod_384_body + + DD imagerel $L$SEH_epilogue_mul_by_8_mod_384 + DD imagerel $L$SEH_end_mul_by_8_mod_384 + DD imagerel $L$SEH_info_mul_by_8_mod_384_epilogue + + DD imagerel $L$SEH_begin_mul_by_3_mod_384x + DD imagerel $L$SEH_body_mul_by_3_mod_384x + DD imagerel $L$SEH_info_mul_by_3_mod_384x_prologue + + DD imagerel $L$SEH_body_mul_by_3_mod_384x + DD imagerel $L$SEH_epilogue_mul_by_3_mod_384x + DD imagerel $L$SEH_info_mul_by_3_mod_384x_body + + DD imagerel $L$SEH_epilogue_mul_by_3_mod_384x + DD imagerel $L$SEH_end_mul_by_3_mod_384x + DD imagerel $L$SEH_info_mul_by_3_mod_384x_epilogue + + DD imagerel $L$SEH_begin_mul_by_8_mod_384x + DD imagerel $L$SEH_body_mul_by_8_mod_384x + DD imagerel $L$SEH_info_mul_by_8_mod_384x_prologue + + DD imagerel $L$SEH_body_mul_by_8_mod_384x + DD imagerel $L$SEH_epilogue_mul_by_8_mod_384x + DD imagerel $L$SEH_info_mul_by_8_mod_384x_body + + DD imagerel $L$SEH_epilogue_mul_by_8_mod_384x + DD imagerel $L$SEH_end_mul_by_8_mod_384x + DD imagerel $L$SEH_info_mul_by_8_mod_384x_epilogue + + DD imagerel $L$SEH_begin_cneg_mod_384 + DD imagerel $L$SEH_body_cneg_mod_384 + DD imagerel $L$SEH_info_cneg_mod_384_prologue + + DD imagerel $L$SEH_body_cneg_mod_384 + DD imagerel $L$SEH_epilogue_cneg_mod_384 + DD imagerel $L$SEH_info_cneg_mod_384_body + + DD imagerel $L$SEH_epilogue_cneg_mod_384 + DD imagerel $L$SEH_end_cneg_mod_384 + DD imagerel $L$SEH_info_cneg_mod_384_epilogue + + DD imagerel $L$SEH_begin_sub_mod_384 + DD imagerel $L$SEH_body_sub_mod_384 + DD imagerel $L$SEH_info_sub_mod_384_prologue + + DD imagerel $L$SEH_body_sub_mod_384 + DD imagerel $L$SEH_epilogue_sub_mod_384 + DD imagerel $L$SEH_info_sub_mod_384_body + + DD imagerel $L$SEH_epilogue_sub_mod_384 + DD imagerel $L$SEH_end_sub_mod_384 + DD imagerel $L$SEH_info_sub_mod_384_epilogue + + DD imagerel $L$SEH_begin_sub_mod_384x + DD imagerel $L$SEH_body_sub_mod_384x + DD imagerel $L$SEH_info_sub_mod_384x_prologue + + DD imagerel $L$SEH_body_sub_mod_384x + DD imagerel $L$SEH_epilogue_sub_mod_384x + DD imagerel $L$SEH_info_sub_mod_384x_body + + DD imagerel $L$SEH_epilogue_sub_mod_384x + DD imagerel $L$SEH_end_sub_mod_384x + DD imagerel $L$SEH_info_sub_mod_384x_epilogue + + DD imagerel $L$SEH_begin_mul_by_1_plus_i_mod_384x + DD imagerel $L$SEH_body_mul_by_1_plus_i_mod_384x + DD imagerel $L$SEH_info_mul_by_1_plus_i_mod_384x_prologue + + DD imagerel $L$SEH_body_mul_by_1_plus_i_mod_384x + DD imagerel $L$SEH_epilogue_mul_by_1_plus_i_mod_384x + DD imagerel $L$SEH_info_mul_by_1_plus_i_mod_384x_body + + DD imagerel $L$SEH_epilogue_mul_by_1_plus_i_mod_384x + DD imagerel $L$SEH_end_mul_by_1_plus_i_mod_384x + DD imagerel $L$SEH_info_mul_by_1_plus_i_mod_384x_epilogue + + DD imagerel $L$SEH_begin_sgn0_pty_mod_384 + DD imagerel $L$SEH_body_sgn0_pty_mod_384 + DD imagerel $L$SEH_info_sgn0_pty_mod_384_prologue + + DD imagerel $L$SEH_body_sgn0_pty_mod_384 + DD imagerel $L$SEH_epilogue_sgn0_pty_mod_384 + DD imagerel $L$SEH_info_sgn0_pty_mod_384_body + + DD imagerel $L$SEH_epilogue_sgn0_pty_mod_384 + DD imagerel $L$SEH_end_sgn0_pty_mod_384 + DD imagerel $L$SEH_info_sgn0_pty_mod_384_epilogue + + DD imagerel $L$SEH_begin_sgn0_pty_mod_384x + DD imagerel $L$SEH_body_sgn0_pty_mod_384x + DD imagerel $L$SEH_info_sgn0_pty_mod_384x_prologue + + DD imagerel $L$SEH_body_sgn0_pty_mod_384x + DD imagerel $L$SEH_epilogue_sgn0_pty_mod_384x + DD imagerel $L$SEH_info_sgn0_pty_mod_384x_body + + DD imagerel $L$SEH_epilogue_sgn0_pty_mod_384x + DD imagerel $L$SEH_end_sgn0_pty_mod_384x + DD imagerel $L$SEH_info_sgn0_pty_mod_384x_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_add_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_add_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_add_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_add_mod_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_add_mod_384x_body:: +DB 1,0,17,0 +DB 000h,0f4h,003h,000h +DB 000h,0e4h,004h,000h +DB 000h,0d4h,005h,000h +DB 000h,0c4h,006h,000h +DB 000h,034h,007h,000h +DB 000h,054h,008h,000h +DB 000h,074h,00ah,000h +DB 000h,064h,00bh,000h +DB 000h,082h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_add_mod_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_rshift_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_rshift_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_rshift_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_div_by_2_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_div_by_2_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_div_by_2_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_lshift_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_lshift_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_lshift_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_by_3_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_by_3_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_by_3_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_by_8_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_by_8_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_by_8_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_by_3_mod_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_by_3_mod_384x_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_by_3_mod_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_by_8_mod_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_by_8_mod_384x_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_by_8_mod_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_cneg_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_cneg_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_cneg_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sub_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sub_mod_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sub_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sub_mod_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sub_mod_384x_body:: +DB 1,0,17,0 +DB 000h,0f4h,003h,000h +DB 000h,0e4h,004h,000h +DB 000h,0d4h,005h,000h +DB 000h,0c4h,006h,000h +DB 000h,034h,007h,000h +DB 000h,054h,008h,000h +DB 000h,074h,00ah,000h +DB 000h,064h,00bh,000h +DB 000h,082h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sub_mod_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_by_1_plus_i_mod_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_by_1_plus_i_mod_384x_body:: +DB 1,0,17,0 +DB 000h,0f4h,007h,000h +DB 000h,0e4h,008h,000h +DB 000h,0d4h,009h,000h +DB 000h,0c4h,00ah,000h +DB 000h,034h,00bh,000h +DB 000h,054h,00ch,000h +DB 000h,074h,00eh,000h +DB 000h,064h,00fh,000h +DB 000h,0c2h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_by_1_plus_i_mod_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sgn0_pty_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sgn0_pty_mod_384_body:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sgn0_pty_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sgn0_pty_mod_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sgn0_pty_mod_384x_body:: +DB 1,0,9,0 +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sgn0_pty_mod_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/add_mod_384x384-x86_64.asm b/crypto/blst_src/build/win64/add_mod_384x384-x86_64.asm new file mode 100644 index 00000000000..59b51a910ce --- /dev/null +++ b/crypto/blst_src/build/win64/add_mod_384x384-x86_64.asm @@ -0,0 +1,338 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + +ALIGN 32 +__add_mod_384x384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + + add r8,QWORD PTR[rdx] + mov r15,QWORD PTR[56+rsi] + adc r9,QWORD PTR[8+rdx] + mov rax,QWORD PTR[64+rsi] + adc r10,QWORD PTR[16+rdx] + mov rbx,QWORD PTR[72+rsi] + adc r11,QWORD PTR[24+rdx] + mov rbp,QWORD PTR[80+rsi] + adc r12,QWORD PTR[32+rdx] + mov rsi,QWORD PTR[88+rsi] + adc r13,QWORD PTR[40+rdx] + mov QWORD PTR[rdi],r8 + adc r14,QWORD PTR[48+rdx] + mov QWORD PTR[8+rdi],r9 + adc r15,QWORD PTR[56+rdx] + mov QWORD PTR[16+rdi],r10 + adc rax,QWORD PTR[64+rdx] + mov QWORD PTR[32+rdi],r12 + mov r8,r14 + adc rbx,QWORD PTR[72+rdx] + mov QWORD PTR[24+rdi],r11 + mov r9,r15 + adc rbp,QWORD PTR[80+rdx] + mov QWORD PTR[40+rdi],r13 + mov r10,rax + adc rsi,QWORD PTR[88+rdx] + mov r11,rbx + sbb rdx,rdx + + sub r14,QWORD PTR[rcx] + sbb r15,QWORD PTR[8+rcx] + mov r12,rbp + sbb rax,QWORD PTR[16+rcx] + sbb rbx,QWORD PTR[24+rcx] + sbb rbp,QWORD PTR[32+rcx] + mov r13,rsi + sbb rsi,QWORD PTR[40+rcx] + sbb rdx,0 + + cmovc r14,r8 + cmovc r15,r9 + cmovc rax,r10 + mov QWORD PTR[48+rdi],r14 + cmovc rbx,r11 + mov QWORD PTR[56+rdi],r15 + cmovc rbp,r12 + mov QWORD PTR[64+rdi],rax + cmovc rsi,r13 + mov QWORD PTR[72+rdi],rbx + mov QWORD PTR[80+rdi],rbp + mov QWORD PTR[88+rdi],rsi + + DB 0F3h,0C3h ;repret +__add_mod_384x384 ENDP + + +ALIGN 32 +__sub_mod_384x384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + + sub r8,QWORD PTR[rdx] + mov r15,QWORD PTR[56+rsi] + sbb r9,QWORD PTR[8+rdx] + mov rax,QWORD PTR[64+rsi] + sbb r10,QWORD PTR[16+rdx] + mov rbx,QWORD PTR[72+rsi] + sbb r11,QWORD PTR[24+rdx] + mov rbp,QWORD PTR[80+rsi] + sbb r12,QWORD PTR[32+rdx] + mov rsi,QWORD PTR[88+rsi] + sbb r13,QWORD PTR[40+rdx] + mov QWORD PTR[rdi],r8 + sbb r14,QWORD PTR[48+rdx] + mov r8,QWORD PTR[rcx] + mov QWORD PTR[8+rdi],r9 + sbb r15,QWORD PTR[56+rdx] + mov r9,QWORD PTR[8+rcx] + mov QWORD PTR[16+rdi],r10 + sbb rax,QWORD PTR[64+rdx] + mov r10,QWORD PTR[16+rcx] + mov QWORD PTR[24+rdi],r11 + sbb rbx,QWORD PTR[72+rdx] + mov r11,QWORD PTR[24+rcx] + mov QWORD PTR[32+rdi],r12 + sbb rbp,QWORD PTR[80+rdx] + mov r12,QWORD PTR[32+rcx] + mov QWORD PTR[40+rdi],r13 + sbb rsi,QWORD PTR[88+rdx] + mov r13,QWORD PTR[40+rcx] + sbb rdx,rdx + + and r8,rdx + and r9,rdx + and r10,rdx + and r11,rdx + and r12,rdx + and r13,rdx + + add r14,r8 + adc r15,r9 + mov QWORD PTR[48+rdi],r14 + adc rax,r10 + mov QWORD PTR[56+rdi],r15 + adc rbx,r11 + mov QWORD PTR[64+rdi],rax + adc rbp,r12 + mov QWORD PTR[72+rdi],rbx + adc rsi,r13 + mov QWORD PTR[80+rdi],rbp + mov QWORD PTR[88+rdi],rsi + + DB 0F3h,0C3h ;repret +__sub_mod_384x384 ENDP + +PUBLIC add_mod_384x384 + + +ALIGN 32 +add_mod_384x384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_add_mod_384x384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_add_mod_384x384:: + + + call __add_mod_384x384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_add_mod_384x384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_add_mod_384x384:: +add_mod_384x384 ENDP + +PUBLIC sub_mod_384x384 + + +ALIGN 32 +sub_mod_384x384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sub_mod_384x384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sub_mod_384x384:: + + + call __sub_mod_384x384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sub_mod_384x384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sub_mod_384x384:: +sub_mod_384x384 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_add_mod_384x384 + DD imagerel $L$SEH_body_add_mod_384x384 + DD imagerel $L$SEH_info_add_mod_384x384_prologue + + DD imagerel $L$SEH_body_add_mod_384x384 + DD imagerel $L$SEH_epilogue_add_mod_384x384 + DD imagerel $L$SEH_info_add_mod_384x384_body + + DD imagerel $L$SEH_epilogue_add_mod_384x384 + DD imagerel $L$SEH_end_add_mod_384x384 + DD imagerel $L$SEH_info_add_mod_384x384_epilogue + + DD imagerel $L$SEH_begin_sub_mod_384x384 + DD imagerel $L$SEH_body_sub_mod_384x384 + DD imagerel $L$SEH_info_sub_mod_384x384_prologue + + DD imagerel $L$SEH_body_sub_mod_384x384 + DD imagerel $L$SEH_epilogue_sub_mod_384x384 + DD imagerel $L$SEH_info_sub_mod_384x384_body + + DD imagerel $L$SEH_epilogue_sub_mod_384x384 + DD imagerel $L$SEH_end_sub_mod_384x384 + DD imagerel $L$SEH_info_sub_mod_384x384_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_add_mod_384x384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_add_mod_384x384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_add_mod_384x384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sub_mod_384x384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sub_mod_384x384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sub_mod_384x384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/blst.def b/crypto/blst_src/build/win64/blst.def new file mode 100644 index 00000000000..dda95336a93 --- /dev/null +++ b/crypto/blst_src/build/win64/blst.def @@ -0,0 +1,221 @@ +LIBRARY blst + +EXPORTS + blst_scalar_from_uint32 + blst_uint32_from_scalar + blst_scalar_from_uint64 + blst_uint64_from_scalar + blst_scalar_from_bendian + blst_bendian_from_scalar + blst_scalar_from_lendian + blst_lendian_from_scalar + blst_scalar_fr_check + blst_sk_check + blst_sk_add_n_check + blst_sk_sub_n_check + blst_sk_mul_n_check + blst_sk_inverse + blst_scalar_from_le_bytes + blst_scalar_from_be_bytes + blst_fr_add + blst_fr_sub + blst_fr_mul_by_3 + blst_fr_lshift + blst_fr_rshift + blst_fr_mul + blst_fr_sqr + blst_fr_cneg + blst_fr_eucl_inverse + blst_fr_inverse + blst_fr_from_uint64 + blst_uint64_from_fr + blst_fr_from_scalar + blst_scalar_from_fr + blst_fp_add + blst_fp_sub + blst_fp_mul_by_3 + blst_fp_mul_by_8 + blst_fp_lshift + blst_fp_mul + blst_fp_sqr + blst_fp_cneg + blst_fp_eucl_inverse + blst_fp_inverse + blst_fp_sqrt + blst_fp_from_uint32 + blst_uint32_from_fp + blst_fp_from_uint64 + blst_uint64_from_fp + blst_fp_from_bendian + blst_bendian_from_fp + blst_fp_from_lendian + blst_lendian_from_fp + blst_fp2_add + blst_fp2_sub + blst_fp2_mul_by_3 + blst_fp2_mul_by_8 + blst_fp2_lshift + blst_fp2_mul + blst_fp2_sqr + blst_fp2_cneg + blst_fp2_eucl_inverse + blst_fp2_inverse + blst_fp2_sqrt + blst_fp12_sqr + blst_fp12_cyclotomic_sqr + blst_fp12_mul + blst_fp12_mul_by_xy00z0 + blst_fp12_conjugate + blst_fp12_inverse + blst_fp12_frobenius_map + blst_fp12_is_equal + blst_fp12_is_one + blst_fp12_in_group + blst_fp12_one + blst_p1_add + blst_p1_add_or_double + blst_p1_add_affine + blst_p1_add_or_double_affine + blst_p1_double + blst_p1_mult + blst_p1_cneg + blst_p1_to_affine + blst_p1_from_affine + blst_p1_on_curve + blst_p1_in_g1 + blst_p1_is_equal + blst_p1_is_inf + blst_p1_generator + blst_p1_affine_on_curve + blst_p1_affine_in_g1 + blst_p1_affine_is_equal + blst_p1_affine_is_inf + blst_p1_affine_generator + blst_p2_add + blst_p2_add_or_double + blst_p2_add_affine + blst_p2_add_or_double_affine + blst_p2_double + blst_p2_mult + blst_p2_cneg + blst_p2_to_affine + blst_p2_from_affine + blst_p2_on_curve + blst_p2_in_g2 + blst_p2_is_equal + blst_p2_is_inf + blst_p2_generator + blst_p2_affine_on_curve + blst_p2_affine_in_g2 + blst_p2_affine_is_equal + blst_p2_affine_is_inf + blst_p2_affine_generator + blst_p1s_to_affine + blst_p1s_add + blst_p1s_mult_wbits_precompute_sizeof + blst_p1s_mult_wbits_precompute + blst_p1s_mult_wbits_scratch_sizeof + blst_p1s_mult_wbits + blst_p1s_mult_pippenger_scratch_sizeof + blst_p1s_mult_pippenger + blst_p1s_tile_pippenger + blst_p2s_to_affine + blst_p2s_add + blst_p2s_mult_wbits_precompute_sizeof + blst_p2s_mult_wbits_precompute + blst_p2s_mult_wbits_scratch_sizeof + blst_p2s_mult_wbits + blst_p2s_mult_pippenger_scratch_sizeof + blst_p2s_mult_pippenger + blst_p2s_tile_pippenger + blst_map_to_g1 + blst_map_to_g2 + blst_encode_to_g1 + blst_hash_to_g1 + blst_encode_to_g2 + blst_hash_to_g2 + blst_p1_serialize + blst_p1_compress + blst_p1_affine_serialize + blst_p1_affine_compress + blst_p1_uncompress + blst_p1_deserialize + blst_p2_serialize + blst_p2_compress + blst_p2_affine_serialize + blst_p2_affine_compress + blst_p2_uncompress + blst_p2_deserialize + blst_keygen + blst_sk_to_pk_in_g1 + blst_sign_pk_in_g1 + blst_sk_to_pk_in_g2 + blst_sign_pk_in_g2 + blst_miller_loop + blst_miller_loop_n + blst_final_exp + blst_precompute_lines + blst_miller_loop_lines + blst_fp12_finalverify + blst_pairing_sizeof + blst_pairing_init + blst_pairing_get_dst + blst_pairing_commit + blst_pairing_aggregate_pk_in_g2 + blst_pairing_chk_n_aggr_pk_in_g2 + blst_pairing_mul_n_aggregate_pk_in_g2 + blst_pairing_chk_n_mul_n_aggr_pk_in_g2 + blst_pairing_aggregate_pk_in_g1 + blst_pairing_chk_n_aggr_pk_in_g1 + blst_pairing_mul_n_aggregate_pk_in_g1 + blst_pairing_chk_n_mul_n_aggr_pk_in_g1 + blst_pairing_merge + blst_pairing_finalverify + blst_aggregate_in_g1 + blst_aggregate_in_g2 + blst_aggregated_in_g1 + blst_aggregated_in_g2 + blst_core_verify_pk_in_g1 + blst_core_verify_pk_in_g2 + BLS12_381_G1 + BLS12_381_NEG_G1 + BLS12_381_G2 + BLS12_381_NEG_G2 + blst_fr_ct_bfly + blst_fr_gs_bfly + blst_fr_to + blst_fr_from + blst_fp_to + blst_fp_from + blst_fp_is_square + blst_fp2_is_square + blst_p1_from_jacobian + blst_p2_from_jacobian + blst_sk_to_pk2_in_g1 + blst_sign_pk2_in_g1 + blst_sk_to_pk2_in_g2 + blst_sign_pk2_in_g2 + blst_uniq_sizeof + blst_uniq_init + blst_uniq_test + blst_expand_message_xmd + blst_p1_unchecked_mult + blst_p2_unchecked_mult + blst_pairing_raw_aggregate + blst_pairing_as_fp12 + blst_bendian_from_fp12 + blst_keygen_v3 + blst_keygen_v4_5 + blst_keygen_v5 + blst_derive_master_eip2333 + blst_derive_child_eip2333 + blst_scalar_from_hexascii + blst_fr_from_hexascii + blst_fp_from_hexascii + blst_p1_sizeof + blst_p1_affine_sizeof + blst_p2_sizeof + blst_p2_affine_sizeof + blst_fp12_sizeof + blst_sha256 + diff --git a/crypto/blst_src/build/win64/ct_inverse_mod_256-armv8.asm b/crypto/blst_src/build/win64/ct_inverse_mod_256-armv8.asm new file mode 100644 index 00000000000..a4467904612 --- /dev/null +++ b/crypto/blst_src/build/win64/ct_inverse_mod_256-armv8.asm @@ -0,0 +1,786 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + + EXPORT |ct_inverse_mod_256|[FUNC] + ALIGN 32 +|ct_inverse_mod_256| PROC + DCDU 3573752639 + stp x29, x30, [sp,#-80]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + sub sp, sp, #1040 + + ldp x4, x5, [x1,#8*0] + ldp x6, x7, [x1,#8*2] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + str x0, [sp] + + ldp x8, x9, [x2,#8*0] + ldp x10, x11, [x2,#8*2] + + stp x4, x5, [x1,#8*0] // copy input to |a| + stp x6, x7, [x1,#8*2] + stp x8, x9, [x1,#8*4] // copy modulus to |b| + stp x10, x11, [x1,#8*6] + + ////////////////////////////////////////// first iteration + bl |$Lab_approximation_31_256_loaded| + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + str x12,[x0,#8*8] // initialize |u| with |f0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to dst |b| + bl __smul_256_n_shift_by_31 + str x12, [x0,#8*9] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + ldr x8, [x1,#8*8] // |u| + ldr x9, [x1,#8*13] // |v| + madd x4, x16, x8, xzr // |u|*|f0| + madd x4, x17, x9, x4 // |v|*|g0| + str x4, [x0,#8*4] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*5] + stp x5, x5, [x0,#8*7] + + madd x4, x12, x8, xzr // |u|*|f1| + madd x4, x13, x9, x4 // |v|*|g1| + str x4, [x0,#8*9] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*10] + stp x5, x5, [x0,#8*12] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + adc x22, x22, x23 + stp x22, x22, [x0,#8*4] + stp x22, x22, [x0,#8*6] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + bl __ab_approximation_31_256 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_256_n_shift_by_31 + mov x16, x12 // corrected |f0| + mov x17, x13 // corrected |g0| + + mov x12, x14 // |f1| + mov x13, x15 // |g1| + add x0, x0, #8*4 // pointer to destination |b| + bl __smul_256_n_shift_by_31 + + add x0, x0, #8*4 // pointer to destination |u| + bl __smul_256x63 + adc x22, x22, x23 + str x22, [x0,#8*4] + + mov x16, x12 // corrected |f1| + mov x17, x13 // corrected |g1| + add x0, x0, #8*5 // pointer to destination |v| + bl __smul_256x63 + bl __smul_512x63_tail + ////////////////////////////////////////// two[!] last iterations + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #47 // 31 + 512 % 31 + //bl __ab_approximation_62_256 // |a| and |b| are exact, + ldr x7, [x1,#8*0] // just load + ldr x11, [x1,#8*4] + bl __inner_loop_62_256 + + mov x16, x14 + mov x17, x15 + ldr x0, [sp] // original out_ptr + bl __smul_256x63 + bl __smul_512x63_tail + ldr x30, [x29,#8] + + smulh x20, x7, x17 // figure out top-most limb + ldp x8, x9, [x3,#8*0] + adc x23, x23, x25 + ldp x10, x11, [x3,#8*2] + + add x20, x20, x23 // x20 is 1, 0 or -1 + asr x19, x20, #63 // sign as mask + + and x23, x8, x19 // add mod<<256 conditionally + and x24, x9, x19 + adds x4, x4, x23 + and x25, x10, x19 + adcs x5, x5, x24 + and x26, x11, x19 + adcs x6, x6, x25 + adcs x7, x22, x26 + adc x20, x20, xzr // x20 is 1, 0 or -1 + + neg x19, x20 + orr x20, x20, x19 // excess bit or sign as mask + asr x19, x19, #63 // excess bit as mask + + and x8, x8, x20 // mask |mod| + and x9, x9, x20 + and x10, x10, x20 + and x11, x11, x20 + + eor x8, x8, x19 // conditionally negate |mod| + eor x9, x9, x19 + adds x8, x8, x19, lsr#63 + eor x10, x10, x19 + adcs x9, x9, xzr + eor x11, x11, x19 + adcs x10, x10, xzr + adc x11, x11, xzr + + adds x4, x4, x8 // final adjustment for |mod|<<256 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*4] + adc x7, x7, x11 + stp x6, x7, [x0,#8*6] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldr x29, [sp],#80 + DCDU 3573752767 + ret + ENDP + +//////////////////////////////////////////////////////////////////////// + + ALIGN 32 +|__smul_256x63| PROC + ldp x4, x5, [x1,#8*0+64] // load |u| (or |v|) + asr x14, x16, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x6, x7, [x1,#8*2+64] + eor x16, x16, x14 // conditionally negate |f_| (or |g_|) + ldr x22, [x1,#8*4+64] + + eor x4, x4, x14 // conditionally negate |u| (or |v|) + sub x16, x16, x14 + eor x5, x5, x14 + adds x4, x4, x14, lsr#63 + eor x6, x6, x14 + adcs x5, x5, xzr + eor x7, x7, x14 + adcs x6, x6, xzr + eor x22, x22, x14 + umulh x19, x4, x16 + adcs x7, x7, xzr + umulh x20, x5, x16 + adcs x22, x22, xzr + umulh x21, x6, x16 + mul x4, x4, x16 + cmp x16, #0 + mul x5, x5, x16 + cselne x22,x22,xzr + mul x6, x6, x16 + adds x5, x5, x19 + mul x24, x7, x16 + adcs x6, x6, x20 + adcs x24, x24, x21 + adc x26, xzr, xzr + ldp x8, x9, [x1,#8*0+104] // load |u| (or |v|) + asr x14, x17, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x10, x11, [x1,#8*2+104] + eor x17, x17, x14 // conditionally negate |f_| (or |g_|) + ldr x23, [x1,#8*4+104] + + eor x8, x8, x14 // conditionally negate |u| (or |v|) + sub x17, x17, x14 + eor x9, x9, x14 + adds x8, x8, x14, lsr#63 + eor x10, x10, x14 + adcs x9, x9, xzr + eor x11, x11, x14 + adcs x10, x10, xzr + eor x23, x23, x14 + umulh x19, x8, x17 + adcs x11, x11, xzr + umulh x20, x9, x17 + adcs x23, x23, xzr + umulh x21, x10, x17 + adc x15, xzr, xzr // used in __smul_512x63_tail + mul x8, x8, x17 + cmp x17, #0 + mul x9, x9, x17 + cselne x23,x23,xzr + mul x10, x10, x17 + adds x9, x9, x19 + mul x25, x11, x17 + adcs x10, x10, x20 + adcs x25, x25, x21 + adc x26, x26, xzr + + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + stp x4, x5, [x0,#8*0] + adcs x24, x24, x25 + stp x6, x24, [x0,#8*2] + + ret + ENDP + + + ALIGN 32 +|__smul_512x63_tail| PROC + umulh x24, x7, x16 + ldp x5, x6, [x1,#8*18] // load rest of |v| + adc x26, x26, xzr + ldr x7, [x1,#8*20] + and x22, x22, x16 + + umulh x11, x11, x17 // resume |v|*|g1| chain + + sub x24, x24, x22 // tie up |u|*|f1| chain + asr x25, x24, #63 + + eor x5, x5, x14 // conditionally negate rest of |v| + eor x6, x6, x14 + adds x5, x5, x15 + eor x7, x7, x14 + adcs x6, x6, xzr + umulh x19, x23, x17 + adc x7, x7, xzr + umulh x20, x5, x17 + add x11, x11, x26 + umulh x21, x6, x17 + + mul x4, x23, x17 + mul x5, x5, x17 + adds x4, x4, x11 + mul x6, x6, x17 + adcs x5, x5, x19 + mul x22, x7, x17 + adcs x6, x6, x20 + adcs x22, x22, x21 + adc x23, xzr, xzr // used in the final step + + adds x4, x4, x24 + adcs x5, x5, x25 + adcs x6, x6, x25 + stp x4, x5, [x0,#8*4] + adcs x22, x22, x25 // carry is used in the final step + stp x6, x22, [x0,#8*6] + + ret + ENDP + + + ALIGN 32 +|__smul_256_n_shift_by_31| PROC + ldp x4, x5, [x1,#8*0+0] // load |a| (or |b|) + asr x24, x12, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x6, x7, [x1,#8*2+0] + eor x25, x12, x24 // conditionally negate |f0| (or |g0|) + + eor x4, x4, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x5, x5, x24 + adds x4, x4, x24, lsr#63 + eor x6, x6, x24 + adcs x5, x5, xzr + eor x7, x7, x24 + umulh x19, x4, x25 + adcs x6, x6, xzr + umulh x20, x5, x25 + adc x7, x7, xzr + umulh x21, x6, x25 + and x24, x24, x25 + umulh x22, x7, x25 + neg x24, x24 + + mul x4, x4, x25 + mul x5, x5, x25 + mul x6, x6, x25 + adds x5, x5, x19 + mul x7, x7, x25 + adcs x6, x6, x20 + adcs x7, x7, x21 + adc x22, x22, x24 + ldp x8, x9, [x1,#8*0+32] // load |a| (or |b|) + asr x24, x13, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x10, x11, [x1,#8*2+32] + eor x25, x13, x24 // conditionally negate |f0| (or |g0|) + + eor x8, x8, x24 // conditionally negate |a| (or |b|) + sub x25, x25, x24 + eor x9, x9, x24 + adds x8, x8, x24, lsr#63 + eor x10, x10, x24 + adcs x9, x9, xzr + eor x11, x11, x24 + umulh x19, x8, x25 + adcs x10, x10, xzr + umulh x20, x9, x25 + adc x11, x11, xzr + umulh x21, x10, x25 + and x24, x24, x25 + umulh x23, x11, x25 + neg x24, x24 + + mul x8, x8, x25 + mul x9, x9, x25 + mul x10, x10, x25 + adds x9, x9, x19 + mul x11, x11, x25 + adcs x10, x10, x20 + adcs x11, x11, x21 + adc x23, x23, x24 + adds x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + adcs x7, x7, x11 + adc x8, x22, x23 + + extr x4, x5, x4, #31 + extr x5, x6, x5, #31 + extr x6, x7, x6, #31 + asr x23, x8, #63 // result's sign as mask + extr x7, x8, x7, #31 + + eor x4, x4, x23 // ensure the result is positive + eor x5, x5, x23 + adds x4, x4, x23, lsr#63 + eor x6, x6, x23 + adcs x5, x5, xzr + eor x7, x7, x23 + adcs x6, x6, xzr + stp x4, x5, [x0,#8*0] + adc x7, x7, xzr + stp x6, x7, [x0,#8*2] + + eor x12, x12, x23 // adjust |f/g| accordingly + eor x13, x13, x23 + sub x12, x12, x23 + sub x13, x13, x23 + + ret + ENDP + + ALIGN 16 +|__ab_approximation_31_256| PROC + ldp x6, x7, [x1,#8*2] + ldp x10, x11, [x1,#8*6] + ldp x4, x5, [x1,#8*0] + ldp x8, x9, [x1,#8*4] + +|$Lab_approximation_31_256_loaded| + orr x19, x7, x11 // check top-most limbs, ... + cmp x19, #0 + cselne x7,x7,x6 + cselne x11,x11,x10 + cselne x6,x6,x5 + orr x19, x7, x11 // and ones before top-most, ... + cselne x10,x10,x9 + + cmp x19, #0 + cselne x7,x7,x6 + cselne x11,x11,x10 + cselne x6,x6,x4 + orr x19, x7, x11 // and one more, ... + cselne x10,x10,x8 + + clz x19, x19 + cmp x19, #64 + cselne x19,x19,xzr + cselne x7,x7,x6 + cselne x11,x11,x10 + neg x20, x19 + + lslv x7, x7, x19 // align high limbs to the left + lslv x11, x11, x19 + lsrv x6, x6, x20 + lsrv x10, x10, x20 + and x6, x6, x20, asr#6 + and x10, x10, x20, asr#6 + orr x7, x7, x6 + orr x11, x11, x10 + + bfxil x7, x4, #0, #31 + bfxil x11, x8, #0, #31 + + b __inner_loop_31_256 + ret + ENDP + + + ALIGN 16 +|__inner_loop_31_256| PROC + mov x2, #31 + mov x13, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x15, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x23,#0x7FFFFFFF7FFFFFFF + +|$Loop_31_256| + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x15 + cselhs x11,x11,x7 + cselhs x7,x21,x20 + cselhs x15,x15,x13 + cselhs x13,x13,x19 + lsr x7, x7, #1 + and x19, x15, x22 + and x20, x23, x22 + sub x13, x13, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x15, x15, x15 // |f1|<<=1 + add x13, x13, x20 + sub x15, x15, x23 + cbnz x2, |$Loop_31_256| + + mov x23, #0x7FFFFFFF + ubfx x12, x13, #0, #32 + ubfx x13, x13, #32, #32 + ubfx x14, x15, #0, #32 + ubfx x15, x15, #32, #32 + sub x12, x12, x23 // remove bias + sub x13, x13, x23 + sub x14, x14, x23 + sub x15, x15, x23 + + ret + ENDP + + + ALIGN 16 +|__inner_loop_62_256| PROC + mov x12, #1 // |f0|=1 + mov x13, #0 // |g0|=0 + mov x14, #0 // |f1|=0 + mov x15, #1 // |g1|=1 + +|$Loop_62_256| + sbfx x22, x7, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + and x19, x11, x22 + sub x20, x11, x7 // |b_|-|a_| + subs x21, x7, x19 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x19, x12 + cselhs x11,x11,x7 + cselhs x7,x21,x20 + mov x20, x13 + cselhs x12,x12,x14 + cselhs x14,x14,x19 + cselhs x13,x13,x15 + cselhs x15,x15,x20 + lsr x7, x7, #1 + and x19, x14, x22 + and x20, x15, x22 + add x14, x14, x14 // |f1|<<=1 + add x15, x15, x15 // |g1|<<=1 + sub x12, x12, x19 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x13, x13, x20 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, |$Loop_62_256| + + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/ct_inverse_mod_256-x86_64.asm b/crypto/blst_src/build/win64/ct_inverse_mod_256-x86_64.asm new file mode 100644 index 00000000000..5cd09a1d8f2 --- /dev/null +++ b/crypto/blst_src/build/win64/ct_inverse_mod_256-x86_64.asm @@ -0,0 +1,1220 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC ct_inverse_mod_256 + + +ALIGN 32 +ct_inverse_mod_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_ct_inverse_mod_256:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,1072 + +$L$SEH_body_ct_inverse_mod_256:: + + + lea rax,QWORD PTR[((48+511))+rsp] + and rax,-512 + mov QWORD PTR[32+rsp],rdi + mov QWORD PTR[40+rsp],rcx + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + + mov r12,QWORD PTR[rdx] + mov r13,QWORD PTR[8+rdx] + mov r14,QWORD PTR[16+rdx] + mov r15,QWORD PTR[24+rdx] + + mov QWORD PTR[rax],r8 + mov QWORD PTR[8+rax],r9 + mov QWORD PTR[16+rax],r10 + mov QWORD PTR[24+rax],r11 + + mov QWORD PTR[32+rax],r12 + mov QWORD PTR[40+rax],r13 + mov QWORD PTR[48+rax],r14 + mov QWORD PTR[56+rax],r15 + mov rsi,rax + + + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + + + mov QWORD PTR[64+rdi],rdx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + + + mov QWORD PTR[72+rdi],rdx + + + xor rsi,256 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + + + + mov r8,QWORD PTR[64+rsi] + mov r12,QWORD PTR[104+rsi] + mov r9,r8 + imul r8,QWORD PTR[rsp] + mov r13,r12 + imul r12,QWORD PTR[8+rsp] + add r8,r12 + mov QWORD PTR[32+rdi],r8 + sar r8,63 + mov QWORD PTR[40+rdi],r8 + mov QWORD PTR[48+rdi],r8 + mov QWORD PTR[56+rdi],r8 + mov QWORD PTR[64+rdi],r8 + lea rsi,QWORD PTR[64+rsi] + + imul r9,rdx + imul r13,rcx + add r9,r13 + mov QWORD PTR[72+rdi],r9 + sar r9,63 + mov QWORD PTR[80+rdi],r9 + mov QWORD PTR[88+rdi],r9 + mov QWORD PTR[96+rdi],r9 + mov QWORD PTR[104+rdi],r9 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_256x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_256x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_256x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_256x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_256x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_256x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_256x63 + sar rbp,63 + mov QWORD PTR[40+rdi],rbp + mov QWORD PTR[48+rdi],rbp + mov QWORD PTR[56+rdi],rbp + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_512x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_512x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_512x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_512x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_512x63 + xor rsi,256+8*8 + mov edx,31 + call __ab_approximation_31_256 + + + mov QWORD PTR[16+rsp],r12 + mov QWORD PTR[24+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_256_n_shift_by_31 + mov QWORD PTR[rsp],rdx + mov QWORD PTR[8+rsp],rcx + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256_n_shift_by_31 + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[24+rsp],rcx + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[64+rsi] + lea rdi,QWORD PTR[32+rdi] + call __smulq_256x63 + + mov rdx,QWORD PTR[16+rsp] + mov rcx,QWORD PTR[24+rsp] + lea rdi,QWORD PTR[40+rdi] + call __smulq_512x63 + + xor rsi,256+8*8 + mov edx,47 + + mov r8,QWORD PTR[rsi] + + mov r10,QWORD PTR[32+rsi] + + call __inner_loop_62_256 + + + + + + + + lea rsi,QWORD PTR[64+rsi] + + + + + + mov rdx,r12 + mov rcx,r13 + mov rdi,QWORD PTR[32+rsp] + call __smulq_512x63 + adc rdx,rbp + + mov rsi,QWORD PTR[40+rsp] + mov rax,rdx + sar rdx,63 + + mov r8,rdx + mov r9,rdx + and r8,QWORD PTR[rsi] + mov r10,rdx + and r9,QWORD PTR[8+rsi] + and r10,QWORD PTR[16+rsi] + and rdx,QWORD PTR[24+rsi] + + add r12,r8 + adc r13,r9 + adc r14,r10 + adc r15,rdx + adc rax,0 + + mov rdx,rax + neg rax + or rdx,rax + sar rax,63 + + mov r8,rdx + mov r9,rdx + and r8,QWORD PTR[rsi] + mov r10,rdx + and r9,QWORD PTR[8+rsi] + and r10,QWORD PTR[16+rsi] + and rdx,QWORD PTR[24+rsi] + + xor r8,rax + xor rcx,rcx + xor r9,rax + sub rcx,rax + xor r10,rax + xor rdx,rax + add r8,rcx + adc r9,0 + adc r10,0 + adc rdx,0 + + add r12,r8 + adc r13,r9 + adc r14,r10 + adc r15,rdx + + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + lea r8,QWORD PTR[1072+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_ct_inverse_mod_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_ct_inverse_mod_256:: +ct_inverse_mod_256 ENDP + +ALIGN 32 +__smulq_512x63 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + + mov rbx,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbx,rdx + add rbx,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor rbp,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc rbp,0 + + mul rbx + mov QWORD PTR[rdi],rax + mov rax,r9 + mov r9,rdx + mul rbx + add r9,rax + mov rax,r10 + adc rdx,0 + mov QWORD PTR[8+rdi],r9 + mov r10,rdx + mul rbx + add r10,rax + mov rax,r11 + adc rdx,0 + mov QWORD PTR[16+rdi],r10 + mov r11,rdx + and rbp,rbx + neg rbp + mul rbx + add r11,rax + adc rbp,rdx + mov QWORD PTR[24+rdi],r11 + + mov r8,QWORD PTR[40+rsi] + mov r9,QWORD PTR[48+rsi] + mov r10,QWORD PTR[56+rsi] + mov r11,QWORD PTR[64+rsi] + mov r12,QWORD PTR[72+rsi] + mov r13,QWORD PTR[80+rsi] + mov r14,QWORD PTR[88+rsi] + mov r15,QWORD PTR[96+rsi] + + mov rdx,rcx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rcx,rdx + add rcx,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + xor r14,rdx + xor r15,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + adc r14,0 + adc r15,0 + + mul rcx + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rcx + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rcx + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rcx + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rcx + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + mul rcx + add r13,rax + mov rax,r14 + adc rdx,0 + mov r14,rdx + mul rcx + add r14,rax + mov rax,r15 + adc rdx,0 + mov r15,rdx + imul rcx + add r15,rax + adc rdx,0 + + mov rbx,rbp + sar rbp,63 + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,rbx + adc r13,rbp + adc r14,rbp + adc r15,rbp + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__smulq_512x63 ENDP + + +ALIGN 32 +__smulq_256x63 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[((0+0))+rsi] + mov r9,QWORD PTR[((0+8))+rsi] + mov r10,QWORD PTR[((0+16))+rsi] + mov r11,QWORD PTR[((0+24))+rsi] + mov rbp,QWORD PTR[((0+32))+rsi] + + mov rbx,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbx,rdx + add rbx,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor rbp,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc rbp,0 + + mul rbx + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rbx + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbx + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + and rbp,rbx + neg rbp + mul rbx + add r11,rax + adc rbp,rdx + mov rdx,rcx + mov r12,QWORD PTR[((40+0))+rsi] + mov r13,QWORD PTR[((40+8))+rsi] + mov r14,QWORD PTR[((40+16))+rsi] + mov r15,QWORD PTR[((40+24))+rsi] + mov rcx,QWORD PTR[((40+32))+rsi] + + mov rbx,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbx,rdx + add rbx,rax + + xor r12,rdx + xor r13,rdx + xor r14,rdx + xor r15,rdx + xor rcx,rdx + add rax,r12 + adc r13,0 + adc r14,0 + adc r15,0 + adc rcx,0 + + mul rbx + mov r12,rax + mov rax,r13 + mov r13,rdx + mul rbx + add r13,rax + mov rax,r14 + adc rdx,0 + mov r14,rdx + mul rbx + add r14,rax + mov rax,r15 + adc rdx,0 + mov r15,rdx + and rcx,rbx + neg rcx + mul rbx + add r15,rax + adc rcx,rdx + add r8,r12 + adc r9,r13 + adc r10,r14 + adc r11,r15 + adc rbp,rcx + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],rbp + + DB 0F3h,0C3h ;repret +__smulq_256x63 ENDP + +ALIGN 32 +__smulq_256_n_shift_by_31 PROC PRIVATE + DB 243,15,30,250 + + mov QWORD PTR[rdi],rdx + mov QWORD PTR[8+rdi],rcx + mov rbp,rdx + mov r8,QWORD PTR[((0+0))+rsi] + mov r9,QWORD PTR[((0+8))+rsi] + mov r10,QWORD PTR[((0+16))+rsi] + mov r11,QWORD PTR[((0+24))+rsi] + + mov rbx,rbp + sar rbp,63 + xor rax,rax + sub rax,rbp + + xor rbx,rbp + add rbx,rax + + xor r8,rbp + xor r9,rbp + xor r10,rbp + xor r11,rbp + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + + mul rbx + mov r8,rax + mov rax,r9 + and rbp,rbx + neg rbp + mov r9,rdx + mul rbx + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbx + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rbx + add r11,rax + adc rbp,rdx + mov r12,QWORD PTR[((32+0))+rsi] + mov r13,QWORD PTR[((32+8))+rsi] + mov r14,QWORD PTR[((32+16))+rsi] + mov r15,QWORD PTR[((32+24))+rsi] + + mov rbx,rcx + sar rcx,63 + xor rax,rax + sub rax,rcx + + xor rbx,rcx + add rbx,rax + + xor r12,rcx + xor r13,rcx + xor r14,rcx + xor r15,rcx + add rax,r12 + adc r13,0 + adc r14,0 + adc r15,0 + + mul rbx + mov r12,rax + mov rax,r13 + and rcx,rbx + neg rcx + mov r13,rdx + mul rbx + add r13,rax + mov rax,r14 + adc rdx,0 + mov r14,rdx + mul rbx + add r14,rax + mov rax,r15 + adc rdx,0 + mov r15,rdx + mul rbx + add r15,rax + adc rcx,rdx + add r8,r12 + adc r9,r13 + adc r10,r14 + adc r11,r15 + adc rbp,rcx + + mov rdx,QWORD PTR[rdi] + mov rcx,QWORD PTR[8+rdi] + + shrd r8,r9,31 + shrd r9,r10,31 + shrd r10,r11,31 + shrd r11,rbp,31 + + sar rbp,63 + xor rax,rax + sub rax,rbp + + xor r8,rbp + xor r9,rbp + xor r10,rbp + xor r11,rbp + add r8,rax + adc r9,0 + adc r10,0 + adc r11,0 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + xor rdx,rbp + xor rcx,rbp + add rdx,rax + add rcx,rax + + DB 0F3h,0C3h ;repret +__smulq_256_n_shift_by_31 ENDP + +ALIGN 32 +__ab_approximation_31_256 PROC PRIVATE + DB 243,15,30,250 + + mov r9,QWORD PTR[24+rsi] + mov r11,QWORD PTR[56+rsi] + mov rbx,QWORD PTR[16+rsi] + mov rbp,QWORD PTR[48+rsi] + mov r8,QWORD PTR[8+rsi] + mov r10,QWORD PTR[40+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + mov r8,QWORD PTR[rsi] + cmovz rbp,r10 + mov r10,QWORD PTR[32+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + cmovz rbp,r10 + + mov rax,r9 + or rax,r11 + bsr rcx,rax + lea rcx,QWORD PTR[1+rcx] + cmovz r9,r8 + cmovz r11,r10 + cmovz rcx,rax + neg rcx + + + shld r9,rbx,cl + shld r11,rbp,cl + + mov eax,07FFFFFFFh + and r8,rax + and r10,rax + not rax + and r9,rax + and r11,rax + or r8,r9 + or r10,r11 + + jmp __inner_loop_31_256 + + DB 0F3h,0C3h ;repret +__ab_approximation_31_256 ENDP + +ALIGN 32 +__inner_loop_31_256 PROC PRIVATE + DB 243,15,30,250 + + mov rcx,07FFFFFFF80000000h + mov r13,0800000007FFFFFFFh + mov r15,07FFFFFFF7FFFFFFFh + +$L$oop_31_256:: + cmp r8,r10 + mov rax,r8 + mov rbx,r10 + mov rbp,rcx + mov r14,r13 + cmovb r8,r10 + cmovb r10,rax + cmovb rcx,r13 + cmovb r13,rbp + + sub r8,r10 + sub rcx,r13 + add rcx,r15 + + test rax,1 + cmovz r8,rax + cmovz r10,rbx + cmovz rcx,rbp + cmovz r13,r14 + + shr r8,1 + add r13,r13 + sub r13,r15 + sub edx,1 + jnz $L$oop_31_256 + + shr r15,32 + mov edx,ecx + mov r12d,r13d + shr rcx,32 + shr r13,32 + sub rdx,r15 + sub rcx,r15 + sub r12,r15 + sub r13,r15 + + DB 0F3h,0C3h ;repret +__inner_loop_31_256 ENDP + + +ALIGN 32 +__inner_loop_62_256 PROC PRIVATE + DB 243,15,30,250 + + mov r15d,edx + mov rdx,1 + xor rcx,rcx + xor r12,r12 + mov r13,rdx + mov r14,rdx + +$L$oop_62_256:: + xor rax,rax + test r8,r14 + mov rbx,r10 + cmovnz rax,r10 + sub rbx,r8 + mov rbp,r8 + sub r8,rax + cmovc r8,rbx + cmovc r10,rbp + mov rax,rdx + cmovc rdx,r12 + cmovc r12,rax + mov rbx,rcx + cmovc rcx,r13 + cmovc r13,rbx + xor rax,rax + xor rbx,rbx + shr r8,1 + test rbp,r14 + cmovnz rax,r12 + cmovnz rbx,r13 + add r12,r12 + add r13,r13 + sub rdx,rax + sub rcx,rbx + sub r15d,1 + jnz $L$oop_62_256 + + DB 0F3h,0C3h ;repret +__inner_loop_62_256 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_ct_inverse_mod_256 + DD imagerel $L$SEH_body_ct_inverse_mod_256 + DD imagerel $L$SEH_info_ct_inverse_mod_256_prologue + + DD imagerel $L$SEH_body_ct_inverse_mod_256 + DD imagerel $L$SEH_epilogue_ct_inverse_mod_256 + DD imagerel $L$SEH_info_ct_inverse_mod_256_body + + DD imagerel $L$SEH_epilogue_ct_inverse_mod_256 + DD imagerel $L$SEH_end_ct_inverse_mod_256 + DD imagerel $L$SEH_info_ct_inverse_mod_256_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ct_inverse_mod_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_ct_inverse_mod_256_body:: +DB 1,0,18,0 +DB 000h,0f4h,086h,000h +DB 000h,0e4h,087h,000h +DB 000h,0d4h,088h,000h +DB 000h,0c4h,089h,000h +DB 000h,034h,08ah,000h +DB 000h,054h,08bh,000h +DB 000h,074h,08dh,000h +DB 000h,064h,08eh,000h +DB 000h,001h,08ch,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_ct_inverse_mod_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/ct_inverse_mod_384-armv8.asm b/crypto/blst_src/build/win64/ct_inverse_mod_384-armv8.asm new file mode 100644 index 00000000000..311ce7638ce --- /dev/null +++ b/crypto/blst_src/build/win64/ct_inverse_mod_384-armv8.asm @@ -0,0 +1,719 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + + EXPORT |ct_inverse_mod_383|[FUNC] + ALIGN 32 +|ct_inverse_mod_383| PROC + DCDU 3573752639 + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #1040 + + ldp x22, x4, [x1,#8*0] + ldp x5, x6, [x1,#8*2] + ldp x7, x8, [x1,#8*4] + + add x1, sp, #16+511 // find closest 512-byte-aligned spot + and x1, x1, #-512 // in the frame... + stp x0, x3, [sp] + + ldp x9, x10, [x2,#8*0] + ldp x11, x12, [x2,#8*2] + ldp x13, x14, [x2,#8*4] + + stp x22, x4, [x1,#8*0] // copy input to |a| + stp x5, x6, [x1,#8*2] + stp x7, x8, [x1,#8*4] + stp x9, x10, [x1,#8*6] // copy modulus to |b| + stp x11, x12, [x1,#8*8] + stp x13, x14, [x1,#8*10] + + ////////////////////////////////////////// first iteration + mov x2, #62 + bl |$Lab_approximation_62_loaded| + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + str x15,[x0,#8*12] // initialize |u| with |f0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to dst |b| + bl __smul_383_n_shift_by_62 + str x15, [x0,#8*12] // initialize |v| with |f1| + + ////////////////////////////////////////// second iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + ldr x7, [x1,#8*12] // |u| + ldr x8, [x1,#8*18] // |v| + mul x3, x20, x7 // |u|*|f0| + smulh x4, x20, x7 + mul x5, x21, x8 // |v|*|g0| + smulh x6, x21, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*6] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*8] + stp x5, x5, [x0,#8*10] + + mul x3, x15, x7 // |u|*|f1| + smulh x4, x15, x7 + mul x5, x16, x8 // |v|*|g1| + smulh x6, x16, x8 + adds x3, x3, x5 + adc x4, x4, x6 + stp x3, x4, [x0,#8*12] + asr x5, x4, #63 // sign extension + stp x5, x5, [x0,#8*14] + stp x5, x5, [x0,#8*16] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + asr x27, x27, #63 // sign extension + stp x27, x27, [x0,#8*6] + stp x27, x27, [x0,#8*8] + stp x27, x27, [x0,#8*10] + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + bl __ab_approximation_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + bl __smul_383_n_shift_by_62 + mov x20, x15 // corrected |f0| + mov x21, x16 // corrected |g0| + + mov x15, x17 // |f1| + mov x16, x19 // |g1| + add x0, x0, #8*6 // pointer to destination |b| + bl __smul_383_n_shift_by_62 + + add x0, x0, #8*6 // pointer to destination |u| + bl __smul_383x63 + + mov x20, x15 // corrected |f1| + mov x21, x16 // corrected |g1| + add x0, x0, #8*6 // pointer to destination |v| + bl __smul_383x63 + bl __smul_767x63_tail + ////////////////////////////////////////// iteration before last + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldp x3, x8, [x1,#8*0] // just load + ldp x9, x14, [x1,#8*6] + bl __inner_loop_62 + + eor x0, x1, #256 // pointer to dst |a|b|u|v| + str x3, [x0,#8*0] + str x9, [x0,#8*6] + + mov x20, x15 // exact |f0| + mov x21, x16 // exact |g0| + mov x15, x17 + mov x16, x19 + add x0, x0, #8*12 // pointer to dst |u| + bl __smul_383x63 + + mov x20, x15 // exact |f1| + mov x21, x16 // exact |g1| + add x0, x0, #8*6 // pointer to dst |v| + bl __smul_383x63 + bl __smul_767x63_tail + + ////////////////////////////////////////// last iteration + eor x1, x1, #256 // flip-flop src |a|b|u|v| + mov x2, #22 // 766 % 62 + //bl __ab_approximation_62 // |a| and |b| are exact, + ldr x3, [x1,#8*0] // just load + eor x8, x8, x8 + ldr x9, [x1,#8*6] + eor x14, x14, x14 + bl __inner_loop_62 + + mov x20, x17 + mov x21, x19 + ldp x0, x15, [sp] // original out_ptr and n_ptr + bl __smul_383x63 + bl __smul_767x63_tail + ldr x30, [x29,#8] + + asr x22, x8, #63 // sign as mask + ldp x9, x10, [x15,#8*0] + ldp x11, x12, [x15,#8*2] + ldp x13, x14, [x15,#8*4] + + and x9, x9, x22 // add mod<<384 conditionally + and x10, x10, x22 + adds x3, x3, x9 + and x11, x11, x22 + adcs x4, x4, x10 + and x12, x12, x22 + adcs x5, x5, x11 + and x13, x13, x22 + adcs x6, x6, x12 + and x14, x14, x22 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*8] + adc x8, x8, x14 + stp x7, x8, [x0,#8*10] + + add sp, sp, #1040 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 + DCDU 3573752767 + ret + ENDP + +//////////////////////////////////////////////////////////////////////// +// see corresponding commentary in ctx_inverse_mod_384-x86_64... + + ALIGN 32 +|__smul_383x63| PROC + ldp x3, x4, [x1,#8*0+96] // load |u| (or |v|) + asr x17, x20, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x5, x6, [x1,#8*2+96] + eor x20, x20, x17 // conditionally negate |f_| (or |g_|) + ldp x7, x8, [x1,#8*4+96] + + eor x3, x3, x17 // conditionally negate |u| (or |v|) + sub x20, x20, x17 + eor x4, x4, x17 + adds x3, x3, x17, lsr#63 + eor x5, x5, x17 + adcs x4, x4, xzr + eor x6, x6, x17 + adcs x5, x5, xzr + eor x7, x7, x17 + adcs x6, x6, xzr + umulh x22, x3, x20 + eor x8, x8, x17 + umulh x23, x4, x20 + adcs x7, x7, xzr + umulh x24, x5, x20 + adcs x8, x8, xzr + umulh x25, x6, x20 + umulh x26, x7, x20 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x22 + mul x6, x6, x20 + adcs x5, x5, x23 + mul x7, x7, x20 + adcs x6, x6, x24 + mul x27,x8, x20 + adcs x7, x7, x25 + adcs x27,x27,x26 + adc x2, xzr, xzr + ldp x9, x10, [x1,#8*0+144] // load |u| (or |v|) + asr x17, x21, #63 // |f_|'s sign as mask (or |g_|'s) + ldp x11, x12, [x1,#8*2+144] + eor x21, x21, x17 // conditionally negate |f_| (or |g_|) + ldp x13, x14, [x1,#8*4+144] + + eor x9, x9, x17 // conditionally negate |u| (or |v|) + sub x21, x21, x17 + eor x10, x10, x17 + adds x9, x9, x17, lsr#63 + eor x11, x11, x17 + adcs x10, x10, xzr + eor x12, x12, x17 + adcs x11, x11, xzr + eor x13, x13, x17 + adcs x12, x12, xzr + umulh x22, x9, x21 + eor x14, x14, x17 + umulh x23, x10, x21 + adcs x13, x13, xzr + umulh x24, x11, x21 + adcs x14, x14, xzr + umulh x25, x12, x21 + adc x19, xzr, xzr // used in __smul_767x63_tail + umulh x26, x13, x21 + mul x9, x9, x21 + mul x10, x10, x21 + mul x11, x11, x21 + adds x10, x10, x22 + mul x12, x12, x21 + adcs x11, x11, x23 + mul x13, x13, x21 + adcs x12, x12, x24 + mul x28,x14, x21 + adcs x13, x13, x25 + adcs x28,x28,x26 + adc x2, x2, xzr + + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + stp x3, x4, [x0,#8*0] + adcs x7, x7, x13 + stp x5, x6, [x0,#8*2] + adcs x27, x27, x28 + stp x7, x27, [x0,#8*4] + adc x28, x2, xzr // used in __smul_767x63_tail + + ret + ENDP + + + ALIGN 32 +|__smul_767x63_tail| PROC + smulh x27, x8, x20 + ldp x3, x4, [x1,#8*24] // load rest of |v| + umulh x14,x14, x21 + ldp x5, x6, [x1,#8*26] + ldp x7, x8, [x1,#8*28] + + eor x3, x3, x17 // conditionally negate rest of |v| + eor x4, x4, x17 + eor x5, x5, x17 + adds x3, x3, x19 + eor x6, x6, x17 + adcs x4, x4, xzr + eor x7, x7, x17 + adcs x5, x5, xzr + eor x8, x8, x17 + adcs x6, x6, xzr + umulh x22, x3, x21 + adcs x7, x7, xzr + umulh x23, x4, x21 + adc x8, x8, xzr + + umulh x24, x5, x21 + add x14, x14, x28 + umulh x25, x6, x21 + asr x28, x27, #63 + umulh x26, x7, x21 + mul x3, x3, x21 + mul x4, x4, x21 + mul x5, x5, x21 + adds x3, x3, x14 + mul x6, x6, x21 + adcs x4, x4, x22 + mul x7, x7, x21 + adcs x5, x5, x23 + mul x8, x8, x21 + adcs x6, x6, x24 + adcs x7, x7, x25 + adc x8, x8, x26 + + adds x3, x3, x27 + adcs x4, x4, x28 + adcs x5, x5, x28 + adcs x6, x6, x28 + stp x3, x4, [x0,#8*6] + adcs x7, x7, x28 + stp x5, x6, [x0,#8*8] + adc x8, x8, x28 + stp x7, x8, [x0,#8*10] + + ret + ENDP + + + ALIGN 32 +|__smul_383_n_shift_by_62| PROC + ldp x3, x4, [x1,#8*0+0] // load |a| (or |b|) + asr x28, x15, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x5, x6, [x1,#8*2+0] + eor x2, x15, x28 // conditionally negate |f0| (or |g0|) + ldp x7, x8, [x1,#8*4+0] + + eor x3, x3, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + umulh x22, x3, x2 + adcs x6, x6, xzr + umulh x23, x4, x2 + eor x8, x8, x28 + umulh x24, x5, x2 + adcs x7, x7, xzr + umulh x25, x6, x2 + adc x8, x8, xzr + + umulh x26, x7, x2 + smulh x27, x8, x2 + mul x3, x3, x2 + mul x4, x4, x2 + mul x5, x5, x2 + adds x4, x4, x22 + mul x6, x6, x2 + adcs x5, x5, x23 + mul x7, x7, x2 + adcs x6, x6, x24 + mul x8, x8, x2 + adcs x7, x7, x25 + adcs x8, x8 ,x26 + adc x27, x27, xzr + ldp x9, x10, [x1,#8*0+48] // load |a| (or |b|) + asr x28, x16, #63 // |f0|'s sign as mask (or |g0|'s) + ldp x11, x12, [x1,#8*2+48] + eor x2, x16, x28 // conditionally negate |f0| (or |g0|) + ldp x13, x14, [x1,#8*4+48] + + eor x9, x9, x28 // conditionally negate |a| (or |b|) + sub x2, x2, x28 + eor x10, x10, x28 + adds x9, x9, x28, lsr#63 + eor x11, x11, x28 + adcs x10, x10, xzr + eor x12, x12, x28 + adcs x11, x11, xzr + eor x13, x13, x28 + umulh x22, x9, x2 + adcs x12, x12, xzr + umulh x23, x10, x2 + eor x14, x14, x28 + umulh x24, x11, x2 + adcs x13, x13, xzr + umulh x25, x12, x2 + adc x14, x14, xzr + + umulh x26, x13, x2 + smulh x28, x14, x2 + mul x9, x9, x2 + mul x10, x10, x2 + mul x11, x11, x2 + adds x10, x10, x22 + mul x12, x12, x2 + adcs x11, x11, x23 + mul x13, x13, x2 + adcs x12, x12, x24 + mul x14, x14, x2 + adcs x13, x13, x25 + adcs x14, x14 ,x26 + adc x28, x28, xzr + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x27, x28 + + extr x3, x4, x3, #62 + extr x4, x5, x4, #62 + extr x5, x6, x5, #62 + asr x28, x9, #63 + extr x6, x7, x6, #62 + extr x7, x8, x7, #62 + extr x8, x9, x8, #62 + + eor x3, x3, x28 + eor x4, x4, x28 + adds x3, x3, x28, lsr#63 + eor x5, x5, x28 + adcs x4, x4, xzr + eor x6, x6, x28 + adcs x5, x5, xzr + eor x7, x7, x28 + adcs x6, x6, xzr + eor x8, x8, x28 + stp x3, x4, [x0,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x0,#8*2] + adc x8, x8, xzr + stp x7, x8, [x0,#8*4] + + eor x15, x15, x28 + eor x16, x16, x28 + sub x15, x15, x28 + sub x16, x16, x28 + + ret + ENDP + + ALIGN 16 +|__ab_approximation_62| PROC + ldp x7, x8, [x1,#8*4] + ldp x13, x14, [x1,#8*10] + ldp x5, x6, [x1,#8*2] + ldp x11, x12, [x1,#8*8] + +|$Lab_approximation_62_loaded| + orr x22, x8, x14 // check top-most limbs, ... + cmp x22, #0 + cselne x8,x8,x7 + cselne x14,x14,x13 + cselne x7,x7,x6 + orr x22, x8, x14 // ... ones before top-most, ... + cselne x13,x13,x12 + + ldp x3, x4, [x1,#8*0] + ldp x9, x10, [x1,#8*6] + + cmp x22, #0 + cselne x8,x8,x7 + cselne x14,x14,x13 + cselne x7,x7,x5 + orr x22, x8, x14 // ... and ones before that ... + cselne x13,x13,x11 + + cmp x22, #0 + cselne x8,x8,x7 + cselne x14,x14,x13 + cselne x7,x7,x4 + orr x22, x8, x14 + cselne x13,x13,x10 + + clz x22, x22 + cmp x22, #64 + cselne x22,x22,xzr + cselne x8,x8,x7 + cselne x14,x14,x13 + neg x23, x22 + + lslv x8, x8, x22 // align high limbs to the left + lslv x14, x14, x22 + lsrv x7, x7, x23 + lsrv x13, x13, x23 + and x7, x7, x23, asr#6 + and x13, x13, x23, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + b __inner_loop_62 + ret + ENDP + + ALIGN 16 +|__inner_loop_62| PROC + mov x15, #1 // |f0|=1 + mov x16, #0 // |g0|=0 + mov x17, #0 // |f1|=0 + mov x19, #1 // |g1|=1 + +|$Loop_62| + sbfx x28, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + sub x2, x2, #1 + subs x24, x9, x3 // |b_|-|a_| + and x22, x9, x28 + sbc x25, x14, x8 + and x23, x14, x28 + subs x26, x3, x22 // |a_|-|b_| (or |a_|-0 if |a_| was even) + mov x22, x15 + sbcs x27, x8, x23 + mov x23, x16 + cselhs x9,x9,x3 + cselhs x14,x14,x8 + cselhs x3,x26,x24 + cselhs x8,x27,x25 + cselhs x15,x15,x17 + cselhs x17,x17,x22 + cselhs x16,x16,x19 + cselhs x19,x19,x23 + extr x3, x8, x3, #1 + lsr x8, x8, #1 + and x22, x17, x28 + and x23, x19, x28 + add x17, x17, x17 // |f1|<<=1 + add x19, x19, x19 // |g1|<<=1 + sub x15, x15, x22 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + sub x16, x16, x23 // |g0|-=|g1| (or |g0-=0| ...) + cbnz x2, |$Loop_62| + + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/ct_is_square_mod_384-armv8.asm b/crypto/blst_src/build/win64/ct_is_square_mod_384-armv8.asm new file mode 100644 index 00000000000..e2454897b33 --- /dev/null +++ b/crypto/blst_src/build/win64/ct_is_square_mod_384-armv8.asm @@ -0,0 +1,326 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + + EXPORT |ct_is_square_mod_384|[FUNC] + ALIGN 32 +|ct_is_square_mod_384| PROC + DCDU 3573752639 + stp x29, x30, [sp,#-128]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + sub sp, sp, #512 + + ldp x3, x4, [x0,#8*0] // load input + ldp x5, x6, [x0,#8*2] + ldp x7, x8, [x0,#8*4] + + add x0, sp, #255 // find closest 256-byte-aligned spot + and x0, x0, #-256 // in the frame... + + ldp x9, x10, [x1,#8*0] // load modulus + ldp x11, x12, [x1,#8*2] + ldp x13, x14, [x1,#8*4] + + stp x3, x4, [x0,#8*6] // copy input to |a| + stp x5, x6, [x0,#8*8] + stp x7, x8, [x0,#8*10] + stp x9, x10, [x0,#8*0] // copy modulus to |b| + stp x11, x12, [x0,#8*2] + stp x13, x14, [x0,#8*4] + + eor x2, x2, x2 // init the |$Legendre| symbol + mov x15, #24 // 24 is 768/30-1 + b |$Loop_is_square| + + ALIGN 16 +|$Loop_is_square| + bl __ab_approximation_30 + sub x15, x15, #1 + + eor x1, x0, #128 // pointer to dst |b| + bl __smul_384_n_shift_by_30 + + mov x19, x16 // |f0| + mov x20, x17 // |g0| + add x1, x1, #8*6 // pointer to dst |a| + bl __smul_384_n_shift_by_30 + + ldp x9, x10, [x1,#-8*6] + eor x0, x0, #128 // flip-flop src |a|b| + and x27, x27, x9 // if |a| was negative, + add x2, x2, x27, lsr#1 // adjust |L| + + cbnz x15, |$Loop_is_square| + + ////////////////////////////////////////// last iteration + //bl __ab_approximation_30 // |a| and |b| are exact, + //ldr x8, [x0,#8*6] // and loaded + //ldr x14, [x0,#8*0] + mov x15, #48 // 48 is 768%30 + 30 + bl __inner_loop_48 + ldr x30, [x29,#8] + + and x0, x2, #1 + eor x0, x0, #1 + + add sp, sp, #512 + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldr x29, [sp],#128 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__smul_384_n_shift_by_30| PROC + ldp x3, x4, [x0,#8*0+0] // load |b| (or |a|) + asr x27, x20, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x5, x6, [x0,#8*2+0] + eor x20, x20, x27 // conditionally negate |g1| (or |f1|) + ldp x7, x8, [x0,#8*4+0] + + eor x3, x3, x27 // conditionally negate |b| (or |a|) + sub x20, x20, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + umulh x21, x3, x20 + adcs x6, x6, xzr + umulh x22, x4, x20 + eor x8, x8, x27 + umulh x23, x5, x20 + adcs x7, x7, xzr + umulh x24, x6, x20 + adc x8, x8, xzr + + umulh x25, x7, x20 + and x28, x20, x27 + umulh x26, x8, x20 + neg x28, x28 + mul x3, x3, x20 + mul x4, x4, x20 + mul x5, x5, x20 + adds x4, x4, x21 + mul x6, x6, x20 + adcs x5, x5, x22 + mul x7, x7, x20 + adcs x6, x6, x23 + mul x8, x8, x20 + adcs x7, x7, x24 + adcs x8, x8 ,x25 + adc x26, x26, x28 + ldp x9, x10, [x0,#8*0+48] // load |b| (or |a|) + asr x27, x19, #63 // |g1|'s sign as mask (or |f1|'s) + ldp x11, x12, [x0,#8*2+48] + eor x19, x19, x27 // conditionally negate |g1| (or |f1|) + ldp x13, x14, [x0,#8*4+48] + + eor x9, x9, x27 // conditionally negate |b| (or |a|) + sub x19, x19, x27 + eor x10, x10, x27 + adds x9, x9, x27, lsr#63 + eor x11, x11, x27 + adcs x10, x10, xzr + eor x12, x12, x27 + adcs x11, x11, xzr + eor x13, x13, x27 + umulh x21, x9, x19 + adcs x12, x12, xzr + umulh x22, x10, x19 + eor x14, x14, x27 + umulh x23, x11, x19 + adcs x13, x13, xzr + umulh x24, x12, x19 + adc x14, x14, xzr + + umulh x25, x13, x19 + and x28, x19, x27 + umulh x27, x14, x19 + neg x28, x28 + mul x9, x9, x19 + mul x10, x10, x19 + mul x11, x11, x19 + adds x10, x10, x21 + mul x12, x12, x19 + adcs x11, x11, x22 + mul x13, x13, x19 + adcs x12, x12, x23 + mul x14, x14, x19 + adcs x13, x13, x24 + adcs x14, x14 ,x25 + adc x27, x27, x28 + adds x3, x3, x9 + adcs x4, x4, x10 + adcs x5, x5, x11 + adcs x6, x6, x12 + adcs x7, x7, x13 + adcs x8, x8, x14 + adc x9, x26, x27 + + extr x3, x4, x3, #30 + extr x4, x5, x4, #30 + extr x5, x6, x5, #30 + asr x27, x9, #63 + extr x6, x7, x6, #30 + extr x7, x8, x7, #30 + extr x8, x9, x8, #30 + + eor x3, x3, x27 + eor x4, x4, x27 + adds x3, x3, x27, lsr#63 + eor x5, x5, x27 + adcs x4, x4, xzr + eor x6, x6, x27 + adcs x5, x5, xzr + eor x7, x7, x27 + adcs x6, x6, xzr + eor x8, x8, x27 + stp x3, x4, [x1,#8*0] + adcs x7, x7, xzr + stp x5, x6, [x1,#8*2] + adc x8, x8, xzr + stp x7, x8, [x1,#8*4] + + ret + ENDP + + ALIGN 16 +|__ab_approximation_30| PROC + ldp x13, x14, [x0,#8*4] // |a| is still in registers + ldp x11, x12, [x0,#8*2] + + orr x21, x8, x14 // check top-most limbs, ... + cmp x21, #0 + cselne x8,x8,x7 + cselne x14,x14,x13 + cselne x7,x7,x6 + orr x21, x8, x14 // ... ones before top-most, ... + cselne x13,x13,x12 + + cmp x21, #0 + cselne x8,x8,x7 + cselne x14,x14,x13 + cselne x7,x7,x5 + orr x21, x8, x14 // ... and ones before that ... + cselne x13,x13,x11 + + cmp x21, #0 + cselne x8,x8,x7 + cselne x14,x14,x13 + cselne x7,x7,x4 + orr x21, x8, x14 // and one more, ... + cselne x13,x13,x10 + + cmp x21, #0 + cselne x8,x8,x7 + cselne x14,x14,x13 + cselne x7,x7,x3 + orr x21, x8, x14 + cselne x13,x13,x9 + + clz x21, x21 + cmp x21, #64 + cselne x21,x21,xzr + cselne x8,x8,x7 + cselne x14,x14,x13 + neg x22, x21 + + lslv x8, x8, x21 // align high limbs to the left + lslv x14, x14, x21 + lsrv x7, x7, x22 + lsrv x13, x13, x22 + and x7, x7, x22, asr#6 + and x13, x13, x22, asr#6 + orr x8, x8, x7 + orr x14, x14, x13 + + bfxil x8, x3, #0, #32 + bfxil x14, x9, #0, #32 + + b __inner_loop_30 + ret + ENDP + + + ALIGN 16 +|__inner_loop_30| PROC + mov x28, #30 + mov x17, #0x7FFFFFFF80000000 // |f0|=1, |g0|=0 + mov x20, #0x800000007FFFFFFF // |f1|=0, |g1|=1 + mov x27,#0x7FFFFFFF7FFFFFFF + +|$Loop_30| + sbfx x24, x8, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x8, x14 + sub x28, x28, #1 + and x21, x14, x24 + + sub x22, x14, x8 // |b_|-|a_| + subs x23, x8, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 // L + (a_ & b_) >> 1 + mov x21, x20 + cselhs x14,x14,x8 + cselhs x8,x23,x22 + cselhs x20,x20,x17 + cselhs x17,x17,x21 + cselhs x2,x2,x25 + lsr x8, x8, #1 + and x21, x20, x24 + and x22, x27, x24 + add x23, x14, #2 + sub x17, x17, x21 // |f0|-=|f1| (or |f0-=0| if |a_| was even) + add x20, x20, x20 // |f1|<<=1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + add x17, x17, x22 + sub x20, x20, x27 + + cbnz x28, |$Loop_30| + + mov x27, #0x7FFFFFFF + ubfx x16, x17, #0, #32 + ubfx x17, x17, #32, #32 + ubfx x19, x20, #0, #32 + ubfx x20, x20, #32, #32 + sub x16, x16, x27 // remove the bias + sub x17, x17, x27 + sub x19, x19, x27 + sub x20, x20, x27 + + ret + ENDP + + ALIGN 16 +|__inner_loop_48| PROC +|$Loop_48| + sbfx x24, x3, #0, #1 // if |a_| is odd, then we'll be subtracting + and x25, x3, x9 + sub x15, x15, #1 + and x21, x9, x24 + sub x22, x9, x3 // |b_|-|a_| + subs x23, x3, x21 // |a_|-|b_| (or |a_|-0 if |a_| was even) + add x25, x2, x25, lsr#1 + cselhs x9,x9,x3 + cselhs x3,x23,x22 + cselhs x2,x2,x25 + add x23, x9, #2 + lsr x3, x3, #1 + add x2, x2, x23, lsr#2 // "negate" |L| if |b|%8 is 3 or 5 + + cbnz x15, |$Loop_48| + + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/ct_is_square_mod_384-x86_64.asm b/crypto/blst_src/build/win64/ct_is_square_mod_384-x86_64.asm new file mode 100644 index 00000000000..be00f479efb --- /dev/null +++ b/crypto/blst_src/build/win64/ct_is_square_mod_384-x86_64.asm @@ -0,0 +1,516 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC ct_is_square_mod_384 + + +ALIGN 32 +ct_is_square_mod_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_ct_is_square_mod_384:: + + + push rbp + + mov rdi,rcx + mov rsi,rdx + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,536 + +$L$SEH_body_ct_is_square_mod_384:: + + + lea rax,QWORD PTR[((24+255))+rsp] + and rax,-256 + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rbx,QWORD PTR[16+rsi] + mov rcx,QWORD PTR[24+rsi] + mov rdx,QWORD PTR[32+rsi] + mov rdi,QWORD PTR[40+rsi] + mov rsi,rax + + mov QWORD PTR[rax],r8 + mov QWORD PTR[8+rax],r9 + mov QWORD PTR[16+rax],r10 + mov QWORD PTR[24+rax],r11 + mov QWORD PTR[32+rax],r12 + mov QWORD PTR[40+rax],r13 + + mov QWORD PTR[48+rax],r14 + mov QWORD PTR[56+rax],r15 + mov QWORD PTR[64+rax],rbx + mov QWORD PTR[72+rax],rcx + mov QWORD PTR[80+rax],rdx + mov QWORD PTR[88+rax],rdi + + xor rbp,rbp + mov ecx,24 + jmp $L$oop_is_square + +ALIGN 32 +$L$oop_is_square:: + mov DWORD PTR[16+rsp],ecx + + call __ab_approximation_30 + mov QWORD PTR[rsp],rax + mov QWORD PTR[8+rsp],rbx + + mov rdi,128+8*6 + xor rdi,rsi + call __smulq_384_n_shift_by_30 + + mov rdx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + lea rdi,QWORD PTR[((-48))+rdi] + call __smulq_384_n_shift_by_30 + + mov ecx,DWORD PTR[16+rsp] + xor rsi,128 + + and r14,QWORD PTR[48+rdi] + shr r14,1 + add rbp,r14 + + sub ecx,1 + jnz $L$oop_is_square + + + + + mov r9,QWORD PTR[48+rsi] + call __inner_loop_48 + + mov rax,1 + and rax,rbp + xor rax,1 + + lea r8,QWORD PTR[536+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_ct_is_square_mod_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_ct_is_square_mod_384:: +ct_is_square_mod_384 ENDP + + +ALIGN 32 +__smulq_384_n_shift_by_30 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rbx,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbx,rdx + add rbx,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mov r14,rdx + and r14,rbx + mul rbx + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rbx + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbx + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rbx + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rbx + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + neg r14 + mul rbx + add r13,rax + adc r14,rdx + lea rsi,QWORD PTR[48+rsi] + mov rdx,rcx + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rbx,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbx,rdx + add rbx,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mov r15,rdx + and r15,rbx + mul rbx + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rbx + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbx + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rbx + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rbx + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + neg r15 + mul rbx + add r13,rax + adc r15,rdx + lea rsi,QWORD PTR[((-48))+rsi] + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,r15 + + shrd r8,r9,30 + shrd r9,r10,30 + shrd r10,r11,30 + shrd r11,r12,30 + shrd r12,r13,30 + shrd r13,r14,30 + + sar r14,63 + xor rbx,rbx + sub rbx,r14 + + xor r8,r14 + xor r9,r14 + xor r10,r14 + xor r11,r14 + xor r12,r14 + xor r13,r14 + add r8,rbx + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__smulq_384_n_shift_by_30 ENDP + +ALIGN 32 +__ab_approximation_30 PROC PRIVATE + DB 243,15,30,250 + + mov rbx,QWORD PTR[88+rsi] + mov r15,QWORD PTR[80+rsi] + mov r14,QWORD PTR[72+rsi] + + mov rax,r13 + or rax,rbx + cmovz r13,r12 + cmovz rbx,r15 + cmovz r12,r11 + mov r11,QWORD PTR[64+rsi] + cmovz r15,r14 + + mov rax,r13 + or rax,rbx + cmovz r13,r12 + cmovz rbx,r15 + cmovz r12,r10 + mov r10,QWORD PTR[56+rsi] + cmovz r15,r11 + + mov rax,r13 + or rax,rbx + cmovz r13,r12 + cmovz rbx,r15 + cmovz r12,r9 + mov r9,QWORD PTR[48+rsi] + cmovz r15,r10 + + mov rax,r13 + or rax,rbx + cmovz r13,r12 + cmovz rbx,r15 + cmovz r12,r8 + cmovz r15,r9 + + mov rax,r13 + or rax,rbx + bsr rcx,rax + lea rcx,QWORD PTR[1+rcx] + cmovz r13,r8 + cmovz rbx,r9 + cmovz rcx,rax + neg rcx + + + shld r13,r12,cl + shld rbx,r15,cl + + mov rax,0FFFFFFFF00000000h + mov r8d,r8d + mov r9d,r9d + and r13,rax + and rbx,rax + or r8,r13 + or r9,rbx + + jmp __inner_loop_30 + + DB 0F3h,0C3h ;repret +__ab_approximation_30 ENDP + +ALIGN 32 +__inner_loop_30 PROC PRIVATE + DB 243,15,30,250 + + mov rbx,07FFFFFFF80000000h + mov rcx,0800000007FFFFFFFh + lea r15,QWORD PTR[((-1))+rbx] + mov edi,30 + +$L$oop_30:: + mov rax,r8 + and rax,r9 + shr rax,1 + + cmp r8,r9 + mov r10,r8 + mov r11,r9 + lea rax,QWORD PTR[rbp*1+rax] + mov r12,rbx + mov r13,rcx + mov r14,rbp + cmovb r8,r9 + cmovb r9,r10 + cmovb rbx,rcx + cmovb rcx,r12 + cmovb rbp,rax + + sub r8,r9 + sub rbx,rcx + add rbx,r15 + + test r10,1 + cmovz r8,r10 + cmovz r9,r11 + cmovz rbx,r12 + cmovz rcx,r13 + cmovz rbp,r14 + + lea rax,QWORD PTR[2+r9] + shr r8,1 + shr rax,2 + add rcx,rcx + lea rbp,QWORD PTR[rbp*1+rax] + sub rcx,r15 + + sub edi,1 + jnz $L$oop_30 + + shr r15,32 + mov eax,ebx + shr rbx,32 + mov edx,ecx + shr rcx,32 + sub rax,r15 + sub rbx,r15 + sub rdx,r15 + sub rcx,r15 + + DB 0F3h,0C3h ;repret +__inner_loop_30 ENDP + + +ALIGN 32 +__inner_loop_48 PROC PRIVATE + DB 243,15,30,250 + + mov edi,48 + +$L$oop_48:: + mov rax,r8 + and rax,r9 + shr rax,1 + + cmp r8,r9 + mov r10,r8 + mov r11,r9 + lea rax,QWORD PTR[rbp*1+rax] + mov r12,rbp + cmovb r8,r9 + cmovb r9,r10 + cmovb rbp,rax + + sub r8,r9 + + test r10,1 + cmovz r8,r10 + cmovz r9,r11 + cmovz rbp,r12 + + lea rax,QWORD PTR[2+r9] + shr r8,1 + shr rax,2 + add rbp,rax + + sub edi,1 + jnz $L$oop_48 + + DB 0F3h,0C3h ;repret +__inner_loop_48 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_ct_is_square_mod_384 + DD imagerel $L$SEH_body_ct_is_square_mod_384 + DD imagerel $L$SEH_info_ct_is_square_mod_384_prologue + + DD imagerel $L$SEH_body_ct_is_square_mod_384 + DD imagerel $L$SEH_epilogue_ct_is_square_mod_384 + DD imagerel $L$SEH_info_ct_is_square_mod_384_body + + DD imagerel $L$SEH_epilogue_ct_is_square_mod_384 + DD imagerel $L$SEH_end_ct_is_square_mod_384 + DD imagerel $L$SEH_info_ct_is_square_mod_384_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ct_is_square_mod_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_ct_is_square_mod_384_body:: +DB 1,0,18,0 +DB 000h,0f4h,043h,000h +DB 000h,0e4h,044h,000h +DB 000h,0d4h,045h,000h +DB 000h,0c4h,046h,000h +DB 000h,034h,047h,000h +DB 000h,054h,048h,000h +DB 000h,074h,04ah,000h +DB 000h,064h,04bh,000h +DB 000h,001h,049h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_ct_is_square_mod_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/ctq_inverse_mod_384-x86_64.asm b/crypto/blst_src/build/win64/ctq_inverse_mod_384-x86_64.asm new file mode 100644 index 00000000000..89fbe5d0666 --- /dev/null +++ b/crypto/blst_src/build/win64/ctq_inverse_mod_384-x86_64.asm @@ -0,0 +1,1240 @@ +OPTION DOTNAME +EXTERN ct_inverse_mod_383$1:NEAR +_DATA SEGMENT +COMM __blst_platform_cap:DWORD:1 +_DATA ENDS +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC ct_inverse_mod_383 + + +ALIGN 32 +ct_inverse_mod_383 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_ct_inverse_mod_383:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz ct_inverse_mod_383$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,1112 + +$L$SEH_body_ct_inverse_mod_383:: + + + lea rax,QWORD PTR[((88+511))+rsp] + and rax,-512 + mov QWORD PTR[32+rsp],rdi + mov QWORD PTR[40+rsp],rcx + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov r14,QWORD PTR[rdx] + mov r15,QWORD PTR[8+rdx] + mov rbx,QWORD PTR[16+rdx] + mov rbp,QWORD PTR[24+rdx] + mov rsi,QWORD PTR[32+rdx] + mov rdi,QWORD PTR[40+rdx] + + mov QWORD PTR[rax],r8 + mov QWORD PTR[8+rax],r9 + mov QWORD PTR[16+rax],r10 + mov QWORD PTR[24+rax],r11 + mov QWORD PTR[32+rax],r12 + mov QWORD PTR[40+rax],r13 + + mov QWORD PTR[48+rax],r14 + mov QWORD PTR[56+rax],r15 + mov QWORD PTR[64+rax],rbx + mov QWORD PTR[72+rax],rbp + mov QWORD PTR[80+rax],rsi + mov rsi,rax + mov QWORD PTR[88+rax],rdi + + + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + + + mov QWORD PTR[96+rdi],rdx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + + + mov QWORD PTR[96+rdi],rdx + + + xor rsi,256 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + + + + mov rax,QWORD PTR[96+rsi] + mov r11,QWORD PTR[144+rsi] + mov rbx,rdx + mov r10,rax + imul QWORD PTR[56+rsp] + mov r8,rax + mov rax,r11 + mov r9,rdx + imul QWORD PTR[64+rsp] + add r8,rax + adc r9,rdx + mov QWORD PTR[48+rdi],r8 + mov QWORD PTR[56+rdi],r9 + sar r9,63 + mov QWORD PTR[64+rdi],r9 + mov QWORD PTR[72+rdi],r9 + mov QWORD PTR[80+rdi],r9 + mov QWORD PTR[88+rdi],r9 + lea rsi,QWORD PTR[96+rsi] + + mov rax,r10 + imul rbx + mov r8,rax + mov rax,r11 + mov r9,rdx + imul rcx + add r8,rax + adc r9,rdx + mov QWORD PTR[96+rdi],r8 + mov QWORD PTR[104+rdi],r9 + sar r9,63 + mov QWORD PTR[112+rdi],r9 + mov QWORD PTR[120+rdi],r9 + mov QWORD PTR[128+rdi],r9 + mov QWORD PTR[136+rdi],r9 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + sar r13,63 + mov QWORD PTR[48+rdi],r13 + mov QWORD PTR[56+rdi],r13 + mov QWORD PTR[64+rdi],r13 + mov QWORD PTR[72+rdi],r13 + mov QWORD PTR[80+rdi],r13 + mov QWORD PTR[88+rdi],r13 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_767x63 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_767x63 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_767x63 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_767x63 + xor rsi,256+8*12 + mov edi,62 + call __ab_approximation_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulq_383_n_shift_by_62 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383_n_shift_by_62 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_767x63 + + xor rsi,256+8*12 + mov edi,62 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[48+rsi] + mov r11,QWORD PTR[56+rsi] + call __inner_loop_62 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + mov QWORD PTR[rdi],r8 + mov QWORD PTR[48+rdi],r10 + + + + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[96+rdi] + call __smulq_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulq_767x63 + + + xor rsi,256+8*12 + mov edi,22 + + mov r8,QWORD PTR[rsi] + xor r9,r9 + mov r10,QWORD PTR[48+rsi] + xor r11,r11 + call __inner_loop_62 + + + + + + + + lea rsi,QWORD PTR[96+rsi] + + + + + + mov rdx,r12 + mov rcx,r13 + mov rdi,QWORD PTR[32+rsp] + call __smulq_767x63 + + mov rsi,QWORD PTR[40+rsp] + mov rdx,rax + sar rax,63 + + mov r8,rax + mov r9,rax + mov r10,rax + and r8,QWORD PTR[rsi] + and r9,QWORD PTR[8+rsi] + mov r11,rax + and r10,QWORD PTR[16+rsi] + and r11,QWORD PTR[24+rsi] + mov r12,rax + and r12,QWORD PTR[32+rsi] + and rax,QWORD PTR[40+rsi] + + add r14,r8 + adc r15,r9 + adc rbx,r10 + adc rbp,r11 + adc rcx,r12 + adc rdx,rax + + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + mov QWORD PTR[64+rdi],rbx + mov QWORD PTR[72+rdi],rbp + mov QWORD PTR[80+rdi],rcx + mov QWORD PTR[88+rdi],rdx + + lea r8,QWORD PTR[1112+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_ct_inverse_mod_383:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_ct_inverse_mod_383:: +ct_inverse_mod_383 ENDP + +ALIGN 32 +__smulq_767x63 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rbp,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rsi + lea rsi,QWORD PTR[48+rsi] + + xor rbp,rdx + add rbp,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mul rbp + mov QWORD PTR[rdi],rax + mov rax,r9 + mov r9,rdx + mul rbp + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mov QWORD PTR[8+rdi],r9 + mul rbp + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mov QWORD PTR[16+rdi],r10 + mul rbp + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mov QWORD PTR[24+rdi],r11 + mul rbp + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + mov QWORD PTR[32+rdi],r12 + imul rbp + add r13,rax + adc rdx,0 + + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],rdx + sar rdx,63 + mov QWORD PTR[56+rdi],rdx + mov rdx,rcx + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + mov r15,QWORD PTR[56+rsi] + mov rbx,QWORD PTR[64+rsi] + mov rbp,QWORD PTR[72+rsi] + mov rcx,QWORD PTR[80+rsi] + mov rdi,QWORD PTR[88+rsi] + + mov rsi,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rsi,rdx + add rsi,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + xor r14,rdx + xor r15,rdx + xor rbx,rdx + xor rbp,rdx + xor rcx,rdx + xor rdi,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + adc r14,0 + adc r15,0 + adc rbx,0 + adc rbp,0 + adc rcx,0 + adc rdi,0 + + mul rsi + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rsi + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rsi + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rsi + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rsi + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + mul rsi + add r13,rax + mov rax,r14 + adc rdx,0 + mov r14,rdx + mul rsi + add r14,rax + mov rax,r15 + adc rdx,0 + mov r15,rdx + mul rsi + add r15,rax + mov rax,rbx + adc rdx,0 + mov rbx,rdx + mul rsi + add rbx,rax + mov rax,rbp + adc rdx,0 + mov rbp,rdx + mul rsi + add rbp,rax + mov rax,rcx + adc rdx,0 + mov rcx,rdx + mul rsi + add rcx,rax + mov rax,rdi + adc rdx,0 + mov rdi,rdx + mov rdx,QWORD PTR[8+rsp] + imul rax,rsi + mov rsi,QWORD PTR[16+rsp] + add rax,rdi + + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + adc r10,QWORD PTR[16+rdx] + adc r11,QWORD PTR[24+rdx] + adc r12,QWORD PTR[32+rdx] + adc r13,QWORD PTR[40+rdx] + adc r14,QWORD PTR[48+rdx] + mov rdi,QWORD PTR[56+rdx] + adc r15,rdi + adc rbx,rdi + adc rbp,rdi + adc rcx,rdi + adc rax,rdi + + mov rdi,rdx + + mov QWORD PTR[rdx],r8 + mov QWORD PTR[8+rdx],r9 + mov QWORD PTR[16+rdx],r10 + mov QWORD PTR[24+rdx],r11 + mov QWORD PTR[32+rdx],r12 + mov QWORD PTR[40+rdx],r13 + mov QWORD PTR[48+rdx],r14 + mov QWORD PTR[56+rdx],r15 + mov QWORD PTR[64+rdx],rbx + mov QWORD PTR[72+rdx],rbp + mov QWORD PTR[80+rdx],rcx + mov QWORD PTR[88+rdx],rax + + DB 0F3h,0C3h ;repret +__smulq_767x63 ENDP + +ALIGN 32 +__smulq_383x63 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rbp,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbp,rdx + add rbp,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mul rbp + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rbp + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbp + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rbp + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rbp + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + imul rax,rbp + add r13,rax + + lea rsi,QWORD PTR[48+rsi] + mov rdx,rcx + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rbp,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbp,rdx + add rbp,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mul rbp + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rbp + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbp + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rbp + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rbp + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + imul rax,rbp + add r13,rax + + lea rsi,QWORD PTR[((-48))+rsi] + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__smulq_383x63 ENDP + +ALIGN 32 +__smulq_383_n_shift_by_62 PROC PRIVATE + DB 243,15,30,250 + + mov rbx,rdx + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rbp,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbp,rdx + add rbp,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mul rbp + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rbp + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbp + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rbp + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rbp + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + imul rbp + add r13,rax + adc rdx,0 + + lea rsi,QWORD PTR[48+rsi] + mov r14,rdx + mov rdx,rcx + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rbp,rdx + sar rdx,63 + xor rax,rax + sub rax,rdx + + xor rbp,rdx + add rbp,rax + + xor r8,rdx + xor r9,rdx + xor r10,rdx + xor r11,rdx + xor r12,rdx + xor r13,rdx + add rax,r8 + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mul rbp + mov r8,rax + mov rax,r9 + mov r9,rdx + mul rbp + add r9,rax + mov rax,r10 + adc rdx,0 + mov r10,rdx + mul rbp + add r10,rax + mov rax,r11 + adc rdx,0 + mov r11,rdx + mul rbp + add r11,rax + mov rax,r12 + adc rdx,0 + mov r12,rdx + mul rbp + add r12,rax + mov rax,r13 + adc rdx,0 + mov r13,rdx + imul rbp + add r13,rax + adc rdx,0 + + lea rsi,QWORD PTR[((-48))+rsi] + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,rdx + mov rdx,rbx + + shrd r8,r9,62 + shrd r9,r10,62 + shrd r10,r11,62 + shrd r11,r12,62 + shrd r12,r13,62 + shrd r13,r14,62 + + sar r14,63 + xor rbp,rbp + sub rbp,r14 + + xor r8,r14 + xor r9,r14 + xor r10,r14 + xor r11,r14 + xor r12,r14 + xor r13,r14 + add r8,rbp + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + xor rdx,r14 + xor rcx,r14 + add rdx,rbp + add rcx,rbp + + DB 0F3h,0C3h ;repret +__smulq_383_n_shift_by_62 ENDP + +ALIGN 32 +__ab_approximation_62 PROC PRIVATE + DB 243,15,30,250 + + mov r9,QWORD PTR[40+rsi] + mov r11,QWORD PTR[88+rsi] + mov rbx,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[80+rsi] + mov r8,QWORD PTR[24+rsi] + mov r10,QWORD PTR[72+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + cmovz rbp,r10 + mov r8,QWORD PTR[16+rsi] + mov r10,QWORD PTR[64+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + cmovz rbp,r10 + mov r8,QWORD PTR[8+rsi] + mov r10,QWORD PTR[56+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + cmovz rbp,r10 + mov r8,QWORD PTR[rsi] + mov r10,QWORD PTR[48+rsi] + + mov rax,r9 + or rax,r11 + bsr rcx,rax + lea rcx,QWORD PTR[1+rcx] + cmovz r9,rbx + cmovz r11,rbp + cmovz rcx,rax + neg rcx + + + shld r9,rbx,cl + shld r11,rbp,cl + + jmp __inner_loop_62 + + DB 0F3h,0C3h ;repret +__ab_approximation_62 ENDP + +ALIGN 8 + DD 0 +__inner_loop_62 PROC PRIVATE + DB 243,15,30,250 + + mov rdx,1 + xor rcx,rcx + xor r12,r12 + mov r13,1 + mov QWORD PTR[8+rsp],rsi + +$L$oop_62:: + xor rax,rax + xor rbx,rbx + test r8,1 + mov rbp,r10 + mov r14,r11 + cmovnz rax,r10 + cmovnz rbx,r11 + sub rbp,r8 + sbb r14,r9 + mov r15,r8 + mov rsi,r9 + sub r8,rax + sbb r9,rbx + cmovc r8,rbp + cmovc r9,r14 + cmovc r10,r15 + cmovc r11,rsi + mov rax,rdx + cmovc rdx,r12 + cmovc r12,rax + mov rbx,rcx + cmovc rcx,r13 + cmovc r13,rbx + xor rax,rax + xor rbx,rbx + shrd r8,r9,1 + shr r9,1 + test r15,1 + cmovnz rax,r12 + cmovnz rbx,r13 + add r12,r12 + add r13,r13 + sub rdx,rax + sub rcx,rbx + sub edi,1 + jnz $L$oop_62 + + mov rsi,QWORD PTR[8+rsp] + DB 0F3h,0C3h ;repret +__inner_loop_62 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_ct_inverse_mod_383 + DD imagerel $L$SEH_body_ct_inverse_mod_383 + DD imagerel $L$SEH_info_ct_inverse_mod_383_prologue + + DD imagerel $L$SEH_body_ct_inverse_mod_383 + DD imagerel $L$SEH_epilogue_ct_inverse_mod_383 + DD imagerel $L$SEH_info_ct_inverse_mod_383_body + + DD imagerel $L$SEH_epilogue_ct_inverse_mod_383 + DD imagerel $L$SEH_end_ct_inverse_mod_383 + DD imagerel $L$SEH_info_ct_inverse_mod_383_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ct_inverse_mod_383_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_ct_inverse_mod_383_body:: +DB 1,0,18,0 +DB 000h,0f4h,08bh,000h +DB 000h,0e4h,08ch,000h +DB 000h,0d4h,08dh,000h +DB 000h,0c4h,08eh,000h +DB 000h,034h,08fh,000h +DB 000h,054h,090h,000h +DB 000h,074h,092h,000h +DB 000h,064h,093h,000h +DB 000h,001h,091h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_ct_inverse_mod_383_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/ctx_inverse_mod_384-x86_64.asm b/crypto/blst_src/build/win64/ctx_inverse_mod_384-x86_64.asm new file mode 100644 index 00000000000..024da69a645 --- /dev/null +++ b/crypto/blst_src/build/win64/ctx_inverse_mod_384-x86_64.asm @@ -0,0 +1,1609 @@ +OPTION DOTNAME +PUBLIC ct_inverse_mod_383$1 +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC ctx_inverse_mod_383 + + +ALIGN 32 +ctx_inverse_mod_383 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_ctx_inverse_mod_383:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ct_inverse_mod_383$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,1112 + +$L$SEH_body_ctx_inverse_mod_383:: + + + lea rax,QWORD PTR[((88+511))+rsp] + and rax,-512 + mov QWORD PTR[32+rsp],rdi + mov QWORD PTR[40+rsp],rcx + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov r14,QWORD PTR[rdx] + mov r15,QWORD PTR[8+rdx] + mov rbx,QWORD PTR[16+rdx] + mov rbp,QWORD PTR[24+rdx] + mov rsi,QWORD PTR[32+rdx] + mov rdi,QWORD PTR[40+rdx] + + mov QWORD PTR[rax],r8 + mov QWORD PTR[8+rax],r9 + mov QWORD PTR[16+rax],r10 + mov QWORD PTR[24+rax],r11 + mov QWORD PTR[32+rax],r12 + mov QWORD PTR[40+rax],r13 + + mov QWORD PTR[48+rax],r14 + mov QWORD PTR[56+rax],r15 + mov QWORD PTR[64+rax],rbx + mov QWORD PTR[72+rax],rbp + mov QWORD PTR[80+rax],rsi + mov rsi,rax + mov QWORD PTR[88+rax],rdi + + + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + + + mov QWORD PTR[96+rdi],rdx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + + + mov QWORD PTR[96+rdi],rdx + + + xor rsi,256 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + + + + mov rax,QWORD PTR[96+rsi] + mov r11,QWORD PTR[144+rsi] + mov rbx,rdx + mov r10,rax + imul QWORD PTR[56+rsp] + mov r8,rax + mov rax,r11 + mov r9,rdx + imul QWORD PTR[64+rsp] + add r8,rax + adc r9,rdx + mov QWORD PTR[48+rdi],r8 + mov QWORD PTR[56+rdi],r9 + sar r9,63 + mov QWORD PTR[64+rdi],r9 + mov QWORD PTR[72+rdi],r9 + mov QWORD PTR[80+rdi],r9 + mov QWORD PTR[88+rdi],r9 + lea rsi,QWORD PTR[96+rsi] + + mov rax,r10 + imul rbx + mov r8,rax + mov rax,r11 + mov r9,rdx + imul rcx + add r8,rax + adc r9,rdx + mov QWORD PTR[96+rdi],r8 + mov QWORD PTR[104+rdi],r9 + sar r9,63 + mov QWORD PTR[112+rdi],r9 + mov QWORD PTR[120+rdi],r9 + mov QWORD PTR[128+rdi],r9 + mov QWORD PTR[136+rdi],r9 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + sar r13,63 + mov QWORD PTR[48+rdi],r13 + mov QWORD PTR[56+rdi],r13 + mov QWORD PTR[64+rdi],r13 + mov QWORD PTR[72+rdi],r13 + mov QWORD PTR[80+rdi],r13 + mov QWORD PTR[88+rdi],r13 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_383_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_191_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_191_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_191_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_191_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_191_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_191_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + xor rsi,256+8*12 + mov edi,31 + call __ab_approximation_31 + + + mov QWORD PTR[72+rsp],r12 + mov QWORD PTR[80+rsp],r13 + + mov rdi,256 + xor rdi,rsi + call __smulx_191_n_shift_by_31 + mov QWORD PTR[56+rsp],rdx + mov QWORD PTR[64+rsp],rcx + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_191_n_shift_by_31 + mov QWORD PTR[72+rsp],rdx + mov QWORD PTR[80+rsp],rcx + + mov rdx,QWORD PTR[56+rsp] + mov rcx,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[96+rsi] + lea rdi,QWORD PTR[48+rdi] + call __smulx_383x63 + + mov rdx,QWORD PTR[72+rsp] + mov rcx,QWORD PTR[80+rsp] + lea rdi,QWORD PTR[48+rdi] + call __smulx_767x63 + + xor rsi,256+8*12 + mov edi,53 + + mov r8,QWORD PTR[rsi] + + mov r10,QWORD PTR[48+rsi] + + call __tail_loop_53 + + + + + + + + lea rsi,QWORD PTR[96+rsi] + + + + + + mov rdx,r12 + mov rcx,r13 + mov rdi,QWORD PTR[32+rsp] + call __smulx_767x63 + + mov rsi,QWORD PTR[40+rsp] + mov rdx,rax + sar rax,63 + + mov r8,rax + mov r9,rax + mov r10,rax + and r8,QWORD PTR[rsi] + and r9,QWORD PTR[8+rsi] + mov r11,rax + and r10,QWORD PTR[16+rsi] + and r11,QWORD PTR[24+rsi] + mov r12,rax + and r12,QWORD PTR[32+rsi] + and rax,QWORD PTR[40+rsi] + + add r14,r8 + adc r15,r9 + adc rbx,r10 + adc rbp,r11 + adc rcx,r12 + adc rdx,rax + + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + mov QWORD PTR[64+rdi],rbx + mov QWORD PTR[72+rdi],rbp + mov QWORD PTR[80+rdi],rcx + mov QWORD PTR[88+rdi],rdx + + lea r8,QWORD PTR[1112+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_ctx_inverse_mod_383:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_ctx_inverse_mod_383:: +ctx_inverse_mod_383 ENDP + +ALIGN 32 +__smulx_767x63 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov rax,rdx + sar rax,63 + xor rbp,rbp + sub rbp,rax + + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rsi + lea rsi,QWORD PTR[48+rsi] + + xor rdx,rax + add rdx,rbp + + xor r8,rax + xor r9,rax + xor r10,rax + xor r11,rax + xor r12,rax + xor rax,r13 + add r8,rbp + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc rax,0 + + mulx rbp,r8,r8 + mulx r13,r9,r9 + add r9,rbp + mulx rbp,r10,r10 + adc r10,r13 + mulx r13,r11,r11 + adc r11,rbp + mulx rbp,r12,r12 + adc r12,r13 + adc rbp,0 + imul rdx + add rax,rbp + adc rdx,0 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],rax + mov QWORD PTR[48+rdi],rdx + sar rdx,63 + mov QWORD PTR[56+rdi],rdx + mov rdx,rcx + mov rax,rcx + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + mov r15,QWORD PTR[56+rsi] + mov rbx,QWORD PTR[64+rsi] + mov rbp,QWORD PTR[72+rsi] + mov rcx,QWORD PTR[80+rsi] + mov rdi,QWORD PTR[88+rsi] + + sar rax,63 + xor rsi,rsi + sub rsi,rax + + xor rdx,rax + add rdx,rsi + + xor r8,rax + xor r9,rax + xor r10,rax + xor r11,rax + xor r12,rax + xor r13,rax + xor r14,rax + xor r15,rax + xor rbx,rax + xor rbp,rax + xor rcx,rax + xor rdi,rax + add r8,rsi + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + adc r14,0 + adc r15,0 + adc rbx,0 + adc rbp,0 + adc rcx,0 + adc rdi,0 + + mulx rax,r8,r8 + mulx rsi,r9,r9 + add r9,rax + mulx rax,r10,r10 + adc r10,rsi + mulx rsi,r11,r11 + adc r11,rax + mulx rax,r12,r12 + adc r12,rsi + mulx rsi,r13,r13 + adc r13,rax + mulx rax,r14,r14 + adc r14,rsi + mulx rsi,r15,r15 + adc r15,rax + mulx rax,rbx,rbx + adc rbx,rsi + mulx rsi,rbp,rbp + adc rbp,rax + mulx rax,rcx,rcx + adc rcx,rsi + mulx rsi,rdi,rdi + mov rdx,QWORD PTR[8+rsp] + mov rsi,QWORD PTR[16+rsp] + adc rax,rdi + + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + adc r10,QWORD PTR[16+rdx] + adc r11,QWORD PTR[24+rdx] + adc r12,QWORD PTR[32+rdx] + adc r13,QWORD PTR[40+rdx] + adc r14,QWORD PTR[48+rdx] + mov rdi,QWORD PTR[56+rdx] + adc r15,rdi + adc rbx,rdi + adc rbp,rdi + adc rcx,rdi + adc rax,rdi + + mov rdi,rdx + + mov QWORD PTR[rdx],r8 + mov QWORD PTR[8+rdx],r9 + mov QWORD PTR[16+rdx],r10 + mov QWORD PTR[24+rdx],r11 + mov QWORD PTR[32+rdx],r12 + mov QWORD PTR[40+rdx],r13 + mov QWORD PTR[48+rdx],r14 + mov QWORD PTR[56+rdx],r15 + mov QWORD PTR[64+rdx],rbx + mov QWORD PTR[72+rdx],rbp + mov QWORD PTR[80+rdx],rcx + mov QWORD PTR[88+rdx],rax + + DB 0F3h,0C3h ;repret +__smulx_767x63 ENDP + +ALIGN 32 +__smulx_383x63 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[((0+0))+rsi] + mov r9,QWORD PTR[((0+8))+rsi] + mov r10,QWORD PTR[((0+16))+rsi] + mov r11,QWORD PTR[((0+24))+rsi] + mov r12,QWORD PTR[((0+32))+rsi] + mov r13,QWORD PTR[((0+40))+rsi] + + mov rbp,rdx + sar rbp,63 + xor rax,rax + sub rax,rbp + + xor rdx,rbp + add rdx,rax + + xor r8,rbp + xor r9,rbp + xor r10,rbp + xor r11,rbp + xor r12,rbp + xor r13,rbp + add r8,rax + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mulx rbp,r8,r8 + mulx rax,r9,r9 + add r9,rbp + mulx rbp,r10,r10 + adc r10,rax + mulx rax,r11,r11 + adc r11,rbp + mulx rbp,r12,r12 + adc r12,rax + mulx rax,r13,r13 + mov rdx,rcx + adc r13,rbp + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov r8,QWORD PTR[((48+0))+rsi] + mov r9,QWORD PTR[((48+8))+rsi] + mov r10,QWORD PTR[((48+16))+rsi] + mov r11,QWORD PTR[((48+24))+rsi] + mov r12,QWORD PTR[((48+32))+rsi] + mov r13,QWORD PTR[((48+40))+rsi] + + mov rbp,rdx + sar rbp,63 + xor rax,rax + sub rax,rbp + + xor rdx,rbp + add rdx,rax + + xor r8,rbp + xor r9,rbp + xor r10,rbp + xor r11,rbp + xor r12,rbp + xor r13,rbp + add r8,rax + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc r13,0 + + mulx rbp,r8,r8 + mulx rax,r9,r9 + add r9,rbp + mulx rbp,r10,r10 + adc r10,rax + mulx rax,r11,r11 + adc r11,rbp + mulx rbp,r12,r12 + adc r12,rax + mulx rax,r13,r13 + adc r13,rbp + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__smulx_383x63 ENDP + +ALIGN 32 +__smulx_383_n_shift_by_31 PROC PRIVATE + DB 243,15,30,250 + + mov rbx,rdx + xor r14,r14 + mov r8,QWORD PTR[((0+0))+rsi] + mov r9,QWORD PTR[((0+8))+rsi] + mov r10,QWORD PTR[((0+16))+rsi] + mov r11,QWORD PTR[((0+24))+rsi] + mov r12,QWORD PTR[((0+32))+rsi] + mov r13,QWORD PTR[((0+40))+rsi] + + mov rax,rdx + sar rax,63 + xor rbp,rbp + sub rbp,rax + + xor rdx,rax + add rdx,rbp + + xor r8,rax + xor r9,rax + xor r10,rax + xor r11,rax + xor r12,rax + xor rax,r13 + add r8,rbp + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc rax,0 + + mulx rbp,r8,r8 + mulx r13,r9,r9 + add r9,rbp + mulx rbp,r10,r10 + adc r10,r13 + mulx r13,r11,r11 + adc r11,rbp + mulx rbp,r12,r12 + adc r12,r13 + adc rbp,0 + imul rdx + add rax,rbp + adc r14,rdx + + mov rdx,rcx + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],rax + mov r8,QWORD PTR[((48+0))+rsi] + mov r9,QWORD PTR[((48+8))+rsi] + mov r10,QWORD PTR[((48+16))+rsi] + mov r11,QWORD PTR[((48+24))+rsi] + mov r12,QWORD PTR[((48+32))+rsi] + mov r13,QWORD PTR[((48+40))+rsi] + + mov rax,rdx + sar rax,63 + xor rbp,rbp + sub rbp,rax + + xor rdx,rax + add rdx,rbp + + xor r8,rax + xor r9,rax + xor r10,rax + xor r11,rax + xor r12,rax + xor rax,r13 + add r8,rbp + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc rax,0 + + mulx rbp,r8,r8 + mulx r13,r9,r9 + add r9,rbp + mulx rbp,r10,r10 + adc r10,r13 + mulx r13,r11,r11 + adc r11,rbp + mulx rbp,r12,r12 + adc r12,r13 + adc rbp,0 + imul rdx + add rax,rbp + adc rdx,0 + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc rax,QWORD PTR[40+rdi] + adc r14,rdx + mov rdx,rbx + + shrd r8,r9,31 + shrd r9,r10,31 + shrd r10,r11,31 + shrd r11,r12,31 + shrd r12,rax,31 + shrd rax,r14,31 + + sar r14,63 + xor rbp,rbp + sub rbp,r14 + + xor r8,r14 + xor r9,r14 + xor r10,r14 + xor r11,r14 + xor r12,r14 + xor rax,r14 + add r8,rbp + adc r9,0 + adc r10,0 + adc r11,0 + adc r12,0 + adc rax,0 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],rax + + xor rdx,r14 + xor rcx,r14 + add rdx,rbp + add rcx,rbp + + DB 0F3h,0C3h ;repret +__smulx_383_n_shift_by_31 ENDP + +ALIGN 32 +__smulx_191_n_shift_by_31 PROC PRIVATE + DB 243,15,30,250 + + mov rbx,rdx + mov r8,QWORD PTR[((0+0))+rsi] + mov r9,QWORD PTR[((0+8))+rsi] + mov r10,QWORD PTR[((0+16))+rsi] + + mov rax,rdx + sar rax,63 + xor rbp,rbp + sub rbp,rax + + xor rdx,rax + add rdx,rbp + + xor r8,rax + xor r9,rax + xor rax,r10 + add r8,rbp + adc r9,0 + adc rax,0 + + mulx rbp,r8,r8 + mulx r10,r9,r9 + add r9,rbp + adc r10,0 + imul rdx + add r10,rax + adc rdx,0 + mov r14,rdx + mov rdx,rcx + mov r11,QWORD PTR[((48+0))+rsi] + mov r12,QWORD PTR[((48+8))+rsi] + mov r13,QWORD PTR[((48+16))+rsi] + + mov rax,rdx + sar rax,63 + xor rbp,rbp + sub rbp,rax + + xor rdx,rax + add rdx,rbp + + xor r11,rax + xor r12,rax + xor rax,r13 + add r11,rbp + adc r12,0 + adc rax,0 + + mulx rbp,r11,r11 + mulx r13,r12,r12 + add r12,rbp + adc r13,0 + imul rdx + add r13,rax + adc rdx,0 + add r11,r8 + adc r12,r9 + adc r13,r10 + adc r14,rdx + mov rdx,rbx + + shrd r11,r12,31 + shrd r12,r13,31 + shrd r13,r14,31 + + sar r14,63 + xor rbp,rbp + sub rbp,r14 + + xor r11,r14 + xor r12,r14 + xor r13,r14 + add r11,rbp + adc r12,0 + adc r13,0 + + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + mov QWORD PTR[16+rdi],r13 + + xor rdx,r14 + xor rcx,r14 + add rdx,rbp + add rcx,rbp + + DB 0F3h,0C3h ;repret +__smulx_191_n_shift_by_31 ENDP + +ALIGN 32 +__ab_approximation_31 PROC PRIVATE + DB 243,15,30,250 + + mov r9,QWORD PTR[40+rsi] + mov r11,QWORD PTR[88+rsi] + mov rbx,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[80+rsi] + mov r8,QWORD PTR[24+rsi] + mov r10,QWORD PTR[72+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + mov r8,QWORD PTR[16+rsi] + cmovz rbp,r10 + mov r10,QWORD PTR[64+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + mov r8,QWORD PTR[8+rsi] + cmovz rbp,r10 + mov r10,QWORD PTR[56+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + mov r8,QWORD PTR[rsi] + cmovz rbp,r10 + mov r10,QWORD PTR[48+rsi] + + mov rax,r9 + or rax,r11 + cmovz r9,rbx + cmovz r11,rbp + cmovz rbx,r8 + cmovz rbp,r10 + + mov rax,r9 + or rax,r11 + bsr rcx,rax + lea rcx,QWORD PTR[1+rcx] + cmovz r9,r8 + cmovz r11,r10 + cmovz rcx,rax + neg rcx + + + shld r9,rbx,cl + shld r11,rbp,cl + + mov eax,07FFFFFFFh + and r8,rax + and r10,rax + andn r9,rax,r9 + andn r11,rax,r11 + or r8,r9 + or r10,r11 + + jmp __inner_loop_31 + + DB 0F3h,0C3h ;repret +__ab_approximation_31 ENDP + +ALIGN 32 +__inner_loop_31 PROC PRIVATE + DB 243,15,30,250 + + mov rcx,07FFFFFFF80000000h + mov r13,0800000007FFFFFFFh + mov r15,07FFFFFFF7FFFFFFFh + +$L$oop_31:: + cmp r8,r10 + mov rax,r8 + mov rbx,r10 + mov rbp,rcx + mov r14,r13 + cmovb r8,r10 + cmovb r10,rax + cmovb rcx,r13 + cmovb r13,rbp + + sub r8,r10 + sub rcx,r13 + add rcx,r15 + + test rax,1 + cmovz r8,rax + cmovz r10,rbx + cmovz rcx,rbp + cmovz r13,r14 + + shr r8,1 + add r13,r13 + sub r13,r15 + sub edi,1 + jnz $L$oop_31 + + shr r15,32 + mov edx,ecx + mov r12d,r13d + shr rcx,32 + shr r13,32 + sub rdx,r15 + sub rcx,r15 + sub r12,r15 + sub r13,r15 + + DB 0F3h,0C3h ;repret +__inner_loop_31 ENDP + + +ALIGN 32 +__tail_loop_53 PROC PRIVATE + DB 243,15,30,250 + + mov rdx,1 + xor rcx,rcx + xor r12,r12 + mov r13,1 + +$L$oop_53:: + xor rax,rax + test r8,1 + mov rbx,r10 + cmovnz rax,r10 + sub rbx,r8 + mov rbp,r8 + sub r8,rax + cmovc r8,rbx + cmovc r10,rbp + mov rax,rdx + cmovc rdx,r12 + cmovc r12,rax + mov rbx,rcx + cmovc rcx,r13 + cmovc r13,rbx + xor rax,rax + xor rbx,rbx + shr r8,1 + test rbp,1 + cmovnz rax,r12 + cmovnz rbx,r13 + add r12,r12 + add r13,r13 + sub rdx,rax + sub rcx,rbx + sub edi,1 + jnz $L$oop_53 + + DB 0F3h,0C3h ;repret +__tail_loop_53 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_ctx_inverse_mod_383 + DD imagerel $L$SEH_body_ctx_inverse_mod_383 + DD imagerel $L$SEH_info_ctx_inverse_mod_383_prologue + + DD imagerel $L$SEH_body_ctx_inverse_mod_383 + DD imagerel $L$SEH_epilogue_ctx_inverse_mod_383 + DD imagerel $L$SEH_info_ctx_inverse_mod_383_body + + DD imagerel $L$SEH_epilogue_ctx_inverse_mod_383 + DD imagerel $L$SEH_end_ctx_inverse_mod_383 + DD imagerel $L$SEH_info_ctx_inverse_mod_383_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ctx_inverse_mod_383_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_ctx_inverse_mod_383_body:: +DB 1,0,18,0 +DB 000h,0f4h,08bh,000h +DB 000h,0e4h,08ch,000h +DB 000h,0d4h,08dh,000h +DB 000h,0c4h,08eh,000h +DB 000h,034h,08fh,000h +DB 000h,054h,090h,000h +DB 000h,074h,092h,000h +DB 000h,064h,093h,000h +DB 000h,001h,091h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_ctx_inverse_mod_383_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/div3w-armv8.asm b/crypto/blst_src/build/win64/div3w-armv8.asm new file mode 100644 index 00000000000..aec90679eea --- /dev/null +++ b/crypto/blst_src/build/win64/div3w-armv8.asm @@ -0,0 +1,89 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + EXPORT |div_3_limbs|[FUNC] + ALIGN 32 +|div_3_limbs| PROC + ldp x4,x5,[x0] // load R + eor x0,x0,x0 // Q = 0 + mov x3,#64 // loop counter + nop + +|$Loop| + subs x6,x4,x1 // R - D + add x0,x0,x0 // Q <<= 1 + sbcs x7,x5,x2 + add x0,x0,#1 // Q + speculative bit + csello x4,x4,x6 + extr x1,x2,x1,#1 // D >>= 1 + csello x5,x5,x7 + lsr x2,x2,#1 + sbc x0,x0,xzr // subtract speculative bit + sub x3,x3,#1 + cbnz x3,|$Loop| + + asr x3,x0,#63 // top bit -> mask + add x0,x0,x0 // Q <<= 1 + subs x6,x4,x1 // R - D + add x0,x0,#1 // Q + speculative bit + sbcs x7,x5,x2 + sbc x0,x0,xzr // subtract speculative bit + + orr x0,x0,x3 // all ones if overflow + + ret + ENDP + + EXPORT |quot_rem_128|[FUNC] + ALIGN 32 +|quot_rem_128| PROC + ldp x3,x4,[x1] + + mul x5,x3,x2 // divisor[0:1} * quotient + umulh x6,x3,x2 + mul x11, x4,x2 + umulh x7,x4,x2 + + ldp x8,x9,[x0] // load 3 limbs of the dividend + ldr x10,[x0,#16] + + adds x6,x6,x11 + adc x7,x7,xzr + + subs x8,x8,x5 // dividend - divisor * quotient + sbcs x9,x9,x6 + sbcs x10,x10,x7 + sbc x5,xzr,xzr // borrow -> mask + + add x2,x2,x5 // if borrowed, adjust the quotient ... + and x3,x3,x5 + and x4,x4,x5 + adds x8,x8,x3 // ... and add divisor + adc x9,x9,x4 + + stp x8,x9,[x0] // save 2 limbs of the remainder + str x2,[x0,#16] // and one limb of the quotient + + mov x0,x2 // return adjusted quotient + + ret + ENDP + + + EXPORT |quot_rem_64|[FUNC] + ALIGN 32 +|quot_rem_64| PROC + ldr x3,[x1] + ldr x8,[x0] // load 1 limb of the dividend + + mul x5,x3,x2 // divisor * quotient + + sub x8,x8,x5 // dividend - divisor * quotient + + stp x8,x2,[x0] // save remainder and quotient + + mov x0,x2 // return quotient + + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/div3w-x86_64.asm b/crypto/blst_src/build/win64/div3w-x86_64.asm new file mode 100644 index 00000000000..805c5b1fcb0 --- /dev/null +++ b/crypto/blst_src/build/win64/div3w-x86_64.asm @@ -0,0 +1,257 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC div_3_limbs + + +ALIGN 32 +div_3_limbs PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_div_3_limbs:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +$L$SEH_body_div_3_limbs:: + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + xor rax,rax + mov ecx,64 + +$L$oop:: + mov r10,r8 + sub r8,rsi + mov r11,r9 + sbb r9,rdx + lea rax,QWORD PTR[1+rax*1+rax] + mov rdi,rdx + cmovc r8,r10 + cmovc r9,r11 + sbb rax,0 + shl rdi,63 + shr rsi,1 + shr rdx,1 + or rsi,rdi + sub ecx,1 + jnz $L$oop + + lea rcx,QWORD PTR[1+rax*1+rax] + sar rax,63 + + sub r8,rsi + sbb r9,rdx + sbb rcx,0 + + or rax,rcx + +$L$SEH_epilogue_div_3_limbs:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_div_3_limbs:: +div_3_limbs ENDP +PUBLIC quot_rem_128 + + +ALIGN 32 +quot_rem_128 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_quot_rem_128:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +$L$SEH_body_quot_rem_128:: + + mov rax,rdx + mov rcx,rdx + + mul QWORD PTR[rsi] + mov r8,rax + mov rax,rcx + mov r9,rdx + + mul QWORD PTR[8+rsi] + add r9,rax + adc rdx,0 + + mov r10,QWORD PTR[rdi] + mov r11,QWORD PTR[8+rdi] + mov rax,QWORD PTR[16+rdi] + + sub r10,r8 + sbb r11,r9 + sbb rax,rdx + sbb r8,r8 + + add rcx,r8 + mov r9,r8 + and r8,QWORD PTR[rsi] + and r9,QWORD PTR[8+rsi] + add r10,r8 + adc r11,r9 + + mov QWORD PTR[rdi],r10 + mov QWORD PTR[8+rdi],r11 + mov QWORD PTR[16+rdi],rcx + + mov rax,rcx + +$L$SEH_epilogue_quot_rem_128:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_quot_rem_128:: +quot_rem_128 ENDP + + + + + +PUBLIC quot_rem_64 + + +ALIGN 32 +quot_rem_64 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_quot_rem_64:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +$L$SEH_body_quot_rem_64:: + + mov rax,rdx + imul rdx,QWORD PTR[rsi] + + mov r10,QWORD PTR[rdi] + + sub r10,rdx + + mov QWORD PTR[rdi],r10 + mov QWORD PTR[8+rdi],rax + +$L$SEH_epilogue_quot_rem_64:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_quot_rem_64:: +quot_rem_64 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_div_3_limbs + DD imagerel $L$SEH_body_div_3_limbs + DD imagerel $L$SEH_info_div_3_limbs_prologue + + DD imagerel $L$SEH_body_div_3_limbs + DD imagerel $L$SEH_epilogue_div_3_limbs + DD imagerel $L$SEH_info_div_3_limbs_body + + DD imagerel $L$SEH_epilogue_div_3_limbs + DD imagerel $L$SEH_end_div_3_limbs + DD imagerel $L$SEH_info_div_3_limbs_epilogue + + DD imagerel $L$SEH_begin_quot_rem_128 + DD imagerel $L$SEH_body_quot_rem_128 + DD imagerel $L$SEH_info_quot_rem_128_prologue + + DD imagerel $L$SEH_body_quot_rem_128 + DD imagerel $L$SEH_epilogue_quot_rem_128 + DD imagerel $L$SEH_info_quot_rem_128_body + + DD imagerel $L$SEH_epilogue_quot_rem_128 + DD imagerel $L$SEH_end_quot_rem_128 + DD imagerel $L$SEH_info_quot_rem_128_epilogue + + DD imagerel $L$SEH_begin_quot_rem_64 + DD imagerel $L$SEH_body_quot_rem_64 + DD imagerel $L$SEH_info_quot_rem_64_prologue + + DD imagerel $L$SEH_body_quot_rem_64 + DD imagerel $L$SEH_epilogue_quot_rem_64 + DD imagerel $L$SEH_info_quot_rem_64_body + + DD imagerel $L$SEH_epilogue_quot_rem_64 + DD imagerel $L$SEH_end_quot_rem_64 + DD imagerel $L$SEH_info_quot_rem_64_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_div_3_limbs_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_div_3_limbs_body:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h +$L$SEH_info_div_3_limbs_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_quot_rem_128_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_quot_rem_128_body:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h +$L$SEH_info_quot_rem_128_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_quot_rem_64_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_quot_rem_64_body:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h +$L$SEH_info_quot_rem_64_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/dll.c b/crypto/blst_src/build/win64/dll.c new file mode 100644 index 00000000000..a70d0c98a23 --- /dev/null +++ b/crypto/blst_src/build/win64/dll.c @@ -0,0 +1,32 @@ +#include + +#if defined(_MSC_VER) +/* + * Even though we don't have memcpy/memset anywhere, MSVC compiler + * generates calls to them as it recognizes corresponding patterns. + */ +void *memcpy(unsigned char *dst, const unsigned char *src, size_t n) +{ + void *ret = dst; + + while(n--) + *dst++ = *src++; + + return ret; +} + +void *memset(unsigned char *dst, int c, size_t n) +{ + void *ret = dst; + + while(n--) + *dst++ = (unsigned char)c; + + return ret; +} +#elif defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ return TRUE; } diff --git a/crypto/blst_src/build/win64/mul_mont_256-armv8.asm b/crypto/blst_src/build/win64/mul_mont_256-armv8.asm new file mode 100644 index 00000000000..bb2dfe043c7 --- /dev/null +++ b/crypto/blst_src/build/win64/mul_mont_256-armv8.asm @@ -0,0 +1,465 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + + EXPORT |mul_mont_sparse_256|[FUNC] + ALIGN 32 +|mul_mont_sparse_256| PROC + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x10,x11,[x1] + ldr x9, [x2] + ldp x12,x13,[x1,#16] + + mul x19,x10,x9 + ldp x5,x6,[x3] + mul x20,x11,x9 + ldp x7,x8,[x3,#16] + mul x21,x12,x9 + mul x22,x13,x9 + + umulh x14,x10,x9 + umulh x15,x11,x9 + mul x3,x4,x19 + umulh x16,x12,x9 + umulh x17,x13,x9 + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,xzr, x17 + mul x17,x8,x3 + ldr x9,[x2,8*1] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*2] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + ldr x9,[x2,8*3] + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + mul x14,x10,x9 + adcs x20,x21,x15 + mul x15,x11,x9 + adcs x21,x22,x16 + mul x16,x12,x9 + adcs x22,x23,x17 + mul x17,x13,x9 + adc x23,xzr,xzr + + adds x19,x19,x14 + umulh x14,x10,x9 + adcs x20,x20,x15 + umulh x15,x11,x9 + adcs x21,x21,x16 + mul x3,x4,x19 + umulh x16,x12,x9 + adcs x22,x22,x17 + umulh x17,x13,x9 + adc x23,x23,xzr + + adds x20,x20,x14 + //mul x14,x5,x3 + adcs x21,x21,x15 + mul x15,x6,x3 + adcs x22,x22,x16 + mul x16,x7,x3 + adc x23,x23,x17 + mul x17,x8,x3 + subs xzr,x19,#1 //adds x19,x19,x14 + umulh x14,x5,x3 + adcs x20,x20,x15 + umulh x15,x6,x3 + adcs x21,x21,x16 + umulh x16,x7,x3 + adcs x22,x22,x17 + umulh x17,x8,x3 + adc x23,x23,xzr + + adds x19,x20,x14 + adcs x20,x21,x15 + adcs x21,x22,x16 + adcs x22,x23,x17 + adc x23,xzr,xzr + + subs x14,x19,x5 + sbcs x15,x20,x6 + sbcs x16,x21,x7 + sbcs x17,x22,x8 + sbcs xzr, x23,xzr + + csello x19,x19,x14 + csello x20,x20,x15 + csello x21,x21,x16 + csello x22,x22,x17 + + stp x19,x20,[x0] + stp x21,x22,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + ret + ENDP + + + EXPORT |sqr_mont_sparse_256|[FUNC] + ALIGN 32 +|sqr_mont_sparse_256| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + mov x4,x3 + + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10 + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x11,x6,x5 // a[1]*a[0] + umulh x15,x6,x5 + mul x12,x7,x5 // a[2]*a[0] + umulh x16,x7,x5 + mul x13,x8,x5 // a[3]*a[0] + umulh x19,x8,x5 + + adds x12,x12,x15 // accumulate high parts of multiplication + mul x14,x7,x6 // a[2]*a[1] + umulh x15,x7,x6 + adcs x13,x13,x16 + mul x16,x8,x6 // a[3]*a[1] + umulh x17,x8,x6 + adc x19,x19,xzr // can't overflow + + mul x20,x8,x7 // a[3]*a[2] + umulh x21,x8,x7 + + adds x15,x15,x16 // accumulate high parts of multiplication + mul x10,x5,x5 // a[0]*a[0] + adc x16,x17,xzr // can't overflow + + adds x13,x13,x14 // accumulate low parts of multiplication + umulh x5,x5,x5 + adcs x19,x19,x15 + mul x15,x6,x6 // a[1]*a[1] + adcs x20,x20,x16 + umulh x6,x6,x6 + adc x21,x21,xzr // can't overflow + + adds x11,x11,x11 // acc[1-6]*=2 + mul x16,x7,x7 // a[2]*a[2] + adcs x12,x12,x12 + umulh x7,x7,x7 + adcs x13,x13,x13 + mul x17,x8,x8 // a[3]*a[3] + adcs x19,x19,x19 + umulh x8,x8,x8 + adcs x20,x20,x20 + adcs x21,x21,x21 + adc x22,xzr,xzr + + adds x11,x11,x5 // +a[i]*a[i] + adcs x12,x12,x15 + adcs x13,x13,x6 + adcs x19,x19,x16 + adcs x20,x20,x7 + adcs x21,x21,x17 + adc x22,x22,x8 + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + adds x10,x10,x19 // accumulate upper half + adcs x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adc x19,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x19,xzr + + csello x10,x10,x14 + csello x11,x11,x15 + csello x12,x12,x16 + csello x13,x13,x17 + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldr x29,[sp],#48 + DCDU 3573752767 + ret + ENDP + + + EXPORT |from_mont_256|[FUNC] + ALIGN 32 +|from_mont_256| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + + csello x10,x10,x14 + csello x11,x11,x15 + csello x12,x12,x16 + csello x13,x13,x17 + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |redc_mont_256|[FUNC] + ALIGN 32 +|redc_mont_256| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x4,x3 + ldp x10,x11,[x1] + ldp x12,x13,[x1,#16] + + bl __mul_by_1_mont_256 + ldr x30,[x29,#8] + + ldp x14,x15,[x1,#32] + ldp x16,x17,[x1,#48] + + adds x10,x10,x14 + adcs x11,x11,x15 + adcs x12,x12,x16 + adcs x13,x13,x17 + adc x9,xzr,xzr + + subs x14,x10,x5 + sbcs x15,x11,x6 + sbcs x16,x12,x7 + sbcs x17,x13,x8 + sbcs xzr, x9,xzr + + csello x10,x10,x14 + csello x11,x11,x15 + csello x12,x12,x16 + csello x13,x13,x17 + + stp x10,x11,[x0] + stp x12,x13,[x0,#16] + + ldr x29,[sp],#16 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__mul_by_1_mont_256| PROC + mul x3,x4,x10 + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + mul x3,x4,x10 + adc x13,x9,x17 + //mul x14,x5,x3 + mul x15,x6,x3 + mul x16,x7,x3 + mul x17,x8,x3 + subs xzr,x10,#1 //adds x10,x10,x14 + umulh x14,x5,x3 + adcs x11,x11,x15 + umulh x15,x6,x3 + adcs x12,x12,x16 + umulh x16,x7,x3 + adcs x13,x13,x17 + umulh x17,x8,x3 + adc x9,xzr,xzr + + adds x10,x11,x14 + adcs x11,x12,x15 + adcs x12,x13,x16 + adc x13,x9,x17 + + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/mul_mont_384-armv8.asm b/crypto/blst_src/build/win64/mul_mont_384-armv8.asm new file mode 100644 index 00000000000..a309dfa4121 --- /dev/null +++ b/crypto/blst_src/build/win64/mul_mont_384-armv8.asm @@ -0,0 +1,2373 @@ + AREA |.text|,CODE,ALIGN=8,ARM64 + + + EXPORT |add_mod_384x384|[FUNC] + ALIGN 32 +|add_mod_384x384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __add_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__add_mod_384x384| PROC + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + stp x11, x12, [x0] + adcs x15,x15,x23 + ldp x11, x12, [x1,#48] + adcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + adcs x11,x11,x19 + stp x15, x16, [x0,#32] + adcs x12,x12,x20 + ldp x15, x16, [x1,#80] + adcs x13,x13,x21 + ldp x23,x24,[x2,#80] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csello x11,x11,x19 + csello x12,x12,x20 + csello x13,x13,x21 + csello x14,x14,x22 + stp x11,x12,[x0,#48] + csello x15,x15,x23 + stp x13,x14,[x0,#64] + csello x16,x16,x24 + stp x15,x16,[x0,#80] + + ret + ENDP + + + EXPORT |sub_mod_384x384|[FUNC] + ALIGN 32 +|sub_mod_384x384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__sub_mod_384x384| PROC + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + stp x11, x12, [x0] + sbcs x15,x15,x23 + ldp x11, x12, [x1,#48] + sbcs x16,x16,x24 + + ldp x19,x20,[x2,#48] + stp x13, x14, [x0,#16] + ldp x13, x14, [x1,#64] + ldp x21,x22,[x2,#64] + + sbcs x11,x11,x19 + stp x15, x16, [x0,#32] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#80] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#80] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + ret + ENDP + + + ALIGN 32 +|__add_mod_384| PROC + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + adds x11,x11,x19 + ldp x21,x22,[x2,#16] + adcs x12,x12,x20 + ldp x15, x16, [x1,#32] + adcs x13,x13,x21 + ldp x23,x24,[x2,#32] + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x17,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x17,xzr + + csello x11,x11,x19 + csello x12,x12,x20 + csello x13,x13,x21 + csello x14,x14,x22 + csello x15,x15,x23 + stp x11,x12,[x0] + csello x16,x16,x24 + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + ENDP + + + ALIGN 32 +|__sub_mod_384| PROC + ldp x11, x12, [x1] + ldp x19,x20,[x2] + ldp x13, x14, [x1,#16] + subs x11,x11,x19 + ldp x21,x22,[x2,#16] + sbcs x12,x12,x20 + ldp x15, x16, [x1,#32] + sbcs x13,x13,x21 + ldp x23,x24,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x17,xzr,xzr + + and x19,x5,x17 + and x20,x6,x17 + adds x11,x11,x19 + and x21,x7,x17 + adcs x12,x12,x20 + and x22,x8,x17 + adcs x13,x13,x21 + and x23,x9,x17 + adcs x14,x14,x22 + and x24,x10,x17 + adcs x15,x15,x23 + stp x11,x12,[x0] + adc x16,x16,x24 + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + ENDP + + + + EXPORT |mul_mont_384x|[FUNC] + ALIGN 32 +|mul_mont_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#288 // space for 3 768-bit vectors + + mov x26,x0 // save r_ptr + mov x27,x1 // save b_ptr + mov x28,x2 // save b_ptr + + sub x0,sp,#0 // mul_384(t0, a->re, b->re) + bl __mul_384 + + add x1,x1,#48 // mul_384(t1, a->im, b->im) + add x2,x2,#48 + add x0,sp,#96 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + sub x2,x1,#48 + add x0,sp,#240 + bl __add_mod_384 + + add x1,x28,#0 + add x2,x28,#48 + add x0,sp,#192 // t2 + bl __add_mod_384 + + add x1,x0,#0 + add x2,x0,#48 + bl __mul_384 // mul_384(t2, a->re+a->im, b->re+b->im) + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,x0 + add x2,sp,#0 + bl __sub_mod_384x384 + + add x2,sp,#96 + bl __sub_mod_384x384 // t2 = t2-t0-t1 + + add x1,sp,#0 + add x2,sp,#96 + add x0,sp,#0 + bl __sub_mod_384x384 // t0 = t0-t1 + + add x1,sp,#0 // ret->re = redc(t0) + add x0,x26,#0 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + + add x1,sp,#192 // ret->im = redc(t2) + add x0,x0,#48 + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#288 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |sqr_mont_384x|[FUNC] + ALIGN 32 +|sqr_mont_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 2 384-bit vectors + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + add x0,sp,#0 + bl __add_mod_384 // t0 = a->re + a->im + + add x0,sp,#48 + bl __sub_mod_384 // t1 = a->re - a->im + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __mul_mont_384 // mul_mont_384(ret->im, a->re, a->im) + + adds x11,x11,x11 // add with itself + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csello x19,x11,x19 + csello x20,x12,x20 + csello x21,x13,x21 + ldp x11,x12,[sp] + csello x22,x14,x22 + ldr x17, [sp,#48] + csello x23,x15,x23 + ldp x13,x14,[sp,#16] + csello x24,x16,x24 + ldp x15,x16,[sp,#32] + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + add x2,sp,#48 + bl __mul_mont_384 // mul_mont_384(ret->re, t0, t1) + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |mul_mont_384|[FUNC] + ALIGN 32 +|mul_mont_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__mul_mont_384| PROC + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + mov x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*1] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*2] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*3] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*4] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + adc x4,x17,xzr + ldr x17,[x2,8*5] + + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,x4,xzr + ldr x4,[x29,#96] + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adcs x25,x25,xzr + adc x17,xzr,xzr + + adds x20,x20,x26 + // mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adcs x25,x25,x3 + mul x3,x10,x4 + adc x17,x17,xzr + subs xzr,x19,#1 // adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adcs x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + adc x17,x17,xzr + + adds x19,x20,x26 + adcs x20,x21,x27 + adcs x21,x22,x28 + adcs x22,x23,x0 + adcs x23,x24,x1 + adcs x24,x25,x3 + adc x25,x17,xzr + + subs x26,x19,x5 + sbcs x27,x20,x6 + sbcs x28,x21,x7 + sbcs x0,x22,x8 + sbcs x1,x23,x9 + sbcs x3,x24,x10 + sbcs xzr, x25,xzr + + csello x11,x19,x26 + csello x12,x20,x27 + csello x13,x21,x28 + csello x14,x22,x0 + csello x15,x23,x1 + csello x16,x24,x3 + ret + ENDP + + + + EXPORT |sqr_mont_384|[FUNC] + ALIGN 32 +|sqr_mont_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for 768-bit vector + mov x4,x3 // adjust for missing b_ptr + + mov x3,x0 // save r_ptr + mov x0,sp + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + mov x1,sp + mov x0,x3 // restore r_ptr + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |sqr_n_mul_mont_383|[FUNC] + ALIGN 32 +|sqr_n_mul_mont_383| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x4,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#96 // space for 768-bit vector + mov x17,x5 // save b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + mov x0,sp +|$Loop_sqr_383| + bl __sqr_384 + sub x2,x2,#1 // counter + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + mov x1,sp + bl __mul_by_1_mont_384 + + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // just accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + cbnz x2,|$Loop_sqr_383| + + mov x2,x17 + ldr x17,[x17] + bl __mul_mont_384 + ldr x30,[x29,#8] + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + ALIGN 32 +|__sqr_384| PROC + mul x19,x12,x11 + mul x20,x13,x11 + mul x21,x14,x11 + mul x22,x15,x11 + mul x23,x16,x11 + + umulh x6,x12,x11 + umulh x7,x13,x11 + umulh x8,x14,x11 + umulh x9,x15,x11 + adds x20,x20,x6 + umulh x10,x16,x11 + adcs x21,x21,x7 + mul x7,x13,x12 + adcs x22,x22,x8 + mul x8,x14,x12 + adcs x23,x23,x9 + mul x9,x15,x12 + adc x24,xzr, x10 + mul x10,x16,x12 + + adds x21,x21,x7 + umulh x7,x13,x12 + adcs x22,x22,x8 + umulh x8,x14,x12 + adcs x23,x23,x9 + umulh x9,x15,x12 + adcs x24,x24,x10 + umulh x10,x16,x12 + adc x25,xzr,xzr + + mul x5,x11,x11 + adds x22,x22,x7 + umulh x11, x11,x11 + adcs x23,x23,x8 + mul x8,x14,x13 + adcs x24,x24,x9 + mul x9,x15,x13 + adc x25,x25,x10 + mul x10,x16,x13 + + adds x23,x23,x8 + umulh x8,x14,x13 + adcs x24,x24,x9 + umulh x9,x15,x13 + adcs x25,x25,x10 + umulh x10,x16,x13 + adc x26,xzr,xzr + + mul x6,x12,x12 + adds x24,x24,x8 + umulh x12, x12,x12 + adcs x25,x25,x9 + mul x9,x15,x14 + adc x26,x26,x10 + mul x10,x16,x14 + + adds x25,x25,x9 + umulh x9,x15,x14 + adcs x26,x26,x10 + umulh x10,x16,x14 + adc x27,xzr,xzr + mul x7,x13,x13 + adds x26,x26,x9 + umulh x13, x13,x13 + adc x27,x27,x10 + mul x8,x14,x14 + + mul x10,x16,x15 + umulh x14, x14,x14 + adds x27,x27,x10 + umulh x10,x16,x15 + mul x9,x15,x15 + adc x28,x10,xzr + + adds x19,x19,x19 + adcs x20,x20,x20 + adcs x21,x21,x21 + adcs x22,x22,x22 + adcs x23,x23,x23 + adcs x24,x24,x24 + adcs x25,x25,x25 + adcs x26,x26,x26 + umulh x15, x15,x15 + adcs x27,x27,x27 + mul x10,x16,x16 + adcs x28,x28,x28 + umulh x16, x16,x16 + adc x1,xzr,xzr + + adds x19,x19,x11 + adcs x20,x20,x6 + adcs x21,x21,x12 + adcs x22,x22,x7 + adcs x23,x23,x13 + adcs x24,x24,x8 + adcs x25,x25,x14 + stp x5,x19,[x0] + adcs x26,x26,x9 + stp x20,x21,[x0,#16] + adcs x27,x27,x15 + stp x22,x23,[x0,#32] + adcs x28,x28,x10 + stp x24,x25,[x0,#48] + adc x16,x16,x1 + stp x26,x27,[x0,#64] + stp x28,x16,[x0,#80] + + ret + ENDP + + + EXPORT |sqr_384|[FUNC] + ALIGN 32 +|sqr_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + bl __sqr_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |redc_mont_384|[FUNC] + ALIGN 32 +|redc_mont_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + bl __redc_tail_mont_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |from_mont_384|[FUNC] + ALIGN 32 +|from_mont_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + mov x4,x3 // adjust for missing b_ptr + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + + csello x11,x11,x19 + csello x12,x12,x20 + csello x13,x13,x21 + csello x14,x14,x22 + csello x15,x15,x23 + csello x16,x16,x24 + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__mul_by_1_mont_384| PROC + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + mul x26,x4,x11 + ldp x15,x16,[x1,#32] + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + mul x26,x4,x11 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + // mul x19,x5,x26 + mul x20,x6,x26 + mul x21,x7,x26 + mul x22,x8,x26 + mul x23,x9,x26 + mul x24,x10,x26 + subs xzr,x11,#1 // adds x19,x19,x11 + umulh x11,x5,x26 + adcs x20,x20,x12 + umulh x12,x6,x26 + adcs x21,x21,x13 + umulh x13,x7,x26 + adcs x22,x22,x14 + umulh x14,x8,x26 + adcs x23,x23,x15 + umulh x15,x9,x26 + adcs x24,x24,x16 + umulh x16,x10,x26 + adc x25,xzr,xzr + adds x11,x11,x20 + adcs x12,x12,x21 + adcs x13,x13,x22 + adcs x14,x14,x23 + adcs x15,x15,x24 + adc x16,x16,x25 + + ret + ENDP + + + ALIGN 32 +|__redc_tail_mont_384| PROC + ldp x19,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x11,x11,x19 // accumulate upper half + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adcs x16,x16,x24 + adc x25,xzr,xzr + + subs x19,x11,x5 + sbcs x20,x12,x6 + sbcs x21,x13,x7 + sbcs x22,x14,x8 + sbcs x23,x15,x9 + sbcs x24,x16,x10 + sbcs xzr,x25,xzr + + csello x11,x11,x19 + csello x12,x12,x20 + csello x13,x13,x21 + csello x14,x14,x22 + csello x15,x15,x23 + csello x16,x16,x24 + + stp x11,x12,[x0] + stp x13,x14,[x0,#16] + stp x15,x16,[x0,#32] + + ret + ENDP + + + + EXPORT |mul_384|[FUNC] + ALIGN 32 +|mul_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + bl __mul_384 + ldr x30,[x29,#8] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__mul_384| PROC + ldp x11,x12,[x1] + ldr x17, [x2] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + + umulh x5,x11,x17 + umulh x6,x12,x17 + umulh x7,x13,x17 + umulh x8,x14,x17 + umulh x9,x15,x17 + umulh x10,x16,x17 + ldr x17,[x2,8*1] + + str x19,[x0] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,xzr, x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(1+1)] + adc x25,xzr,xzr + + str x19,[x0,8*1] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(2+1)] + adc x25,xzr,xzr + + str x19,[x0,8*2] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(3+1)] + adc x25,xzr,xzr + + str x19,[x0,8*3] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + ldr x17,[x2,#8*(4+1)] + adc x25,xzr,xzr + + str x19,[x0,8*4] + adds x19,x20,x5 + mul x5,x11,x17 + adcs x20,x21,x6 + mul x6,x12,x17 + adcs x21,x22,x7 + mul x7,x13,x17 + adcs x22,x23,x8 + mul x8,x14,x17 + adcs x23,x24,x9 + mul x9,x15,x17 + adc x24,x25,x10 + mul x10,x16,x17 + adds x19,x19,x5 + umulh x5,x11,x17 + adcs x20,x20,x6 + umulh x6,x12,x17 + adcs x21,x21,x7 + umulh x7,x13,x17 + adcs x22,x22,x8 + umulh x8,x14,x17 + adcs x23,x23,x9 + umulh x9,x15,x17 + adcs x24,x24,x10 + umulh x10,x16,x17 + adc x25,xzr,xzr + + str x19,[x0,8*5] + adds x19,x20,x5 + adcs x20,x21,x6 + adcs x21,x22,x7 + adcs x22,x23,x8 + adcs x23,x24,x9 + adc x24,x25,x10 + + stp x19,x20,[x0,#48] + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ret + ENDP + + + + EXPORT |mul_382x|[FUNC] + ALIGN 32 +|mul_382x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#96 // space for two 384-bit vectors + + ldp x11,x12,[x1] + mov x26,x0 // save r_ptr + ldp x19,x20,[x1,#48] + mov x27,x1 // save a_ptr + ldp x13,x14,[x1,#16] + mov x28,x2 // save b_ptr + ldp x21,x22,[x1,#64] + ldp x15,x16,[x1,#32] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x23,x24,[x1,#80] + adcs x6,x12,x20 + ldp x11,x12,[x2] + adcs x7,x13,x21 + ldp x19,x20,[x2,#48] + adcs x8,x14,x22 + ldp x13,x14,[x2,#16] + adcs x9,x15,x23 + ldp x21,x22,[x2,#64] + adc x10,x16,x24 + ldp x15,x16,[x2,#32] + + stp x5,x6,[sp] + adds x5,x11,x19 // t1 = b->re + b->im + ldp x23,x24,[x2,#80] + adcs x6,x12,x20 + stp x7,x8,[sp,#16] + adcs x7,x13,x21 + adcs x8,x14,x22 + stp x9,x10,[sp,#32] + adcs x9,x15,x23 + stp x5,x6,[sp,#48] + adc x10,x16,x24 + stp x7,x8,[sp,#64] + stp x9,x10,[sp,#80] + + bl __mul_384 // mul_384(ret->re, a->re, b->re) + + add x1,sp,#0 // mul_384(ret->im, t0, t1) + add x2,sp,#48 + add x0,x26,#96 + bl __mul_384 + + add x1,x27,#48 // mul_384(tx, a->im, b->im) + add x2,x28,#48 + add x0,sp,#0 + bl __mul_384 + + ldp x5,x6,[x3] + ldp x7,x8,[x3,#16] + ldp x9,x10,[x3,#32] + + add x1,x26,#96 // ret->im -= tx + add x2,sp,#0 + add x0,x26,#96 + bl __sub_mod_384x384 + + add x2,x26,#0 // ret->im -= ret->re + bl __sub_mod_384x384 + + add x1,x26,#0 // ret->re -= tx + add x2,sp,#0 + add x0,x26,#0 + bl __sub_mod_384x384 + ldr x30,[x29,#8] + + add sp,sp,#96 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |sqr_382x|[FUNC] + ALIGN 32 +|sqr_382x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp x11,x12,[x1] + ldp x19,x20,[x1,#48] + ldp x13,x14,[x1,#16] + adds x5,x11,x19 // t0 = a->re + a->im + ldp x21,x22,[x1,#64] + adcs x6,x12,x20 + ldp x15,x16,[x1,#32] + adcs x7,x13,x21 + ldp x23,x24,[x1,#80] + adcs x8,x14,x22 + stp x5,x6,[x0] + adcs x9,x15,x23 + ldp x5,x6,[x2] + adc x10,x16,x24 + stp x7,x8,[x0,#16] + + subs x11,x11,x19 // t1 = a->re - a->im + ldp x7,x8,[x2,#16] + sbcs x12,x12,x20 + stp x9,x10,[x0,#32] + sbcs x13,x13,x21 + ldp x9,x10,[x2,#32] + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + adds x11,x11,x19 + and x21,x7,x25 + adcs x12,x12,x20 + and x22,x8,x25 + adcs x13,x13,x21 + and x23,x9,x25 + adcs x14,x14,x22 + and x24,x10,x25 + adcs x15,x15,x23 + stp x11,x12,[x0,#48] + adc x16,x16,x24 + stp x13,x14,[x0,#64] + stp x15,x16,[x0,#80] + + mov x4,x1 // save a_ptr + add x1,x0,#0 // mul_384(ret->re, t0, t1) + add x2,x0,#48 + bl __mul_384 + + add x1,x4,#0 // mul_384(ret->im, a->re, a->im) + add x2,x4,#48 + add x0,x0,#96 + bl __mul_384 + ldr x30,[x29,#8] + + ldp x11,x12,[x0] + ldp x13,x14,[x0,#16] + adds x11,x11,x11 // add with itself + ldp x15,x16,[x0,#32] + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adcs x19,x19,x19 + adcs x20,x20,x20 + stp x11,x12,[x0] + adcs x21,x21,x21 + stp x13,x14,[x0,#16] + adcs x22,x22,x22 + stp x15,x16,[x0,#32] + adcs x23,x23,x23 + stp x19,x20,[x0,#48] + adc x24,x24,x24 + stp x21,x22,[x0,#64] + stp x23,x24,[x0,#80] + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |sqr_mont_382x|[FUNC] + ALIGN 32 +|sqr_mont_382x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x3,x0,[sp,#96] // __mul_mont_384 wants them there + sub sp,sp,#112 // space for two 384-bit vectors + word + mov x4,x3 // adjust for missing b_ptr + + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + ldp x15,x16,[x1,#32] + + ldp x17,x20,[x1,#48] + ldp x21,x22,[x1,#64] + ldp x23,x24,[x1,#80] + + adds x5,x11,x17 // t0 = a->re + a->im + adcs x6,x12,x20 + adcs x7,x13,x21 + adcs x8,x14,x22 + adcs x9,x15,x23 + adc x10,x16,x24 + + subs x19,x11,x17 // t1 = a->re - a->im + sbcs x20,x12,x20 + sbcs x21,x13,x21 + sbcs x22,x14,x22 + sbcs x23,x15,x23 + sbcs x24,x16,x24 + sbc x25,xzr,xzr // borrow flag as mask + + stp x5,x6,[sp] + stp x7,x8,[sp,#16] + stp x9,x10,[sp,#32] + stp x19,x20,[sp,#48] + stp x21,x22,[sp,#64] + stp x23,x24,[sp,#80] + str x25,[sp,#96] + + ldp x5,x6,[x2] + ldp x7,x8,[x2,#16] + ldp x9,x10,[x2,#32] + + add x2,x1,#48 + bl __mul_mont_383_nonred // mul_mont_384(ret->im, a->re, a->im) + + adds x19,x11,x11 // add with itself + adcs x20,x12,x12 + adcs x21,x13,x13 + adcs x22,x14,x14 + adcs x23,x15,x15 + adc x24,x16,x16 + + stp x19,x20,[x2,#48] + stp x21,x22,[x2,#64] + stp x23,x24,[x2,#80] + + ldp x11,x12,[sp] + ldr x17,[sp,#48] + ldp x13,x14,[sp,#16] + ldp x15,x16,[sp,#32] + + add x2,sp,#48 + bl __mul_mont_383_nonred // mul_mont_384(ret->im, t0, t1) + ldr x30,[x29,#8] + + ldr x25,[sp,#96] // account for sign from a->re - a->im + ldp x19,x20,[sp] + ldp x21,x22,[sp,#16] + ldp x23,x24,[sp,#32] + + and x19,x19,x25 + and x20,x20,x25 + and x21,x21,x25 + and x22,x22,x25 + and x23,x23,x25 + and x24,x24,x25 + + subs x11,x11,x19 + sbcs x12,x12,x20 + sbcs x13,x13,x21 + sbcs x14,x14,x22 + sbcs x15,x15,x23 + sbcs x16,x16,x24 + sbc x25,xzr,xzr + + and x19,x5,x25 + and x20,x6,x25 + and x21,x7,x25 + and x22,x8,x25 + and x23,x9,x25 + and x24,x10,x25 + + adds x11,x11,x19 + adcs x12,x12,x20 + adcs x13,x13,x21 + adcs x14,x14,x22 + adcs x15,x15,x23 + adc x16,x16,x24 + + stp x11,x12,[x2] + stp x13,x14,[x2,#16] + stp x15,x16,[x2,#32] + + add sp,sp,#112 + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + ALIGN 32 +|__mul_mont_383_nonred| PROC + mul x19,x11,x17 + mul x20,x12,x17 + mul x21,x13,x17 + mul x22,x14,x17 + mul x23,x15,x17 + mul x24,x16,x17 + mul x4,x4,x19 + + umulh x26,x11,x17 + umulh x27,x12,x17 + umulh x28,x13,x17 + umulh x0,x14,x17 + umulh x1,x15,x17 + umulh x3,x16,x17 + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,xzr, x3 + mul x3,x10,x4 + ldr x17,[x2,8*1] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*2] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*3] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*4] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + ldr x17,[x2,8*5] + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + + ldr x4,[x29,#96] + adds x19,x20,x26 + mul x26,x11,x17 + adcs x20,x21,x27 + mul x27,x12,x17 + adcs x21,x22,x28 + mul x28,x13,x17 + adcs x22,x23,x0 + mul x0,x14,x17 + adcs x23,x24,x1 + mul x1,x15,x17 + adcs x24,x25,x3 + mul x3,x16,x17 + adc x25,xzr,xzr + + adds x19,x19,x26 + umulh x26,x11,x17 + adcs x20,x20,x27 + umulh x27,x12,x17 + adcs x21,x21,x28 + mul x4,x4,x19 + umulh x28,x13,x17 + adcs x22,x22,x0 + umulh x0,x14,x17 + adcs x23,x23,x1 + umulh x1,x15,x17 + adcs x24,x24,x3 + umulh x3,x16,x17 + adc x25,x25,xzr + + adds x20,x20,x26 + mul x26,x5,x4 + adcs x21,x21,x27 + mul x27,x6,x4 + adcs x22,x22,x28 + mul x28,x7,x4 + adcs x23,x23,x0 + mul x0,x8,x4 + adcs x24,x24,x1 + mul x1,x9,x4 + adc x25,x25,x3 + mul x3,x10,x4 + adds x19,x19,x26 + umulh x26,x5,x4 + adcs x20,x20,x27 + umulh x27,x6,x4 + adcs x21,x21,x28 + umulh x28,x7,x4 + adcs x22,x22,x0 + umulh x0,x8,x4 + adcs x23,x23,x1 + umulh x1,x9,x4 + adcs x24,x24,x3 + umulh x3,x10,x4 + adc x25,x25,xzr + ldp x4,x2,[x29,#96] // pull r_ptr + + adds x11,x20,x26 + adcs x12,x21,x27 + adcs x13,x22,x28 + adcs x14,x23,x0 + adcs x15,x24,x1 + adcs x16,x25,x3 + + ret + ENDP + + + + EXPORT |sgn0_pty_mont_384|[FUNC] + ALIGN 32 +|sgn0_pty_mont_384| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + adds x11,x11,x11 + adcs x12,x12,x12 + adcs x13,x13,x13 + adcs x14,x14,x14 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + + + + EXPORT |sgn0_pty_mont_384x|[FUNC] + ALIGN 32 +|sgn0_pty_mont_384x| PROC + DCDU 3573752639 + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + mov x4,x2 + ldp x5,x6,[x1] + ldp x7,x8,[x1,#16] + ldp x9,x10,[x1,#32] + mov x1,x0 + + bl __mul_by_1_mont_384 + add x1,x1,#48 + + and x2,x11,#1 + orr x3,x11,x12 + adds x11,x11,x11 + orr x3,x3,x13 + adcs x12,x12,x12 + orr x3,x3,x14 + adcs x13,x13,x13 + orr x3,x3,x15 + adcs x14,x14,x14 + orr x3,x3,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x2,x2,x17 + + bl __mul_by_1_mont_384 + ldr x30,[x29,#8] + + and x0,x11,#1 + orr x1,x11,x12 + adds x11,x11,x11 + orr x1,x1,x13 + adcs x12,x12,x12 + orr x1,x1,x14 + adcs x13,x13,x13 + orr x1,x1,x15 + adcs x14,x14,x14 + orr x1,x1,x16 + adcs x15,x15,x15 + adcs x16,x16,x16 + adc x17,xzr,xzr + + subs x11,x11,x5 + sbcs x12,x12,x6 + sbcs x13,x13,x7 + sbcs x14,x14,x8 + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbc x17,x17,xzr + + mvn x17,x17 + and x17,x17,#2 + orr x0,x0,x17 + + cmp x3,#0 + cseleq x3,x0,x2 + + cmp x1,#0 + cselne x1,x0,x2 + + and x3,x3,#1 + and x1,x1,#2 + orr x0,x1,x3 // pack sign and parity + + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + DCDU 3573752767 + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/mulq_mont_256-x86_64.asm b/crypto/blst_src/build/win64/mulq_mont_256-x86_64.asm new file mode 100644 index 00000000000..6aedca7cdaf --- /dev/null +++ b/crypto/blst_src/build/win64/mulq_mont_256-x86_64.asm @@ -0,0 +1,913 @@ +OPTION DOTNAME +EXTERN mul_mont_sparse_256$1:NEAR +EXTERN sqr_mont_sparse_256$1:NEAR +EXTERN from_mont_256$1:NEAR +EXTERN redc_mont_256$1:NEAR +_DATA SEGMENT +COMM __blst_platform_cap:DWORD:1 +_DATA ENDS +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC mul_mont_sparse_256 + + +ALIGN 32 +mul_mont_sparse_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_mont_sparse_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz mul_mont_sparse_256$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + +$L$SEH_body_mul_mont_sparse_256:: + + + mov rax,QWORD PTR[rdx] + mov r13,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov rbp,QWORD PTR[24+rsi] + mov rbx,rdx + + mov r15,rax + mul r13 + mov r9,rax + mov rax,r15 + mov r10,rdx + call __mulq_mont_sparse_256 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_mul_mont_sparse_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_mont_sparse_256:: +mul_mont_sparse_256 ENDP + +PUBLIC sqr_mont_sparse_256 + + +ALIGN 32 +sqr_mont_sparse_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_mont_sparse_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_mont_sparse_256$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + +$L$SEH_body_sqr_mont_sparse_256:: + + + mov rax,QWORD PTR[rsi] + mov r8,rcx + mov r14,QWORD PTR[8+rsi] + mov rcx,rdx + mov r12,QWORD PTR[16+rsi] + lea rbx,QWORD PTR[rsi] + mov rbp,QWORD PTR[24+rsi] + + mov r15,rax + mul rax + mov r9,rax + mov rax,r15 + mov r10,rdx + call __mulq_mont_sparse_256 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sqr_mont_sparse_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_mont_sparse_256:: +sqr_mont_sparse_256 ENDP + +ALIGN 32 +__mulq_mont_sparse_256 PROC PRIVATE + DB 243,15,30,250 + + mul r14 + add r10,rax + mov rax,r15 + adc rdx,0 + mov r11,rdx + + mul r12 + add r11,rax + mov rax,r15 + adc rdx,0 + mov r12,rdx + + mul rbp + add r12,rax + mov rax,QWORD PTR[8+rbx] + adc rdx,0 + xor r14,r14 + mov r13,rdx + + mov rdi,r9 + imul r9,r8 + + + mov r15,rax + mul QWORD PTR[rsi] + add r10,rax + mov rax,r15 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[8+rsi] + add r11,rax + mov rax,r15 + adc rdx,0 + add r11,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rsi] + add r12,rax + mov rax,r15 + adc rdx,0 + add r12,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rsi] + add r13,rax + mov rax,r9 + adc rdx,0 + add r13,rbp + adc r14,rdx + xor r15,r15 + + + mul QWORD PTR[rcx] + add rdi,rax + mov rax,r9 + adc rdi,rdx + + mul QWORD PTR[8+rcx] + add r10,rax + mov rax,r9 + adc rdx,0 + add r10,rdi + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r11,rax + mov rax,r9 + adc rdx,0 + add r11,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r12,rax + mov rax,QWORD PTR[16+rbx] + adc rdx,0 + add r12,rbp + adc rdx,0 + add r13,rdx + adc r14,0 + adc r15,0 + mov rdi,r10 + imul r10,r8 + + + mov r9,rax + mul QWORD PTR[rsi] + add r11,rax + mov rax,r9 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[8+rsi] + add r12,rax + mov rax,r9 + adc rdx,0 + add r12,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rsi] + add r13,rax + mov rax,r9 + adc rdx,0 + add r13,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rsi] + add r14,rax + mov rax,r10 + adc rdx,0 + add r14,rbp + adc r15,rdx + xor r9,r9 + + + mul QWORD PTR[rcx] + add rdi,rax + mov rax,r10 + adc rdi,rdx + + mul QWORD PTR[8+rcx] + add r11,rax + mov rax,r10 + adc rdx,0 + add r11,rdi + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r12,rax + mov rax,r10 + adc rdx,0 + add r12,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r13,rax + mov rax,QWORD PTR[24+rbx] + adc rdx,0 + add r13,rbp + adc rdx,0 + add r14,rdx + adc r15,0 + adc r9,0 + mov rdi,r11 + imul r11,r8 + + + mov r10,rax + mul QWORD PTR[rsi] + add r12,rax + mov rax,r10 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[8+rsi] + add r13,rax + mov rax,r10 + adc rdx,0 + add r13,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rsi] + add r14,rax + mov rax,r10 + adc rdx,0 + add r14,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rsi] + add r15,rax + mov rax,r11 + adc rdx,0 + add r15,rbp + adc r9,rdx + xor r10,r10 + + + mul QWORD PTR[rcx] + add rdi,rax + mov rax,r11 + adc rdi,rdx + + mul QWORD PTR[8+rcx] + add r12,rax + mov rax,r11 + adc rdx,0 + add r12,rdi + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,rbp + adc rdx,0 + add r15,rdx + adc r9,0 + adc r10,0 + imul rax,r8 + mov rsi,QWORD PTR[8+rsp] + + + mov r11,rax + mul QWORD PTR[rcx] + add r12,rax + mov rax,r11 + adc r12,rdx + + mul QWORD PTR[8+rcx] + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,r12 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r14,rax + mov rax,r11 + adc rdx,0 + add r14,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + mov rbx,r14 + add r15,rbp + adc rdx,0 + add r15,rax + mov rax,r13 + adc rdx,0 + add r9,rdx + adc r10,0 + + + + + mov r12,r15 + sub r13,QWORD PTR[rcx] + sbb r14,QWORD PTR[8+rcx] + sbb r15,QWORD PTR[16+rcx] + mov rbp,r9 + sbb r9,QWORD PTR[24+rcx] + sbb r10,0 + + cmovc r13,rax + cmovc r14,rbx + cmovc r15,r12 + mov QWORD PTR[rsi],r13 + cmovc r9,rbp + mov QWORD PTR[8+rsi],r14 + mov QWORD PTR[16+rsi],r15 + mov QWORD PTR[24+rsi],r9 + + DB 0F3h,0C3h ;repret + +__mulq_mont_sparse_256 ENDP +PUBLIC from_mont_256 + + +ALIGN 32 +from_mont_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_from_mont_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz from_mont_256$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_from_mont_256:: + + + mov rbx,rdx + call __mulq_by_1_mont_256 + + + + + + mov r10,r14 + mov r11,r15 + mov r12,r9 + + sub r13,QWORD PTR[rbx] + sbb r14,QWORD PTR[8+rbx] + sbb r15,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + + cmovnc rax,r13 + cmovnc r10,r14 + cmovnc r11,r15 + mov QWORD PTR[rdi],rax + cmovnc r12,r9 + mov QWORD PTR[8+rdi],r10 + mov QWORD PTR[16+rdi],r11 + mov QWORD PTR[24+rdi],r12 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_from_mont_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_from_mont_256:: +from_mont_256 ENDP + +PUBLIC redc_mont_256 + + +ALIGN 32 +redc_mont_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_redc_mont_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz redc_mont_256$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_redc_mont_256:: + + + mov rbx,rdx + call __mulq_by_1_mont_256 + + add r13,QWORD PTR[32+rsi] + adc r14,QWORD PTR[40+rsi] + mov rax,r13 + adc r15,QWORD PTR[48+rsi] + mov r10,r14 + adc r9,QWORD PTR[56+rsi] + sbb rsi,rsi + + + + + mov r11,r15 + sub r13,QWORD PTR[rbx] + sbb r14,QWORD PTR[8+rbx] + sbb r15,QWORD PTR[16+rbx] + mov r12,r9 + sbb r9,QWORD PTR[24+rbx] + sbb rsi,0 + + cmovnc rax,r13 + cmovnc r10,r14 + cmovnc r11,r15 + mov QWORD PTR[rdi],rax + cmovnc r12,r9 + mov QWORD PTR[8+rdi],r10 + mov QWORD PTR[16+rdi],r11 + mov QWORD PTR[24+rdi],r12 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_redc_mont_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_redc_mont_256:: +redc_mont_256 ENDP + +ALIGN 32 +__mulq_by_1_mont_256 PROC PRIVATE + DB 243,15,30,250 + + mov rax,QWORD PTR[rsi] + mov r10,QWORD PTR[8+rsi] + mov r11,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + + mov r13,rax + imul rax,rcx + mov r9,rax + + mul QWORD PTR[rbx] + add r13,rax + mov rax,r9 + adc r13,rdx + + mul QWORD PTR[8+rbx] + add r10,rax + mov rax,r9 + adc rdx,0 + add r10,r13 + adc rdx,0 + mov r13,rdx + + mul QWORD PTR[16+rbx] + mov r14,r10 + imul r10,rcx + add r11,rax + mov rax,r9 + adc rdx,0 + add r11,r13 + adc rdx,0 + mov r13,rdx + + mul QWORD PTR[24+rbx] + add r12,rax + mov rax,r10 + adc rdx,0 + add r12,r13 + adc rdx,0 + mov r13,rdx + + mul QWORD PTR[rbx] + add r14,rax + mov rax,r10 + adc r14,rdx + + mul QWORD PTR[8+rbx] + add r11,rax + mov rax,r10 + adc rdx,0 + add r11,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[16+rbx] + mov r15,r11 + imul r11,rcx + add r12,rax + mov rax,r10 + adc rdx,0 + add r12,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[24+rbx] + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[rbx] + add r15,rax + mov rax,r11 + adc r15,rdx + + mul QWORD PTR[8+rbx] + add r12,rax + mov rax,r11 + adc rdx,0 + add r12,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[16+rbx] + mov r9,r12 + imul r12,rcx + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[24+rbx] + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[rbx] + add r9,rax + mov rax,r12 + adc r9,rdx + + mul QWORD PTR[8+rbx] + add r13,rax + mov rax,r12 + adc rdx,0 + add r13,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[16+rbx] + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rbx] + add r15,rax + mov rax,r13 + adc rdx,0 + add r15,r9 + adc rdx,0 + mov r9,rdx + DB 0F3h,0C3h ;repret +__mulq_by_1_mont_256 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_mul_mont_sparse_256 + DD imagerel $L$SEH_body_mul_mont_sparse_256 + DD imagerel $L$SEH_info_mul_mont_sparse_256_prologue + + DD imagerel $L$SEH_body_mul_mont_sparse_256 + DD imagerel $L$SEH_epilogue_mul_mont_sparse_256 + DD imagerel $L$SEH_info_mul_mont_sparse_256_body + + DD imagerel $L$SEH_epilogue_mul_mont_sparse_256 + DD imagerel $L$SEH_end_mul_mont_sparse_256 + DD imagerel $L$SEH_info_mul_mont_sparse_256_epilogue + + DD imagerel $L$SEH_begin_sqr_mont_sparse_256 + DD imagerel $L$SEH_body_sqr_mont_sparse_256 + DD imagerel $L$SEH_info_sqr_mont_sparse_256_prologue + + DD imagerel $L$SEH_body_sqr_mont_sparse_256 + DD imagerel $L$SEH_epilogue_sqr_mont_sparse_256 + DD imagerel $L$SEH_info_sqr_mont_sparse_256_body + + DD imagerel $L$SEH_epilogue_sqr_mont_sparse_256 + DD imagerel $L$SEH_end_sqr_mont_sparse_256 + DD imagerel $L$SEH_info_sqr_mont_sparse_256_epilogue + + DD imagerel $L$SEH_begin_from_mont_256 + DD imagerel $L$SEH_body_from_mont_256 + DD imagerel $L$SEH_info_from_mont_256_prologue + + DD imagerel $L$SEH_body_from_mont_256 + DD imagerel $L$SEH_epilogue_from_mont_256 + DD imagerel $L$SEH_info_from_mont_256_body + + DD imagerel $L$SEH_epilogue_from_mont_256 + DD imagerel $L$SEH_end_from_mont_256 + DD imagerel $L$SEH_info_from_mont_256_epilogue + + DD imagerel $L$SEH_begin_redc_mont_256 + DD imagerel $L$SEH_body_redc_mont_256 + DD imagerel $L$SEH_info_redc_mont_256_prologue + + DD imagerel $L$SEH_body_redc_mont_256 + DD imagerel $L$SEH_epilogue_redc_mont_256 + DD imagerel $L$SEH_info_redc_mont_256_body + + DD imagerel $L$SEH_epilogue_redc_mont_256 + DD imagerel $L$SEH_end_redc_mont_256 + DD imagerel $L$SEH_info_redc_mont_256_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_mul_mont_sparse_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_mont_sparse_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_mont_sparse_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_mont_sparse_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_mont_sparse_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_mont_sparse_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_from_mont_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_from_mont_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_from_mont_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_redc_mont_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_redc_mont_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_redc_mont_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/mulq_mont_384-x86_64.asm b/crypto/blst_src/build/win64/mulq_mont_384-x86_64.asm new file mode 100644 index 00000000000..8563815917e --- /dev/null +++ b/crypto/blst_src/build/win64/mulq_mont_384-x86_64.asm @@ -0,0 +1,4341 @@ +OPTION DOTNAME +EXTERN mul_mont_384x$1:NEAR +EXTERN sqr_mont_384x$1:NEAR +EXTERN mul_382x$1:NEAR +EXTERN sqr_382x$1:NEAR +EXTERN mul_384$1:NEAR +EXTERN sqr_384$1:NEAR +EXTERN redc_mont_384$1:NEAR +EXTERN from_mont_384$1:NEAR +EXTERN sgn0_pty_mont_384$1:NEAR +EXTERN sgn0_pty_mont_384x$1:NEAR +EXTERN mul_mont_384$1:NEAR +EXTERN sqr_mont_384$1:NEAR +EXTERN sqr_n_mul_mont_384$1:NEAR +EXTERN sqr_n_mul_mont_383$1:NEAR +EXTERN sqr_mont_382x$1:NEAR +_DATA SEGMENT +COMM __blst_platform_cap:DWORD:1 +_DATA ENDS +.text$ SEGMENT ALIGN(256) 'CODE' + + + + + + + + +ALIGN 32 +__subq_mod_384x384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + + sub r8,QWORD PTR[rdx] + mov r15,QWORD PTR[56+rsi] + sbb r9,QWORD PTR[8+rdx] + mov rax,QWORD PTR[64+rsi] + sbb r10,QWORD PTR[16+rdx] + mov rbx,QWORD PTR[72+rsi] + sbb r11,QWORD PTR[24+rdx] + mov rbp,QWORD PTR[80+rsi] + sbb r12,QWORD PTR[32+rdx] + mov rsi,QWORD PTR[88+rsi] + sbb r13,QWORD PTR[40+rdx] + mov QWORD PTR[rdi],r8 + sbb r14,QWORD PTR[48+rdx] + mov r8,QWORD PTR[rcx] + mov QWORD PTR[8+rdi],r9 + sbb r15,QWORD PTR[56+rdx] + mov r9,QWORD PTR[8+rcx] + mov QWORD PTR[16+rdi],r10 + sbb rax,QWORD PTR[64+rdx] + mov r10,QWORD PTR[16+rcx] + mov QWORD PTR[24+rdi],r11 + sbb rbx,QWORD PTR[72+rdx] + mov r11,QWORD PTR[24+rcx] + mov QWORD PTR[32+rdi],r12 + sbb rbp,QWORD PTR[80+rdx] + mov r12,QWORD PTR[32+rcx] + mov QWORD PTR[40+rdi],r13 + sbb rsi,QWORD PTR[88+rdx] + mov r13,QWORD PTR[40+rcx] + sbb rdx,rdx + + and r8,rdx + and r9,rdx + and r10,rdx + and r11,rdx + and r12,rdx + and r13,rdx + + add r14,r8 + adc r15,r9 + mov QWORD PTR[48+rdi],r14 + adc rax,r10 + mov QWORD PTR[56+rdi],r15 + adc rbx,r11 + mov QWORD PTR[64+rdi],rax + adc rbp,r12 + mov QWORD PTR[72+rdi],rbx + adc rsi,r13 + mov QWORD PTR[80+rdi],rbp + mov QWORD PTR[88+rdi],rsi + + DB 0F3h,0C3h ;repret +__subq_mod_384x384 ENDP + + +ALIGN 32 +__addq_mod_384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + adc r10,QWORD PTR[16+rdx] + mov r14,r8 + adc r11,QWORD PTR[24+rdx] + mov r15,r9 + adc r12,QWORD PTR[32+rdx] + mov rax,r10 + adc r13,QWORD PTR[40+rdx] + mov rbx,r11 + sbb rdx,rdx + + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + mov rbp,r12 + sbb r10,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rcx] + mov rsi,r13 + sbb r13,QWORD PTR[40+rcx] + sbb rdx,0 + + cmovc r8,r14 + cmovc r9,r15 + cmovc r10,rax + mov QWORD PTR[rdi],r8 + cmovc r11,rbx + mov QWORD PTR[8+rdi],r9 + cmovc r12,rbp + mov QWORD PTR[16+rdi],r10 + cmovc r13,rsi + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__addq_mod_384 ENDP + + +ALIGN 32 +__subq_mod_384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + +__subq_mod_384_a_is_loaded:: + sub r8,QWORD PTR[rdx] + mov r14,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rdx] + mov r15,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rdx] + mov rax,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rdx] + mov rbx,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rdx] + mov rbp,QWORD PTR[32+rcx] + sbb r13,QWORD PTR[40+rdx] + mov rsi,QWORD PTR[40+rcx] + sbb rdx,rdx + + and r14,rdx + and r15,rdx + and rax,rdx + and rbx,rdx + and rbp,rdx + and rsi,rdx + + add r8,r14 + adc r9,r15 + mov QWORD PTR[rdi],r8 + adc r10,rax + mov QWORD PTR[8+rdi],r9 + adc r11,rbx + mov QWORD PTR[16+rdi],r10 + adc r12,rbp + mov QWORD PTR[24+rdi],r11 + adc r13,rsi + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__subq_mod_384 ENDP +PUBLIC mul_mont_384x + + +ALIGN 32 +mul_mont_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_mont_384x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz mul_mont_384x$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,328 + +$L$SEH_body_mul_mont_384x:: + + + mov rbx,rdx + mov QWORD PTR[32+rsp],rdi + mov QWORD PTR[24+rsp],rsi + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[8+rsp],rcx + mov QWORD PTR[rsp],r8 + + + + + lea rdi,QWORD PTR[40+rsp] + call __mulq_384 + + + lea rbx,QWORD PTR[48+rbx] + lea rsi,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[((40+96))+rsp] + call __mulq_384 + + + mov rcx,QWORD PTR[8+rsp] + lea rdx,QWORD PTR[((-48))+rsi] + lea rdi,QWORD PTR[((40+192+48))+rsp] + call __addq_mod_384 + + mov rsi,QWORD PTR[16+rsp] + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[((-48))+rdi] + call __addq_mod_384 + + lea rbx,QWORD PTR[rdi] + lea rsi,QWORD PTR[48+rdi] + call __mulq_384 + + + lea rsi,QWORD PTR[rdi] + lea rdx,QWORD PTR[40+rsp] + mov rcx,QWORD PTR[8+rsp] + call __subq_mod_384x384 + + lea rsi,QWORD PTR[rdi] + lea rdx,QWORD PTR[((-96))+rdi] + call __subq_mod_384x384 + + + lea rsi,QWORD PTR[40+rsp] + lea rdx,QWORD PTR[((40+96))+rsp] + lea rdi,QWORD PTR[40+rsp] + call __subq_mod_384x384 + + mov rbx,rcx + + + lea rsi,QWORD PTR[40+rsp] + mov rcx,QWORD PTR[rsp] + mov rdi,QWORD PTR[32+rsp] + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + + lea rsi,QWORD PTR[((40+192))+rsp] + mov rcx,QWORD PTR[rsp] + lea rdi,QWORD PTR[48+rdi] + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + lea r8,QWORD PTR[328+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_mul_mont_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_mont_384x:: +mul_mont_384x ENDP +PUBLIC sqr_mont_384x + + +ALIGN 32 +sqr_mont_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_mont_384x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_mont_384x$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,136 + +$L$SEH_body_sqr_mont_384x:: + + + mov QWORD PTR[rsp],rcx + mov rcx,rdx + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rsi + + + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[32+rsp] + call __addq_mod_384 + + + mov rsi,QWORD PTR[16+rsp] + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[((32+48))+rsp] + call __subq_mod_384 + + + mov rsi,QWORD PTR[16+rsp] + lea rbx,QWORD PTR[48+rsi] + + mov rax,QWORD PTR[48+rsi] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov r13,QWORD PTR[24+rsi] + + call __mulq_mont_384 + add r14,r14 + adc r15,r15 + adc r8,r8 + mov r12,r14 + adc r9,r9 + mov r13,r15 + adc r10,r10 + mov rax,r8 + adc r11,r11 + mov rbx,r9 + sbb rdx,rdx + + sub r14,QWORD PTR[rcx] + sbb r15,QWORD PTR[8+rcx] + mov rbp,r10 + sbb r8,QWORD PTR[16+rcx] + sbb r9,QWORD PTR[24+rcx] + sbb r10,QWORD PTR[32+rcx] + mov rsi,r11 + sbb r11,QWORD PTR[40+rcx] + sbb rdx,0 + + cmovc r14,r12 + cmovc r15,r13 + cmovc r8,rax + mov QWORD PTR[48+rdi],r14 + cmovc r9,rbx + mov QWORD PTR[56+rdi],r15 + cmovc r10,rbp + mov QWORD PTR[64+rdi],r8 + cmovc r11,rsi + mov QWORD PTR[72+rdi],r9 + mov QWORD PTR[80+rdi],r10 + mov QWORD PTR[88+rdi],r11 + + lea rsi,QWORD PTR[32+rsp] + lea rbx,QWORD PTR[((32+48))+rsp] + + mov rax,QWORD PTR[((32+48))+rsp] + mov r14,QWORD PTR[((32+0))+rsp] + mov r15,QWORD PTR[((32+8))+rsp] + mov r12,QWORD PTR[((32+16))+rsp] + mov r13,QWORD PTR[((32+24))+rsp] + + call __mulq_mont_384 + + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_sqr_mont_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_mont_384x:: +sqr_mont_384x ENDP + +PUBLIC mul_382x + + +ALIGN 32 +mul_382x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_382x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz mul_382x$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,136 + +$L$SEH_body_mul_382x:: + + + lea rdi,QWORD PTR[96+rdi] + mov QWORD PTR[rsp],rsi + mov QWORD PTR[8+rsp],rdx + mov QWORD PTR[16+rsp],rdi + mov QWORD PTR[24+rsp],rcx + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + add r8,QWORD PTR[48+rsi] + adc r9,QWORD PTR[56+rsi] + adc r10,QWORD PTR[64+rsi] + adc r11,QWORD PTR[72+rsi] + adc r12,QWORD PTR[80+rsi] + adc r13,QWORD PTR[88+rsi] + + mov QWORD PTR[((32+0))+rsp],r8 + mov QWORD PTR[((32+8))+rsp],r9 + mov QWORD PTR[((32+16))+rsp],r10 + mov QWORD PTR[((32+24))+rsp],r11 + mov QWORD PTR[((32+32))+rsp],r12 + mov QWORD PTR[((32+40))+rsp],r13 + + + mov r8,QWORD PTR[rdx] + mov r9,QWORD PTR[8+rdx] + mov r10,QWORD PTR[16+rdx] + mov r11,QWORD PTR[24+rdx] + mov r12,QWORD PTR[32+rdx] + mov r13,QWORD PTR[40+rdx] + + add r8,QWORD PTR[48+rdx] + adc r9,QWORD PTR[56+rdx] + adc r10,QWORD PTR[64+rdx] + adc r11,QWORD PTR[72+rdx] + adc r12,QWORD PTR[80+rdx] + adc r13,QWORD PTR[88+rdx] + + mov QWORD PTR[((32+48))+rsp],r8 + mov QWORD PTR[((32+56))+rsp],r9 + mov QWORD PTR[((32+64))+rsp],r10 + mov QWORD PTR[((32+72))+rsp],r11 + mov QWORD PTR[((32+80))+rsp],r12 + mov QWORD PTR[((32+88))+rsp],r13 + + + lea rsi,QWORD PTR[((32+0))+rsp] + lea rbx,QWORD PTR[((32+48))+rsp] + call __mulq_384 + + + mov rsi,QWORD PTR[rsp] + mov rbx,QWORD PTR[8+rsp] + lea rdi,QWORD PTR[((-96))+rdi] + call __mulq_384 + + + lea rsi,QWORD PTR[48+rsi] + lea rbx,QWORD PTR[48+rbx] + lea rdi,QWORD PTR[32+rsp] + call __mulq_384 + + + mov rsi,QWORD PTR[16+rsp] + lea rdx,QWORD PTR[32+rsp] + mov rcx,QWORD PTR[24+rsp] + mov rdi,rsi + call __subq_mod_384x384 + + + lea rsi,QWORD PTR[rdi] + lea rdx,QWORD PTR[((-96))+rdi] + call __subq_mod_384x384 + + + lea rsi,QWORD PTR[((-96))+rdi] + lea rdx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[((-96))+rdi] + call __subq_mod_384x384 + + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_mul_382x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_382x:: +mul_382x ENDP +PUBLIC sqr_382x + + +ALIGN 32 +sqr_382x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_382x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_382x$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rsi + +$L$SEH_body_sqr_382x:: + + + mov rcx,rdx + + + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov rbx,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rdx,QWORD PTR[40+rsi] + + mov r8,r14 + add r14,QWORD PTR[48+rsi] + mov r9,r15 + adc r15,QWORD PTR[56+rsi] + mov r10,rax + adc rax,QWORD PTR[64+rsi] + mov r11,rbx + adc rbx,QWORD PTR[72+rsi] + mov r12,rbp + adc rbp,QWORD PTR[80+rsi] + mov r13,rdx + adc rdx,QWORD PTR[88+rsi] + + mov QWORD PTR[rdi],r14 + mov QWORD PTR[8+rdi],r15 + mov QWORD PTR[16+rdi],rax + mov QWORD PTR[24+rdi],rbx + mov QWORD PTR[32+rdi],rbp + mov QWORD PTR[40+rdi],rdx + + + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[48+rdi] + call __subq_mod_384_a_is_loaded + + + lea rsi,QWORD PTR[rdi] + lea rbx,QWORD PTR[((-48))+rdi] + lea rdi,QWORD PTR[((-48))+rdi] + call __mulq_384 + + + mov rsi,QWORD PTR[rsp] + lea rbx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[96+rdi] + call __mulq_384 + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] + mov rax,QWORD PTR[64+rdi] + mov rbx,QWORD PTR[72+rdi] + mov rbp,QWORD PTR[80+rdi] + add r8,r8 + mov rdx,QWORD PTR[88+rdi] + adc r9,r9 + mov QWORD PTR[rdi],r8 + adc r10,r10 + mov QWORD PTR[8+rdi],r9 + adc r11,r11 + mov QWORD PTR[16+rdi],r10 + adc r12,r12 + mov QWORD PTR[24+rdi],r11 + adc r13,r13 + mov QWORD PTR[32+rdi],r12 + adc r14,r14 + mov QWORD PTR[40+rdi],r13 + adc r15,r15 + mov QWORD PTR[48+rdi],r14 + adc rax,rax + mov QWORD PTR[56+rdi],r15 + adc rbx,rbx + mov QWORD PTR[64+rdi],rax + adc rbp,rbp + mov QWORD PTR[72+rdi],rbx + adc rdx,rdx + mov QWORD PTR[80+rdi],rbp + mov QWORD PTR[88+rdi],rdx + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sqr_382x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_382x:: +sqr_382x ENDP +PUBLIC mul_384 + + +ALIGN 32 +mul_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz mul_384$1 +endif + push rbp + + push rbx + + push r12 + +$L$SEH_body_mul_384:: + + + mov rbx,rdx + call __mulq_384 + + mov r12,QWORD PTR[rsp] + + mov rbx,QWORD PTR[8+rsp] + + mov rbp,QWORD PTR[16+rsp] + + lea rsp,QWORD PTR[24+rsp] + +$L$SEH_epilogue_mul_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_384:: +mul_384 ENDP + + +ALIGN 32 +__mulq_384 PROC PRIVATE + DB 243,15,30,250 + + mov rax,QWORD PTR[rbx] + + mov rbp,rax + mul QWORD PTR[rsi] + mov QWORD PTR[rdi],rax + mov rax,rbp + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add rcx,rax + mov rax,rbp + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r11,rax + mov rax,QWORD PTR[8+rbx] + adc rdx,0 + mov r12,rdx + mov rbp,rax + mul QWORD PTR[rsi] + add rcx,rax + mov rax,rbp + adc rdx,0 + mov QWORD PTR[8+rdi],rcx + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + add rcx,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + add r8,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + add r9,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + add r10,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r12,rax + mov rax,QWORD PTR[16+rbx] + adc rdx,0 + add r11,r12 + adc rdx,0 + mov r12,rdx + mov rbp,rax + mul QWORD PTR[rsi] + add rcx,rax + mov rax,rbp + adc rdx,0 + mov QWORD PTR[16+rdi],rcx + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + add rcx,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + add r8,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + add r9,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + add r10,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r12,rax + mov rax,QWORD PTR[24+rbx] + adc rdx,0 + add r11,r12 + adc rdx,0 + mov r12,rdx + mov rbp,rax + mul QWORD PTR[rsi] + add rcx,rax + mov rax,rbp + adc rdx,0 + mov QWORD PTR[24+rdi],rcx + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + add rcx,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + add r8,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + add r9,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + add r10,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r12,rax + mov rax,QWORD PTR[32+rbx] + adc rdx,0 + add r11,r12 + adc rdx,0 + mov r12,rdx + mov rbp,rax + mul QWORD PTR[rsi] + add rcx,rax + mov rax,rbp + adc rdx,0 + mov QWORD PTR[32+rdi],rcx + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + add rcx,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + add r8,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + add r9,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + add r10,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r12,rax + mov rax,QWORD PTR[40+rbx] + adc rdx,0 + add r11,r12 + adc rdx,0 + mov r12,rdx + mov rbp,rax + mul QWORD PTR[rsi] + add rcx,rax + mov rax,rbp + adc rdx,0 + mov QWORD PTR[40+rdi],rcx + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + add rcx,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + add r8,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + add r9,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + add r10,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r12,rax + mov rax,rax + adc rdx,0 + add r11,r12 + adc rdx,0 + mov r12,rdx + mov QWORD PTR[48+rdi],rcx + mov QWORD PTR[56+rdi],r8 + mov QWORD PTR[64+rdi],r9 + mov QWORD PTR[72+rdi],r10 + mov QWORD PTR[80+rdi],r11 + mov QWORD PTR[88+rdi],r12 + + DB 0F3h,0C3h ;repret +__mulq_384 ENDP +PUBLIC sqr_384 + + +ALIGN 32 +sqr_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_384:: + + + mov rdi,rcx + mov rsi,rdx +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_384$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sqr_384:: + + + call __sqrq_384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sqr_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_384:: +sqr_384 ENDP + + +ALIGN 32 +__sqrq_384 PROC PRIVATE + DB 243,15,30,250 + + mov rax,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rcx,QWORD PTR[16+rsi] + mov rbx,QWORD PTR[24+rsi] + + + mov r14,rax + mul r15 + mov r9,rax + mov rax,r14 + mov rbp,QWORD PTR[32+rsi] + mov r10,rdx + + mul rcx + add r10,rax + mov rax,r14 + adc rdx,0 + mov rsi,QWORD PTR[40+rsi] + mov r11,rdx + + mul rbx + add r11,rax + mov rax,r14 + adc rdx,0 + mov r12,rdx + + mul rbp + add r12,rax + mov rax,r14 + adc rdx,0 + mov r13,rdx + + mul rsi + add r13,rax + mov rax,r14 + adc rdx,0 + mov r14,rdx + + mul rax + xor r8,r8 + mov QWORD PTR[rdi],rax + mov rax,r15 + add r9,r9 + adc r8,0 + add r9,rdx + adc r8,0 + mov QWORD PTR[8+rdi],r9 + + mul rcx + add r11,rax + mov rax,r15 + adc rdx,0 + mov r9,rdx + + mul rbx + add r12,rax + mov rax,r15 + adc rdx,0 + add r12,r9 + adc rdx,0 + mov r9,rdx + + mul rbp + add r13,rax + mov rax,r15 + adc rdx,0 + add r13,r9 + adc rdx,0 + mov r9,rdx + + mul rsi + add r14,rax + mov rax,r15 + adc rdx,0 + add r14,r9 + adc rdx,0 + mov r15,rdx + + mul rax + xor r9,r9 + add r8,rax + mov rax,rcx + add r10,r10 + adc r11,r11 + adc r9,0 + add r10,r8 + adc r11,rdx + adc r9,0 + mov QWORD PTR[16+rdi],r10 + + mul rbx + add r13,rax + mov rax,rcx + adc rdx,0 + mov QWORD PTR[24+rdi],r11 + mov r8,rdx + + mul rbp + add r14,rax + mov rax,rcx + adc rdx,0 + add r14,r8 + adc rdx,0 + mov r8,rdx + + mul rsi + add r15,rax + mov rax,rcx + adc rdx,0 + add r15,r8 + adc rdx,0 + mov rcx,rdx + + mul rax + xor r11,r11 + add r9,rax + mov rax,rbx + add r12,r12 + adc r13,r13 + adc r11,0 + add r12,r9 + adc r13,rdx + adc r11,0 + mov QWORD PTR[32+rdi],r12 + + + mul rbp + add r15,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[40+rdi],r13 + mov r8,rdx + + mul rsi + add rcx,rax + mov rax,rbx + adc rdx,0 + add rcx,r8 + adc rdx,0 + mov rbx,rdx + + mul rax + xor r12,r12 + add r11,rax + mov rax,rbp + add r14,r14 + adc r15,r15 + adc r12,0 + add r14,r11 + adc r15,rdx + mov QWORD PTR[48+rdi],r14 + adc r12,0 + mov QWORD PTR[56+rdi],r15 + + + mul rsi + add rbx,rax + mov rax,rbp + adc rdx,0 + mov rbp,rdx + + mul rax + xor r13,r13 + add r12,rax + mov rax,rsi + add rcx,rcx + adc rbx,rbx + adc r13,0 + add rcx,r12 + adc rbx,rdx + mov QWORD PTR[64+rdi],rcx + adc r13,0 + mov QWORD PTR[72+rdi],rbx + + + mul rax + add rax,r13 + add rbp,rbp + adc rdx,0 + add rax,rbp + adc rdx,0 + mov QWORD PTR[80+rdi],rax + mov QWORD PTR[88+rdi],rdx + + DB 0F3h,0C3h ;repret +__sqrq_384 ENDP + +PUBLIC sqr_mont_384 + + +ALIGN 32 +sqr_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_mont_384$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8*15 + +$L$SEH_body_sqr_mont_384:: + + + mov QWORD PTR[96+rsp],rcx + mov QWORD PTR[104+rsp],rdx + mov QWORD PTR[112+rsp],rdi + + mov rdi,rsp + call __sqrq_384 + + lea rsi,QWORD PTR[rsp] + mov rcx,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rdi,QWORD PTR[112+rsp] + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + lea r8,QWORD PTR[120+rsp] + mov r15,QWORD PTR[120+rsp] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_sqr_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_mont_384:: +sqr_mont_384 ENDP + + + +PUBLIC redc_mont_384 + + +ALIGN 32 +redc_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_redc_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz redc_mont_384$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_redc_mont_384:: + + + mov rbx,rdx + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_redc_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_redc_mont_384:: +redc_mont_384 ENDP + + + + +PUBLIC from_mont_384 + + +ALIGN 32 +from_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_from_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz from_mont_384$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_from_mont_384:: + + + mov rbx,rdx + call __mulq_by_1_mont_384 + + + + + + mov rcx,r15 + mov rdx,r8 + mov rbp,r9 + + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + mov r13,r10 + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + mov rsi,r11 + sbb r11,QWORD PTR[40+rbx] + + cmovc r14,rax + cmovc r15,rcx + cmovc r8,rdx + mov QWORD PTR[rdi],r14 + cmovc r9,rbp + mov QWORD PTR[8+rdi],r15 + cmovc r10,r13 + mov QWORD PTR[16+rdi],r8 + cmovc r11,rsi + mov QWORD PTR[24+rdi],r9 + mov QWORD PTR[32+rdi],r10 + mov QWORD PTR[40+rdi],r11 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_from_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_from_mont_384:: +from_mont_384 ENDP + +ALIGN 32 +__mulq_by_1_mont_384 PROC PRIVATE + DB 243,15,30,250 + + mov rax,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov r14,rax + imul rax,rcx + mov r8,rax + + mul QWORD PTR[rbx] + add r14,rax + mov rax,r8 + adc r14,rdx + + mul QWORD PTR[8+rbx] + add r9,rax + mov rax,r8 + adc rdx,0 + add r9,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[16+rbx] + add r10,rax + mov rax,r8 + adc rdx,0 + add r10,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[24+rbx] + add r11,rax + mov rax,r8 + adc rdx,0 + mov r15,r9 + imul r9,rcx + add r11,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[32+rbx] + add r12,rax + mov rax,r8 + adc rdx,0 + add r12,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[40+rbx] + add r13,rax + mov rax,r9 + adc rdx,0 + add r13,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[rbx] + add r15,rax + mov rax,r9 + adc r15,rdx + + mul QWORD PTR[8+rbx] + add r10,rax + mov rax,r9 + adc rdx,0 + add r10,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[16+rbx] + add r11,rax + mov rax,r9 + adc rdx,0 + add r11,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[24+rbx] + add r12,rax + mov rax,r9 + adc rdx,0 + mov r8,r10 + imul r10,rcx + add r12,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[32+rbx] + add r13,rax + mov rax,r9 + adc rdx,0 + add r13,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[40+rbx] + add r14,rax + mov rax,r10 + adc rdx,0 + add r14,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[rbx] + add r8,rax + mov rax,r10 + adc r8,rdx + + mul QWORD PTR[8+rbx] + add r11,rax + mov rax,r10 + adc rdx,0 + add r11,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rbx] + add r12,rax + mov rax,r10 + adc rdx,0 + add r12,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[24+rbx] + add r13,rax + mov rax,r10 + adc rdx,0 + mov r9,r11 + imul r11,rcx + add r13,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[32+rbx] + add r14,rax + mov rax,r10 + adc rdx,0 + add r14,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[40+rbx] + add r15,rax + mov rax,r11 + adc rdx,0 + add r15,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[rbx] + add r9,rax + mov rax,r11 + adc r9,rdx + + mul QWORD PTR[8+rbx] + add r12,rax + mov rax,r11 + adc rdx,0 + add r12,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[16+rbx] + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rbx] + add r14,rax + mov rax,r11 + adc rdx,0 + mov r10,r12 + imul r12,rcx + add r14,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[32+rbx] + add r15,rax + mov rax,r11 + adc rdx,0 + add r15,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[40+rbx] + add r8,rax + mov rax,r12 + adc rdx,0 + add r8,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[rbx] + add r10,rax + mov rax,r12 + adc r10,rdx + + mul QWORD PTR[8+rbx] + add r13,rax + mov rax,r12 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[16+rbx] + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[24+rbx] + add r15,rax + mov rax,r12 + adc rdx,0 + mov r11,r13 + imul r13,rcx + add r15,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rbx] + add r8,rax + mov rax,r12 + adc rdx,0 + add r8,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[40+rbx] + add r9,rax + mov rax,r13 + adc rdx,0 + add r9,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[rbx] + add r11,rax + mov rax,r13 + adc r11,rdx + + mul QWORD PTR[8+rbx] + add r14,rax + mov rax,r13 + adc rdx,0 + add r14,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[16+rbx] + add r15,rax + mov rax,r13 + adc rdx,0 + add r15,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[24+rbx] + add r8,rax + mov rax,r13 + adc rdx,0 + add r8,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[32+rbx] + add r9,rax + mov rax,r13 + adc rdx,0 + add r9,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rbx] + add r10,rax + mov rax,r14 + adc rdx,0 + add r10,r11 + adc rdx,0 + mov r11,rdx + DB 0F3h,0C3h ;repret +__mulq_by_1_mont_384 ENDP + + +ALIGN 32 +__redq_tail_mont_384 PROC PRIVATE + DB 243,15,30,250 + + add r14,QWORD PTR[48+rsi] + mov rax,r14 + adc r15,QWORD PTR[56+rsi] + adc r8,QWORD PTR[64+rsi] + adc r9,QWORD PTR[72+rsi] + mov rcx,r15 + adc r10,QWORD PTR[80+rsi] + adc r11,QWORD PTR[88+rsi] + sbb r12,r12 + + + + + mov rdx,r8 + mov rbp,r9 + + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + mov r13,r10 + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + mov rsi,r11 + sbb r11,QWORD PTR[40+rbx] + sbb r12,0 + + cmovc r14,rax + cmovc r15,rcx + cmovc r8,rdx + mov QWORD PTR[rdi],r14 + cmovc r9,rbp + mov QWORD PTR[8+rdi],r15 + cmovc r10,r13 + mov QWORD PTR[16+rdi],r8 + cmovc r11,rsi + mov QWORD PTR[24+rdi],r9 + mov QWORD PTR[32+rdi],r10 + mov QWORD PTR[40+rdi],r11 + + DB 0F3h,0C3h ;repret +__redq_tail_mont_384 ENDP + +PUBLIC sgn0_pty_mont_384 + + +ALIGN 32 +sgn0_pty_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sgn0_pty_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sgn0_pty_mont_384$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sgn0_pty_mont_384:: + + + mov rbx,rsi + lea rsi,QWORD PTR[rdi] + mov rcx,rdx + call __mulq_by_1_mont_384 + + xor rax,rax + mov r13,r14 + add r14,r14 + adc r15,r15 + adc r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rax,0 + + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + sbb r11,QWORD PTR[40+rbx] + sbb rax,0 + + not rax + and r13,1 + and rax,2 + or rax,r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sgn0_pty_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sgn0_pty_mont_384:: +sgn0_pty_mont_384 ENDP + +PUBLIC sgn0_pty_mont_384x + + +ALIGN 32 +sgn0_pty_mont_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sgn0_pty_mont_384x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sgn0_pty_mont_384x$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sgn0_pty_mont_384x:: + + + mov rbx,rsi + lea rsi,QWORD PTR[48+rdi] + mov rcx,rdx + call __mulq_by_1_mont_384 + + mov r12,r14 + or r14,r15 + or r14,r8 + or r14,r9 + or r14,r10 + or r14,r11 + + lea rsi,QWORD PTR[rdi] + xor rdi,rdi + mov r13,r12 + add r12,r12 + adc r15,r15 + adc r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rdi,0 + + sub r12,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + sbb r11,QWORD PTR[40+rbx] + sbb rdi,0 + + mov QWORD PTR[rsp],r14 + not rdi + and r13,1 + and rdi,2 + or rdi,r13 + + call __mulq_by_1_mont_384 + + mov r12,r14 + or r14,r15 + or r14,r8 + or r14,r9 + or r14,r10 + or r14,r11 + + xor rax,rax + mov r13,r12 + add r12,r12 + adc r15,r15 + adc r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rax,0 + + sub r12,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + sbb r11,QWORD PTR[40+rbx] + sbb rax,0 + + mov r12,QWORD PTR[rsp] + + not rax + + test r14,r14 + cmovz r13,rdi + + test r12,r12 + cmovnz rax,rdi + + and r13,1 + and rax,2 + or rax,r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sgn0_pty_mont_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sgn0_pty_mont_384x:: +sgn0_pty_mont_384x ENDP +PUBLIC mul_mont_384 + + +ALIGN 32 +mul_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mul_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz mul_mont_384$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8*3 + +$L$SEH_body_mul_mont_384:: + + + mov rax,QWORD PTR[rdx] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov r13,QWORD PTR[24+rsi] + mov rbx,rdx + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],rdi + + call __mulq_mont_384 + + mov r15,QWORD PTR[24+rsp] + + mov r14,QWORD PTR[32+rsp] + + mov r13,QWORD PTR[40+rsp] + + mov r12,QWORD PTR[48+rsp] + + mov rbx,QWORD PTR[56+rsp] + + mov rbp,QWORD PTR[64+rsp] + + lea rsp,QWORD PTR[72+rsp] + +$L$SEH_epilogue_mul_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mul_mont_384:: +mul_mont_384 ENDP + +ALIGN 32 +__mulq_mont_384 PROC PRIVATE + DB 243,15,30,250 + + mov rdi,rax + mul r14 + mov r8,rax + mov rax,rdi + mov r9,rdx + + mul r15 + add r9,rax + mov rax,rdi + adc rdx,0 + mov r10,rdx + + mul r12 + add r10,rax + mov rax,rdi + adc rdx,0 + mov r11,rdx + + mov rbp,r8 + imul r8,QWORD PTR[8+rsp] + + mul r13 + add r11,rax + mov rax,rdi + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[32+rsi] + add r12,rax + mov rax,rdi + adc rdx,0 + mov r13,rdx + + mul QWORD PTR[40+rsi] + add r13,rax + mov rax,r8 + adc rdx,0 + xor r15,r15 + mov r14,rdx + + mul QWORD PTR[rcx] + add rbp,rax + mov rax,r8 + adc rbp,rdx + + mul QWORD PTR[8+rcx] + add r9,rax + mov rax,r8 + adc rdx,0 + add r9,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r10,rax + mov rax,r8 + adc rdx,0 + add r10,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r11,rbp + adc rdx,0 + add r11,rax + mov rax,r8 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[32+rcx] + add r12,rax + mov rax,r8 + adc rdx,0 + add r12,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[40+rcx] + add r13,rax + mov rax,QWORD PTR[8+rbx] + adc rdx,0 + add r13,rbp + adc r14,rdx + adc r15,0 + + mov rdi,rax + mul QWORD PTR[rsi] + add r9,rax + mov rax,rdi + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[8+rsi] + add r10,rax + mov rax,rdi + adc rdx,0 + add r10,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r11,rax + mov rax,rdi + adc rdx,0 + add r11,r8 + adc rdx,0 + mov r8,rdx + + mov rbp,r9 + imul r9,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r12,rax + mov rax,rdi + adc rdx,0 + add r12,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[32+rsi] + add r13,rax + mov rax,rdi + adc rdx,0 + add r13,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[40+rsi] + add r14,r8 + adc rdx,0 + xor r8,r8 + add r14,rax + mov rax,r9 + adc r15,rdx + adc r8,0 + + mul QWORD PTR[rcx] + add rbp,rax + mov rax,r9 + adc rbp,rdx + + mul QWORD PTR[8+rcx] + add r10,rax + mov rax,r9 + adc rdx,0 + add r10,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r11,rax + mov rax,r9 + adc rdx,0 + add r11,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r12,rbp + adc rdx,0 + add r12,rax + mov rax,r9 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[32+rcx] + add r13,rax + mov rax,r9 + adc rdx,0 + add r13,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[40+rcx] + add r14,rax + mov rax,QWORD PTR[16+rbx] + adc rdx,0 + add r14,rbp + adc r15,rdx + adc r8,0 + + mov rdi,rax + mul QWORD PTR[rsi] + add r10,rax + mov rax,rdi + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[8+rsi] + add r11,rax + mov rax,rdi + adc rdx,0 + add r11,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[16+rsi] + add r12,rax + mov rax,rdi + adc rdx,0 + add r12,r9 + adc rdx,0 + mov r9,rdx + + mov rbp,r10 + imul r10,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r13,rax + mov rax,rdi + adc rdx,0 + add r13,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[32+rsi] + add r14,rax + mov rax,rdi + adc rdx,0 + add r14,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[40+rsi] + add r15,r9 + adc rdx,0 + xor r9,r9 + add r15,rax + mov rax,r10 + adc r8,rdx + adc r9,0 + + mul QWORD PTR[rcx] + add rbp,rax + mov rax,r10 + adc rbp,rdx + + mul QWORD PTR[8+rcx] + add r11,rax + mov rax,r10 + adc rdx,0 + add r11,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r12,rax + mov rax,r10 + adc rdx,0 + add r12,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r13,rbp + adc rdx,0 + add r13,rax + mov rax,r10 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[32+rcx] + add r14,rax + mov rax,r10 + adc rdx,0 + add r14,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[40+rcx] + add r15,rax + mov rax,QWORD PTR[24+rbx] + adc rdx,0 + add r15,rbp + adc r8,rdx + adc r9,0 + + mov rdi,rax + mul QWORD PTR[rsi] + add r11,rax + mov rax,rdi + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[8+rsi] + add r12,rax + mov rax,rdi + adc rdx,0 + add r12,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[16+rsi] + add r13,rax + mov rax,rdi + adc rdx,0 + add r13,r10 + adc rdx,0 + mov r10,rdx + + mov rbp,r11 + imul r11,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r14,rax + mov rax,rdi + adc rdx,0 + add r14,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r15,rax + mov rax,rdi + adc rdx,0 + add r15,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[40+rsi] + add r8,r10 + adc rdx,0 + xor r10,r10 + add r8,rax + mov rax,r11 + adc r9,rdx + adc r10,0 + + mul QWORD PTR[rcx] + add rbp,rax + mov rax,r11 + adc rbp,rdx + + mul QWORD PTR[8+rcx] + add r12,rax + mov rax,r11 + adc rdx,0 + add r12,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r14,rbp + adc rdx,0 + add r14,rax + mov rax,r11 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[32+rcx] + add r15,rax + mov rax,r11 + adc rdx,0 + add r15,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[40+rcx] + add r8,rax + mov rax,QWORD PTR[32+rbx] + adc rdx,0 + add r8,rbp + adc r9,rdx + adc r10,0 + + mov rdi,rax + mul QWORD PTR[rsi] + add r12,rax + mov rax,rdi + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[8+rsi] + add r13,rax + mov rax,rdi + adc rdx,0 + add r13,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[16+rsi] + add r14,rax + mov rax,rdi + adc rdx,0 + add r14,r11 + adc rdx,0 + mov r11,rdx + + mov rbp,r12 + imul r12,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r15,rax + mov rax,rdi + adc rdx,0 + add r15,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[32+rsi] + add r8,rax + mov rax,rdi + adc rdx,0 + add r8,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r9,r11 + adc rdx,0 + xor r11,r11 + add r9,rax + mov rax,r12 + adc r10,rdx + adc r11,0 + + mul QWORD PTR[rcx] + add rbp,rax + mov rax,r12 + adc rbp,rdx + + mul QWORD PTR[8+rcx] + add r13,rax + mov rax,r12 + adc rdx,0 + add r13,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r15,rbp + adc rdx,0 + add r15,rax + mov rax,r12 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[32+rcx] + add r8,rax + mov rax,r12 + adc rdx,0 + add r8,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[40+rcx] + add r9,rax + mov rax,QWORD PTR[40+rbx] + adc rdx,0 + add r9,rbp + adc r10,rdx + adc r11,0 + + mov rdi,rax + mul QWORD PTR[rsi] + add r13,rax + mov rax,rdi + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[8+rsi] + add r14,rax + mov rax,rdi + adc rdx,0 + add r14,r12 + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[16+rsi] + add r15,rax + mov rax,rdi + adc rdx,0 + add r15,r12 + adc rdx,0 + mov r12,rdx + + mov rbp,r13 + imul r13,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r8,rax + mov rax,rdi + adc rdx,0 + add r8,r12 + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[32+rsi] + add r9,rax + mov rax,rdi + adc rdx,0 + add r9,r12 + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[40+rsi] + add r10,r12 + adc rdx,0 + xor r12,r12 + add r10,rax + mov rax,r13 + adc r11,rdx + adc r12,0 + + mul QWORD PTR[rcx] + add rbp,rax + mov rax,r13 + adc rbp,rdx + + mul QWORD PTR[8+rcx] + add r14,rax + mov rax,r13 + adc rdx,0 + add r14,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[16+rcx] + add r15,rax + mov rax,r13 + adc rdx,0 + add r15,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[24+rcx] + add r8,rbp + adc rdx,0 + add r8,rax + mov rax,r13 + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[32+rcx] + add r9,rax + mov rax,r13 + adc rdx,0 + add r9,rbp + adc rdx,0 + mov rbp,rdx + + mul QWORD PTR[40+rcx] + add r10,rax + mov rax,r14 + adc rdx,0 + add r10,rbp + adc r11,rdx + adc r12,0 + + + + + mov rdi,QWORD PTR[16+rsp] + sub r14,QWORD PTR[rcx] + mov rdx,r15 + sbb r15,QWORD PTR[8+rcx] + mov rbx,r8 + sbb r8,QWORD PTR[16+rcx] + mov rsi,r9 + sbb r9,QWORD PTR[24+rcx] + mov rbp,r10 + sbb r10,QWORD PTR[32+rcx] + mov r13,r11 + sbb r11,QWORD PTR[40+rcx] + sbb r12,0 + + cmovc r14,rax + cmovc r15,rdx + cmovc r8,rbx + mov QWORD PTR[rdi],r14 + cmovc r9,rsi + mov QWORD PTR[8+rdi],r15 + cmovc r10,rbp + mov QWORD PTR[16+rdi],r8 + cmovc r11,r13 + mov QWORD PTR[24+rdi],r9 + mov QWORD PTR[32+rdi],r10 + mov QWORD PTR[40+rdi],r11 + + DB 0F3h,0C3h ;repret +__mulq_mont_384 ENDP +PUBLIC sqr_n_mul_mont_384 + + +ALIGN 32 +sqr_n_mul_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_n_mul_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_n_mul_mont_384$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8*17 + +$L$SEH_body_sqr_n_mul_mont_384:: + + + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rcx + lea rdi,QWORD PTR[32+rsp] + mov QWORD PTR[24+rsp],r9 + movq xmm2,QWORD PTR[r9] + +$L$oop_sqr_384:: + movd xmm1,edx + + call __sqrq_384 + + lea rsi,QWORD PTR[rdi] + mov rcx,QWORD PTR[rsp] + mov rbx,QWORD PTR[16+rsp] + call __mulq_by_1_mont_384 + call __redq_tail_mont_384 + + movd edx,xmm1 + lea rsi,QWORD PTR[rdi] + dec edx + jnz $L$oop_sqr_384 + +DB 102,72,15,126,208 + mov rcx,rbx + mov rbx,QWORD PTR[24+rsp] + + + + + + + mov r12,r8 + mov r13,r9 + + call __mulq_mont_384 + + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[136+rsp] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_sqr_n_mul_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_n_mul_mont_384:: +sqr_n_mul_mont_384 ENDP + +PUBLIC sqr_n_mul_mont_383 + + +ALIGN 32 +sqr_n_mul_mont_383 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_n_mul_mont_383:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_n_mul_mont_383$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8*17 + +$L$SEH_body_sqr_n_mul_mont_383:: + + + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rcx + lea rdi,QWORD PTR[32+rsp] + mov QWORD PTR[24+rsp],r9 + movq xmm2,QWORD PTR[r9] + +$L$oop_sqr_383:: + movd xmm1,edx + + call __sqrq_384 + + lea rsi,QWORD PTR[rdi] + mov rcx,QWORD PTR[rsp] + mov rbx,QWORD PTR[16+rsp] + call __mulq_by_1_mont_384 + + movd edx,xmm1 + add r14,QWORD PTR[48+rsi] + adc r15,QWORD PTR[56+rsi] + adc r8,QWORD PTR[64+rsi] + adc r9,QWORD PTR[72+rsi] + adc r10,QWORD PTR[80+rsi] + adc r11,QWORD PTR[88+rsi] + lea rsi,QWORD PTR[rdi] + + mov QWORD PTR[rdi],r14 + mov QWORD PTR[8+rdi],r15 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov QWORD PTR[32+rdi],r10 + mov QWORD PTR[40+rdi],r11 + + dec edx + jnz $L$oop_sqr_383 + +DB 102,72,15,126,208 + mov rcx,rbx + mov rbx,QWORD PTR[24+rsp] + + + + + + + mov r12,r8 + mov r13,r9 + + call __mulq_mont_384 + + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[136+rsp] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_sqr_n_mul_mont_383:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_n_mul_mont_383:: +sqr_n_mul_mont_383 ENDP + +ALIGN 32 +__mulq_mont_383_nonred PROC PRIVATE + DB 243,15,30,250 + + mov rbp,rax + mul r14 + mov r8,rax + mov rax,rbp + mov r9,rdx + + mul r15 + add r9,rax + mov rax,rbp + adc rdx,0 + mov r10,rdx + + mul r12 + add r10,rax + mov rax,rbp + adc rdx,0 + mov r11,rdx + + mov r15,r8 + imul r8,QWORD PTR[8+rsp] + + mul r13 + add r11,rax + mov rax,rbp + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[32+rsi] + add r12,rax + mov rax,rbp + adc rdx,0 + mov r13,rdx + + mul QWORD PTR[40+rsi] + add r13,rax + mov rax,r8 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[rcx] + add r15,rax + mov rax,r8 + adc r15,rdx + + mul QWORD PTR[8+rcx] + add r9,rax + mov rax,r8 + adc rdx,0 + add r9,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[16+rcx] + add r10,rax + mov rax,r8 + adc rdx,0 + add r10,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[24+rcx] + add r11,r15 + adc rdx,0 + add r11,rax + mov rax,r8 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[32+rcx] + add r12,rax + mov rax,r8 + adc rdx,0 + add r12,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[40+rcx] + add r13,rax + mov rax,QWORD PTR[8+rbx] + adc rdx,0 + add r13,r15 + adc r14,rdx + + mov rbp,rax + mul QWORD PTR[rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[8+rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + add r10,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[16+rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + add r11,r15 + adc rdx,0 + mov r15,rdx + + mov r8,r9 + imul r9,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r12,rax + mov rax,rbp + adc rdx,0 + add r12,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[32+rsi] + add r13,rax + mov rax,rbp + adc rdx,0 + add r13,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[40+rsi] + add r14,r15 + adc rdx,0 + add r14,rax + mov rax,r9 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[rcx] + add r8,rax + mov rax,r9 + adc r8,rdx + + mul QWORD PTR[8+rcx] + add r10,rax + mov rax,r9 + adc rdx,0 + add r10,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rcx] + add r11,rax + mov rax,r9 + adc rdx,0 + add r11,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[24+rcx] + add r12,r8 + adc rdx,0 + add r12,rax + mov rax,r9 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[32+rcx] + add r13,rax + mov rax,r9 + adc rdx,0 + add r13,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[40+rcx] + add r14,rax + mov rax,QWORD PTR[16+rbx] + adc rdx,0 + add r14,r8 + adc r15,rdx + + mov rbp,rax + mul QWORD PTR[rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[8+rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + add r11,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[16+rsi] + add r12,rax + mov rax,rbp + adc rdx,0 + add r12,r8 + adc rdx,0 + mov r8,rdx + + mov r9,r10 + imul r10,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r13,rax + mov rax,rbp + adc rdx,0 + add r13,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[32+rsi] + add r14,rax + mov rax,rbp + adc rdx,0 + add r14,r8 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[40+rsi] + add r15,r8 + adc rdx,0 + add r15,rax + mov rax,r10 + adc rdx,0 + mov r8,rdx + + mul QWORD PTR[rcx] + add r9,rax + mov rax,r10 + adc r9,rdx + + mul QWORD PTR[8+rcx] + add r11,rax + mov rax,r10 + adc rdx,0 + add r11,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[16+rcx] + add r12,rax + mov rax,r10 + adc rdx,0 + add r12,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[24+rcx] + add r13,r9 + adc rdx,0 + add r13,rax + mov rax,r10 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[32+rcx] + add r14,rax + mov rax,r10 + adc rdx,0 + add r14,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[40+rcx] + add r15,rax + mov rax,QWORD PTR[24+rbx] + adc rdx,0 + add r15,r9 + adc r8,rdx + + mov rbp,rax + mul QWORD PTR[rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[8+rsi] + add r12,rax + mov rax,rbp + adc rdx,0 + add r12,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[16+rsi] + add r13,rax + mov rax,rbp + adc rdx,0 + add r13,r9 + adc rdx,0 + mov r9,rdx + + mov r10,r11 + imul r11,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r14,rax + mov rax,rbp + adc rdx,0 + add r14,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[32+rsi] + add r15,rax + mov rax,rbp + adc rdx,0 + add r15,r9 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[40+rsi] + add r8,r9 + adc rdx,0 + add r8,rax + mov rax,r11 + adc rdx,0 + mov r9,rdx + + mul QWORD PTR[rcx] + add r10,rax + mov rax,r11 + adc r10,rdx + + mul QWORD PTR[8+rcx] + add r12,rax + mov rax,r11 + adc rdx,0 + add r12,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[16+rcx] + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[24+rcx] + add r14,r10 + adc rdx,0 + add r14,rax + mov rax,r11 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rcx] + add r15,rax + mov rax,r11 + adc rdx,0 + add r15,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[40+rcx] + add r8,rax + mov rax,QWORD PTR[32+rbx] + adc rdx,0 + add r8,r10 + adc r9,rdx + + mov rbp,rax + mul QWORD PTR[rsi] + add r12,rax + mov rax,rbp + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[8+rsi] + add r13,rax + mov rax,rbp + adc rdx,0 + add r13,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[16+rsi] + add r14,rax + mov rax,rbp + adc rdx,0 + add r14,r10 + adc rdx,0 + mov r10,rdx + + mov r11,r12 + imul r12,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r15,rax + mov rax,rbp + adc rdx,0 + add r15,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[32+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + add r8,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[40+rsi] + add r9,r10 + adc rdx,0 + add r9,rax + mov rax,r12 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[rcx] + add r11,rax + mov rax,r12 + adc r11,rdx + + mul QWORD PTR[8+rcx] + add r13,rax + mov rax,r12 + adc rdx,0 + add r13,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[16+rcx] + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[24+rcx] + add r15,r11 + adc rdx,0 + add r15,rax + mov rax,r12 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[32+rcx] + add r8,rax + mov rax,r12 + adc rdx,0 + add r8,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rcx] + add r9,rax + mov rax,QWORD PTR[40+rbx] + adc rdx,0 + add r9,r11 + adc r10,rdx + + mov rbp,rax + mul QWORD PTR[rsi] + add r13,rax + mov rax,rbp + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[8+rsi] + add r14,rax + mov rax,rbp + adc rdx,0 + add r14,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[16+rsi] + add r15,rax + mov rax,rbp + adc rdx,0 + add r15,r11 + adc rdx,0 + mov r11,rdx + + mov r12,r13 + imul r13,QWORD PTR[8+rsp] + + mul QWORD PTR[24+rsi] + add r8,rax + mov rax,rbp + adc rdx,0 + add r8,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[32+rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + add r9,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[40+rsi] + add r10,r11 + adc rdx,0 + add r10,rax + mov rax,r13 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[rcx] + add r12,rax + mov rax,r13 + adc r12,rdx + + mul QWORD PTR[8+rcx] + add r14,rax + mov rax,r13 + adc rdx,0 + add r14,r12 + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[16+rcx] + add r15,rax + mov rax,r13 + adc rdx,0 + add r15,r12 + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[24+rcx] + add r8,r12 + adc rdx,0 + add r8,rax + mov rax,r13 + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[32+rcx] + add r9,rax + mov rax,r13 + adc rdx,0 + add r9,r12 + adc rdx,0 + mov r12,rdx + + mul QWORD PTR[40+rcx] + add r10,rax + mov rax,r14 + adc rdx,0 + add r10,r12 + adc r11,rdx + DB 0F3h,0C3h ;repret +__mulq_mont_383_nonred ENDP +PUBLIC sqr_mont_382x + + +ALIGN 32 +sqr_mont_382x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqr_mont_382x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +ifdef __BLST_PORTABLE__ + test DWORD PTR[__blst_platform_cap],1 + jnz sqr_mont_382x$1 +endif + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,136 + +$L$SEH_body_sqr_mont_382x:: + + + mov QWORD PTR[rsp],rcx + mov rcx,rdx + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],rdi + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov r14,r8 + add r8,QWORD PTR[48+rsi] + mov r15,r9 + adc r9,QWORD PTR[56+rsi] + mov rax,r10 + adc r10,QWORD PTR[64+rsi] + mov rdx,r11 + adc r11,QWORD PTR[72+rsi] + mov rbx,r12 + adc r12,QWORD PTR[80+rsi] + mov rbp,r13 + adc r13,QWORD PTR[88+rsi] + + sub r14,QWORD PTR[48+rsi] + sbb r15,QWORD PTR[56+rsi] + sbb rax,QWORD PTR[64+rsi] + sbb rdx,QWORD PTR[72+rsi] + sbb rbx,QWORD PTR[80+rsi] + sbb rbp,QWORD PTR[88+rsi] + sbb rdi,rdi + + mov QWORD PTR[((32+0))+rsp],r8 + mov QWORD PTR[((32+8))+rsp],r9 + mov QWORD PTR[((32+16))+rsp],r10 + mov QWORD PTR[((32+24))+rsp],r11 + mov QWORD PTR[((32+32))+rsp],r12 + mov QWORD PTR[((32+40))+rsp],r13 + + mov QWORD PTR[((32+48))+rsp],r14 + mov QWORD PTR[((32+56))+rsp],r15 + mov QWORD PTR[((32+64))+rsp],rax + mov QWORD PTR[((32+72))+rsp],rdx + mov QWORD PTR[((32+80))+rsp],rbx + mov QWORD PTR[((32+88))+rsp],rbp + mov QWORD PTR[((32+96))+rsp],rdi + + + + lea rbx,QWORD PTR[48+rsi] + + mov rax,QWORD PTR[48+rsi] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov r13,QWORD PTR[24+rsi] + + mov rdi,QWORD PTR[24+rsp] + call __mulq_mont_383_nonred + add r14,r14 + adc r15,r15 + adc r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + mov QWORD PTR[64+rdi],r8 + mov QWORD PTR[72+rdi],r9 + mov QWORD PTR[80+rdi],r10 + mov QWORD PTR[88+rdi],r11 + + lea rsi,QWORD PTR[32+rsp] + lea rbx,QWORD PTR[((32+48))+rsp] + + mov rax,QWORD PTR[((32+48))+rsp] + mov r14,QWORD PTR[((32+0))+rsp] + mov r15,QWORD PTR[((32+8))+rsp] + mov r12,QWORD PTR[((32+16))+rsp] + mov r13,QWORD PTR[((32+24))+rsp] + + call __mulq_mont_383_nonred + mov rsi,QWORD PTR[((32+96))+rsp] + mov r12,QWORD PTR[((32+0))+rsp] + mov r13,QWORD PTR[((32+8))+rsp] + and r12,rsi + mov rax,QWORD PTR[((32+16))+rsp] + and r13,rsi + mov rbx,QWORD PTR[((32+24))+rsp] + and rax,rsi + mov rbp,QWORD PTR[((32+32))+rsp] + and rbx,rsi + and rbp,rsi + and rsi,QWORD PTR[((32+40))+rsp] + + sub r14,r12 + mov r12,QWORD PTR[rcx] + sbb r15,r13 + mov r13,QWORD PTR[8+rcx] + sbb r8,rax + mov rax,QWORD PTR[16+rcx] + sbb r9,rbx + mov rbx,QWORD PTR[24+rcx] + sbb r10,rbp + mov rbp,QWORD PTR[32+rcx] + sbb r11,rsi + sbb rsi,rsi + + and r12,rsi + and r13,rsi + and rax,rsi + and rbx,rsi + and rbp,rsi + and rsi,QWORD PTR[40+rcx] + + add r14,r12 + adc r15,r13 + adc r8,rax + adc r9,rbx + adc r10,rbp + adc r11,rsi + + mov QWORD PTR[rdi],r14 + mov QWORD PTR[8+rdi],r15 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov QWORD PTR[32+rdi],r10 + mov QWORD PTR[40+rdi],r11 + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_sqr_mont_382x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqr_mont_382x:: +sqr_mont_382x ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_mul_mont_384x + DD imagerel $L$SEH_body_mul_mont_384x + DD imagerel $L$SEH_info_mul_mont_384x_prologue + + DD imagerel $L$SEH_body_mul_mont_384x + DD imagerel $L$SEH_epilogue_mul_mont_384x + DD imagerel $L$SEH_info_mul_mont_384x_body + + DD imagerel $L$SEH_epilogue_mul_mont_384x + DD imagerel $L$SEH_end_mul_mont_384x + DD imagerel $L$SEH_info_mul_mont_384x_epilogue + + DD imagerel $L$SEH_begin_sqr_mont_384x + DD imagerel $L$SEH_body_sqr_mont_384x + DD imagerel $L$SEH_info_sqr_mont_384x_prologue + + DD imagerel $L$SEH_body_sqr_mont_384x + DD imagerel $L$SEH_epilogue_sqr_mont_384x + DD imagerel $L$SEH_info_sqr_mont_384x_body + + DD imagerel $L$SEH_epilogue_sqr_mont_384x + DD imagerel $L$SEH_end_sqr_mont_384x + DD imagerel $L$SEH_info_sqr_mont_384x_epilogue + + DD imagerel $L$SEH_begin_mul_382x + DD imagerel $L$SEH_body_mul_382x + DD imagerel $L$SEH_info_mul_382x_prologue + + DD imagerel $L$SEH_body_mul_382x + DD imagerel $L$SEH_epilogue_mul_382x + DD imagerel $L$SEH_info_mul_382x_body + + DD imagerel $L$SEH_epilogue_mul_382x + DD imagerel $L$SEH_end_mul_382x + DD imagerel $L$SEH_info_mul_382x_epilogue + + DD imagerel $L$SEH_begin_sqr_382x + DD imagerel $L$SEH_body_sqr_382x + DD imagerel $L$SEH_info_sqr_382x_prologue + + DD imagerel $L$SEH_body_sqr_382x + DD imagerel $L$SEH_epilogue_sqr_382x + DD imagerel $L$SEH_info_sqr_382x_body + + DD imagerel $L$SEH_epilogue_sqr_382x + DD imagerel $L$SEH_end_sqr_382x + DD imagerel $L$SEH_info_sqr_382x_epilogue + + DD imagerel $L$SEH_begin_mul_384 + DD imagerel $L$SEH_body_mul_384 + DD imagerel $L$SEH_info_mul_384_prologue + + DD imagerel $L$SEH_body_mul_384 + DD imagerel $L$SEH_epilogue_mul_384 + DD imagerel $L$SEH_info_mul_384_body + + DD imagerel $L$SEH_epilogue_mul_384 + DD imagerel $L$SEH_end_mul_384 + DD imagerel $L$SEH_info_mul_384_epilogue + + DD imagerel $L$SEH_begin_sqr_384 + DD imagerel $L$SEH_body_sqr_384 + DD imagerel $L$SEH_info_sqr_384_prologue + + DD imagerel $L$SEH_body_sqr_384 + DD imagerel $L$SEH_epilogue_sqr_384 + DD imagerel $L$SEH_info_sqr_384_body + + DD imagerel $L$SEH_epilogue_sqr_384 + DD imagerel $L$SEH_end_sqr_384 + DD imagerel $L$SEH_info_sqr_384_epilogue + + DD imagerel $L$SEH_begin_sqr_mont_384 + DD imagerel $L$SEH_body_sqr_mont_384 + DD imagerel $L$SEH_info_sqr_mont_384_prologue + + DD imagerel $L$SEH_body_sqr_mont_384 + DD imagerel $L$SEH_epilogue_sqr_mont_384 + DD imagerel $L$SEH_info_sqr_mont_384_body + + DD imagerel $L$SEH_epilogue_sqr_mont_384 + DD imagerel $L$SEH_end_sqr_mont_384 + DD imagerel $L$SEH_info_sqr_mont_384_epilogue + + DD imagerel $L$SEH_begin_redc_mont_384 + DD imagerel $L$SEH_body_redc_mont_384 + DD imagerel $L$SEH_info_redc_mont_384_prologue + + DD imagerel $L$SEH_body_redc_mont_384 + DD imagerel $L$SEH_epilogue_redc_mont_384 + DD imagerel $L$SEH_info_redc_mont_384_body + + DD imagerel $L$SEH_epilogue_redc_mont_384 + DD imagerel $L$SEH_end_redc_mont_384 + DD imagerel $L$SEH_info_redc_mont_384_epilogue + + DD imagerel $L$SEH_begin_from_mont_384 + DD imagerel $L$SEH_body_from_mont_384 + DD imagerel $L$SEH_info_from_mont_384_prologue + + DD imagerel $L$SEH_body_from_mont_384 + DD imagerel $L$SEH_epilogue_from_mont_384 + DD imagerel $L$SEH_info_from_mont_384_body + + DD imagerel $L$SEH_epilogue_from_mont_384 + DD imagerel $L$SEH_end_from_mont_384 + DD imagerel $L$SEH_info_from_mont_384_epilogue + + DD imagerel $L$SEH_begin_sgn0_pty_mont_384 + DD imagerel $L$SEH_body_sgn0_pty_mont_384 + DD imagerel $L$SEH_info_sgn0_pty_mont_384_prologue + + DD imagerel $L$SEH_body_sgn0_pty_mont_384 + DD imagerel $L$SEH_epilogue_sgn0_pty_mont_384 + DD imagerel $L$SEH_info_sgn0_pty_mont_384_body + + DD imagerel $L$SEH_epilogue_sgn0_pty_mont_384 + DD imagerel $L$SEH_end_sgn0_pty_mont_384 + DD imagerel $L$SEH_info_sgn0_pty_mont_384_epilogue + + DD imagerel $L$SEH_begin_sgn0_pty_mont_384x + DD imagerel $L$SEH_body_sgn0_pty_mont_384x + DD imagerel $L$SEH_info_sgn0_pty_mont_384x_prologue + + DD imagerel $L$SEH_body_sgn0_pty_mont_384x + DD imagerel $L$SEH_epilogue_sgn0_pty_mont_384x + DD imagerel $L$SEH_info_sgn0_pty_mont_384x_body + + DD imagerel $L$SEH_epilogue_sgn0_pty_mont_384x + DD imagerel $L$SEH_end_sgn0_pty_mont_384x + DD imagerel $L$SEH_info_sgn0_pty_mont_384x_epilogue + + DD imagerel $L$SEH_begin_mul_mont_384 + DD imagerel $L$SEH_body_mul_mont_384 + DD imagerel $L$SEH_info_mul_mont_384_prologue + + DD imagerel $L$SEH_body_mul_mont_384 + DD imagerel $L$SEH_epilogue_mul_mont_384 + DD imagerel $L$SEH_info_mul_mont_384_body + + DD imagerel $L$SEH_epilogue_mul_mont_384 + DD imagerel $L$SEH_end_mul_mont_384 + DD imagerel $L$SEH_info_mul_mont_384_epilogue + + DD imagerel $L$SEH_begin_sqr_n_mul_mont_384 + DD imagerel $L$SEH_body_sqr_n_mul_mont_384 + DD imagerel $L$SEH_info_sqr_n_mul_mont_384_prologue + + DD imagerel $L$SEH_body_sqr_n_mul_mont_384 + DD imagerel $L$SEH_epilogue_sqr_n_mul_mont_384 + DD imagerel $L$SEH_info_sqr_n_mul_mont_384_body + + DD imagerel $L$SEH_epilogue_sqr_n_mul_mont_384 + DD imagerel $L$SEH_end_sqr_n_mul_mont_384 + DD imagerel $L$SEH_info_sqr_n_mul_mont_384_epilogue + + DD imagerel $L$SEH_begin_sqr_n_mul_mont_383 + DD imagerel $L$SEH_body_sqr_n_mul_mont_383 + DD imagerel $L$SEH_info_sqr_n_mul_mont_383_prologue + + DD imagerel $L$SEH_body_sqr_n_mul_mont_383 + DD imagerel $L$SEH_epilogue_sqr_n_mul_mont_383 + DD imagerel $L$SEH_info_sqr_n_mul_mont_383_body + + DD imagerel $L$SEH_epilogue_sqr_n_mul_mont_383 + DD imagerel $L$SEH_end_sqr_n_mul_mont_383 + DD imagerel $L$SEH_info_sqr_n_mul_mont_383_epilogue + + DD imagerel $L$SEH_begin_sqr_mont_382x + DD imagerel $L$SEH_body_sqr_mont_382x + DD imagerel $L$SEH_info_sqr_mont_382x_prologue + + DD imagerel $L$SEH_body_sqr_mont_382x + DD imagerel $L$SEH_epilogue_sqr_mont_382x + DD imagerel $L$SEH_info_sqr_mont_382x_body + + DD imagerel $L$SEH_epilogue_sqr_mont_382x + DD imagerel $L$SEH_end_sqr_mont_382x + DD imagerel $L$SEH_info_sqr_mont_382x_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_mul_mont_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_mont_384x_body:: +DB 1,0,18,0 +DB 000h,0f4h,029h,000h +DB 000h,0e4h,02ah,000h +DB 000h,0d4h,02bh,000h +DB 000h,0c4h,02ch,000h +DB 000h,034h,02dh,000h +DB 000h,054h,02eh,000h +DB 000h,074h,030h,000h +DB 000h,064h,031h,000h +DB 000h,001h,02fh,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_mont_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_mont_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_mont_384x_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_mont_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_382x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_382x_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_382x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_382x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_382x_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_382x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_384_body:: +DB 1,0,11,0 +DB 000h,0c4h,000h,000h +DB 000h,034h,001h,000h +DB 000h,054h,002h,000h +DB 000h,074h,004h,000h +DB 000h,064h,005h,000h +DB 000h,022h +DB 000h,000h,000h,000h,000h,000h +$L$SEH_info_mul_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_mont_384_body:: +DB 1,0,18,0 +DB 000h,0f4h,00fh,000h +DB 000h,0e4h,010h,000h +DB 000h,0d4h,011h,000h +DB 000h,0c4h,012h,000h +DB 000h,034h,013h,000h +DB 000h,054h,014h,000h +DB 000h,074h,016h,000h +DB 000h,064h,017h,000h +DB 000h,001h,015h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_redc_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_redc_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_redc_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_from_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_from_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_from_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sgn0_pty_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sgn0_pty_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sgn0_pty_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sgn0_pty_mont_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sgn0_pty_mont_384x_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sgn0_pty_mont_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mul_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mul_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,003h,000h +DB 000h,0e4h,004h,000h +DB 000h,0d4h,005h,000h +DB 000h,0c4h,006h,000h +DB 000h,034h,007h,000h +DB 000h,054h,008h,000h +DB 000h,074h,00ah,000h +DB 000h,064h,00bh,000h +DB 000h,082h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mul_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_n_mul_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_n_mul_mont_384_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_n_mul_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_n_mul_mont_383_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_n_mul_mont_383_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_n_mul_mont_383_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqr_mont_382x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqr_mont_382x_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqr_mont_382x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/mulx_mont_256-x86_64.asm b/crypto/blst_src/build/win64/mulx_mont_256-x86_64.asm new file mode 100644 index 00000000000..21d18a8b40b --- /dev/null +++ b/crypto/blst_src/build/win64/mulx_mont_256-x86_64.asm @@ -0,0 +1,810 @@ +OPTION DOTNAME +PUBLIC mul_mont_sparse_256$1 +PUBLIC sqr_mont_sparse_256$1 +PUBLIC from_mont_256$1 +PUBLIC redc_mont_256$1 +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC mulx_mont_sparse_256 + + +ALIGN 32 +mulx_mont_sparse_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mulx_mont_sparse_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] +mul_mont_sparse_256$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_mulx_mont_sparse_256:: + + + mov rbx,rdx + mov rdx,QWORD PTR[rdx] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rbp,QWORD PTR[16+rsi] + mov r9,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[((-128))+rsi] + lea rcx,QWORD PTR[((-128))+rcx] + + mulx r11,rax,r14 + call __mulx_mont_sparse_256 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_mulx_mont_sparse_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mulx_mont_sparse_256:: +mulx_mont_sparse_256 ENDP + +PUBLIC sqrx_mont_sparse_256 + + +ALIGN 32 +sqrx_mont_sparse_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_mont_sparse_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +sqr_mont_sparse_256$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sqrx_mont_sparse_256:: + + + mov rbx,rsi + mov r8,rcx + mov rcx,rdx + mov rdx,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rbp,QWORD PTR[16+rsi] + mov r9,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[((-128))+rbx] + lea rcx,QWORD PTR[((-128))+rcx] + + mulx r11,rax,rdx + call __mulx_mont_sparse_256 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sqrx_mont_sparse_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_mont_sparse_256:: +sqrx_mont_sparse_256 ENDP + +ALIGN 32 +__mulx_mont_sparse_256 PROC PRIVATE + DB 243,15,30,250 + + mulx r12,r15,r15 + mulx r13,rbp,rbp + add r11,r15 + mulx r14,r9,r9 + mov rdx,QWORD PTR[8+rbx] + adc r12,rbp + adc r13,r9 + adc r14,0 + + mov r10,rax + imul rax,r8 + + + xor r15,r15 + mulx r9,rbp,QWORD PTR[((0+128))+rsi] + adox r11,rbp + adcx r12,r9 + + mulx r9,rbp,QWORD PTR[((8+128))+rsi] + adox r12,rbp + adcx r13,r9 + + mulx r9,rbp,QWORD PTR[((16+128))+rsi] + adox r13,rbp + adcx r14,r9 + + mulx r9,rbp,QWORD PTR[((24+128))+rsi] + mov rdx,rax + adox r14,rbp + adcx r9,r15 + adox r15,r9 + + + mulx rax,rbp,QWORD PTR[((0+128))+rcx] + adcx r10,rbp + adox rax,r11 + + mulx r9,rbp,QWORD PTR[((8+128))+rcx] + adcx rax,rbp + adox r12,r9 + + mulx r9,rbp,QWORD PTR[((16+128))+rcx] + adcx r12,rbp + adox r13,r9 + + mulx r9,rbp,QWORD PTR[((24+128))+rcx] + mov rdx,QWORD PTR[16+rbx] + adcx r13,rbp + adox r14,r9 + adcx r14,r10 + adox r15,r10 + adcx r15,r10 + adox r10,r10 + adc r10,0 + mov r11,rax + imul rax,r8 + + + xor rbp,rbp + mulx r9,rbp,QWORD PTR[((0+128))+rsi] + adox r12,rbp + adcx r13,r9 + + mulx r9,rbp,QWORD PTR[((8+128))+rsi] + adox r13,rbp + adcx r14,r9 + + mulx r9,rbp,QWORD PTR[((16+128))+rsi] + adox r14,rbp + adcx r15,r9 + + mulx r9,rbp,QWORD PTR[((24+128))+rsi] + mov rdx,rax + adox r15,rbp + adcx r9,r10 + adox r10,r9 + + + mulx rax,rbp,QWORD PTR[((0+128))+rcx] + adcx r11,rbp + adox rax,r12 + + mulx r9,rbp,QWORD PTR[((8+128))+rcx] + adcx rax,rbp + adox r13,r9 + + mulx r9,rbp,QWORD PTR[((16+128))+rcx] + adcx r13,rbp + adox r14,r9 + + mulx r9,rbp,QWORD PTR[((24+128))+rcx] + mov rdx,QWORD PTR[24+rbx] + adcx r14,rbp + adox r15,r9 + adcx r15,r11 + adox r10,r11 + adcx r10,r11 + adox r11,r11 + adc r11,0 + mov r12,rax + imul rax,r8 + + + xor rbp,rbp + mulx r9,rbp,QWORD PTR[((0+128))+rsi] + adox r13,rbp + adcx r14,r9 + + mulx r9,rbp,QWORD PTR[((8+128))+rsi] + adox r14,rbp + adcx r15,r9 + + mulx r9,rbp,QWORD PTR[((16+128))+rsi] + adox r15,rbp + adcx r10,r9 + + mulx r9,rbp,QWORD PTR[((24+128))+rsi] + mov rdx,rax + adox r10,rbp + adcx r9,r11 + adox r11,r9 + + + mulx rax,rbp,QWORD PTR[((0+128))+rcx] + adcx r12,rbp + adox rax,r13 + + mulx r9,rbp,QWORD PTR[((8+128))+rcx] + adcx rax,rbp + adox r14,r9 + + mulx r9,rbp,QWORD PTR[((16+128))+rcx] + adcx r14,rbp + adox r15,r9 + + mulx r9,rbp,QWORD PTR[((24+128))+rcx] + mov rdx,rax + adcx r15,rbp + adox r10,r9 + adcx r10,r12 + adox r11,r12 + adcx r11,r12 + adox r12,r12 + adc r12,0 + imul rdx,r8 + + + xor rbp,rbp + mulx r9,r13,QWORD PTR[((0+128))+rcx] + adcx r13,rax + adox r14,r9 + + mulx r9,rbp,QWORD PTR[((8+128))+rcx] + adcx r14,rbp + adox r15,r9 + + mulx r9,rbp,QWORD PTR[((16+128))+rcx] + adcx r15,rbp + adox r10,r9 + + mulx r9,rbp,QWORD PTR[((24+128))+rcx] + mov rdx,r14 + lea rcx,QWORD PTR[128+rcx] + adcx r10,rbp + adox r11,r9 + mov rax,r15 + adcx r11,r13 + adox r12,r13 + adc r12,0 + + + + + mov rbp,r10 + sub r14,QWORD PTR[rcx] + sbb r15,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rcx] + mov r9,r11 + sbb r11,QWORD PTR[24+rcx] + sbb r12,0 + + cmovc r14,rdx + cmovc r15,rax + cmovc r10,rbp + mov QWORD PTR[rdi],r14 + cmovc r11,r9 + mov QWORD PTR[8+rdi],r15 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + DB 0F3h,0C3h ;repret +__mulx_mont_sparse_256 ENDP +PUBLIC fromx_mont_256 + + +ALIGN 32 +fromx_mont_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_fromx_mont_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +from_mont_256$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_fromx_mont_256:: + + + mov rbx,rdx + call __mulx_by_1_mont_256 + + + + + + mov rdx,r15 + mov r12,r10 + mov r13,r11 + + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r10,QWORD PTR[16+rbx] + sbb r11,QWORD PTR[24+rbx] + + cmovnc rax,r14 + cmovnc rdx,r15 + cmovnc r12,r10 + mov QWORD PTR[rdi],rax + cmovnc r13,r11 + mov QWORD PTR[8+rdi],rdx + mov QWORD PTR[16+rdi],r12 + mov QWORD PTR[24+rdi],r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_fromx_mont_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_fromx_mont_256:: +fromx_mont_256 ENDP + +PUBLIC redcx_mont_256 + + +ALIGN 32 +redcx_mont_256 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_redcx_mont_256:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +redc_mont_256$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_redcx_mont_256:: + + + mov rbx,rdx + call __mulx_by_1_mont_256 + + add r14,QWORD PTR[32+rsi] + adc r15,QWORD PTR[40+rsi] + mov rax,r14 + adc r10,QWORD PTR[48+rsi] + mov rdx,r15 + adc r11,QWORD PTR[56+rsi] + sbb rsi,rsi + + + + + mov r12,r10 + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r10,QWORD PTR[16+rbx] + mov r13,r11 + sbb r11,QWORD PTR[24+rbx] + sbb rsi,0 + + cmovnc rax,r14 + cmovnc rdx,r15 + cmovnc r12,r10 + mov QWORD PTR[rdi],rax + cmovnc r13,r11 + mov QWORD PTR[8+rdi],rdx + mov QWORD PTR[16+rdi],r12 + mov QWORD PTR[24+rdi],r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_redcx_mont_256:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_redcx_mont_256:: +redcx_mont_256 ENDP + +ALIGN 32 +__mulx_by_1_mont_256 PROC PRIVATE + DB 243,15,30,250 + + mov rax,QWORD PTR[rsi] + mov r11,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov r13,QWORD PTR[24+rsi] + + mov r14,rax + imul rax,rcx + mov r10,rax + + mul QWORD PTR[rbx] + add r14,rax + mov rax,r10 + adc r14,rdx + + mul QWORD PTR[8+rbx] + add r11,rax + mov rax,r10 + adc rdx,0 + add r11,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[16+rbx] + mov r15,r11 + imul r11,rcx + add r12,rax + mov rax,r10 + adc rdx,0 + add r12,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[24+rbx] + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,r14 + adc rdx,0 + mov r14,rdx + + mul QWORD PTR[rbx] + add r15,rax + mov rax,r11 + adc r15,rdx + + mul QWORD PTR[8+rbx] + add r12,rax + mov rax,r11 + adc rdx,0 + add r12,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[16+rbx] + mov r10,r12 + imul r12,rcx + add r13,rax + mov rax,r11 + adc rdx,0 + add r13,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[24+rbx] + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,r15 + adc rdx,0 + mov r15,rdx + + mul QWORD PTR[rbx] + add r10,rax + mov rax,r12 + adc r10,rdx + + mul QWORD PTR[8+rbx] + add r13,rax + mov rax,r12 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[16+rbx] + mov r11,r13 + imul r13,rcx + add r14,rax + mov rax,r12 + adc rdx,0 + add r14,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[24+rbx] + add r15,rax + mov rax,r13 + adc rdx,0 + add r15,r10 + adc rdx,0 + mov r10,rdx + + mul QWORD PTR[rbx] + add r11,rax + mov rax,r13 + adc r11,rdx + + mul QWORD PTR[8+rbx] + add r14,rax + mov rax,r13 + adc rdx,0 + add r14,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[16+rbx] + add r15,rax + mov rax,r13 + adc rdx,0 + add r15,r11 + adc rdx,0 + mov r11,rdx + + mul QWORD PTR[24+rbx] + add r10,rax + mov rax,r14 + adc rdx,0 + add r10,r11 + adc rdx,0 + mov r11,rdx + DB 0F3h,0C3h ;repret +__mulx_by_1_mont_256 ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_mulx_mont_sparse_256 + DD imagerel $L$SEH_body_mulx_mont_sparse_256 + DD imagerel $L$SEH_info_mulx_mont_sparse_256_prologue + + DD imagerel $L$SEH_body_mulx_mont_sparse_256 + DD imagerel $L$SEH_epilogue_mulx_mont_sparse_256 + DD imagerel $L$SEH_info_mulx_mont_sparse_256_body + + DD imagerel $L$SEH_epilogue_mulx_mont_sparse_256 + DD imagerel $L$SEH_end_mulx_mont_sparse_256 + DD imagerel $L$SEH_info_mulx_mont_sparse_256_epilogue + + DD imagerel $L$SEH_begin_sqrx_mont_sparse_256 + DD imagerel $L$SEH_body_sqrx_mont_sparse_256 + DD imagerel $L$SEH_info_sqrx_mont_sparse_256_prologue + + DD imagerel $L$SEH_body_sqrx_mont_sparse_256 + DD imagerel $L$SEH_epilogue_sqrx_mont_sparse_256 + DD imagerel $L$SEH_info_sqrx_mont_sparse_256_body + + DD imagerel $L$SEH_epilogue_sqrx_mont_sparse_256 + DD imagerel $L$SEH_end_sqrx_mont_sparse_256 + DD imagerel $L$SEH_info_sqrx_mont_sparse_256_epilogue + + DD imagerel $L$SEH_begin_fromx_mont_256 + DD imagerel $L$SEH_body_fromx_mont_256 + DD imagerel $L$SEH_info_fromx_mont_256_prologue + + DD imagerel $L$SEH_body_fromx_mont_256 + DD imagerel $L$SEH_epilogue_fromx_mont_256 + DD imagerel $L$SEH_info_fromx_mont_256_body + + DD imagerel $L$SEH_epilogue_fromx_mont_256 + DD imagerel $L$SEH_end_fromx_mont_256 + DD imagerel $L$SEH_info_fromx_mont_256_epilogue + + DD imagerel $L$SEH_begin_redcx_mont_256 + DD imagerel $L$SEH_body_redcx_mont_256 + DD imagerel $L$SEH_info_redcx_mont_256_prologue + + DD imagerel $L$SEH_body_redcx_mont_256 + DD imagerel $L$SEH_epilogue_redcx_mont_256 + DD imagerel $L$SEH_info_redcx_mont_256_body + + DD imagerel $L$SEH_epilogue_redcx_mont_256 + DD imagerel $L$SEH_end_redcx_mont_256 + DD imagerel $L$SEH_info_redcx_mont_256_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_mulx_mont_sparse_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mulx_mont_sparse_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mulx_mont_sparse_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_mont_sparse_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_mont_sparse_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_mont_sparse_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_fromx_mont_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_fromx_mont_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_fromx_mont_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_redcx_mont_256_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_redcx_mont_256_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_redcx_mont_256_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/mulx_mont_384-x86_64.asm b/crypto/blst_src/build/win64/mulx_mont_384-x86_64.asm new file mode 100644 index 00000000000..4dc41b04098 --- /dev/null +++ b/crypto/blst_src/build/win64/mulx_mont_384-x86_64.asm @@ -0,0 +1,3644 @@ +OPTION DOTNAME +PUBLIC mul_mont_384x$1 +PUBLIC sqr_mont_384x$1 +PUBLIC mul_382x$1 +PUBLIC sqr_382x$1 +PUBLIC mul_384$1 +PUBLIC sqr_384$1 +PUBLIC redc_mont_384$1 +PUBLIC from_mont_384$1 +PUBLIC sgn0_pty_mont_384$1 +PUBLIC sgn0_pty_mont_384x$1 +PUBLIC mul_mont_384$1 +PUBLIC sqr_mont_384$1 +PUBLIC sqr_n_mul_mont_384$1 +PUBLIC sqr_n_mul_mont_383$1 +PUBLIC sqr_mont_382x$1 +.text$ SEGMENT ALIGN(256) 'CODE' + + + + + + + + +ALIGN 32 +__subx_mod_384x384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + + sub r8,QWORD PTR[rdx] + mov r15,QWORD PTR[56+rsi] + sbb r9,QWORD PTR[8+rdx] + mov rax,QWORD PTR[64+rsi] + sbb r10,QWORD PTR[16+rdx] + mov rbx,QWORD PTR[72+rsi] + sbb r11,QWORD PTR[24+rdx] + mov rbp,QWORD PTR[80+rsi] + sbb r12,QWORD PTR[32+rdx] + mov rsi,QWORD PTR[88+rsi] + sbb r13,QWORD PTR[40+rdx] + mov QWORD PTR[rdi],r8 + sbb r14,QWORD PTR[48+rdx] + mov r8,QWORD PTR[rcx] + mov QWORD PTR[8+rdi],r9 + sbb r15,QWORD PTR[56+rdx] + mov r9,QWORD PTR[8+rcx] + mov QWORD PTR[16+rdi],r10 + sbb rax,QWORD PTR[64+rdx] + mov r10,QWORD PTR[16+rcx] + mov QWORD PTR[24+rdi],r11 + sbb rbx,QWORD PTR[72+rdx] + mov r11,QWORD PTR[24+rcx] + mov QWORD PTR[32+rdi],r12 + sbb rbp,QWORD PTR[80+rdx] + mov r12,QWORD PTR[32+rcx] + mov QWORD PTR[40+rdi],r13 + sbb rsi,QWORD PTR[88+rdx] + mov r13,QWORD PTR[40+rcx] + sbb rdx,rdx + + and r8,rdx + and r9,rdx + and r10,rdx + and r11,rdx + and r12,rdx + and r13,rdx + + add r14,r8 + adc r15,r9 + mov QWORD PTR[48+rdi],r14 + adc rax,r10 + mov QWORD PTR[56+rdi],r15 + adc rbx,r11 + mov QWORD PTR[64+rdi],rax + adc rbp,r12 + mov QWORD PTR[72+rdi],rbx + adc rsi,r13 + mov QWORD PTR[80+rdi],rbp + mov QWORD PTR[88+rdi],rsi + + DB 0F3h,0C3h ;repret +__subx_mod_384x384 ENDP + + +ALIGN 32 +__addx_mod_384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + adc r10,QWORD PTR[16+rdx] + mov r14,r8 + adc r11,QWORD PTR[24+rdx] + mov r15,r9 + adc r12,QWORD PTR[32+rdx] + mov rax,r10 + adc r13,QWORD PTR[40+rdx] + mov rbx,r11 + sbb rdx,rdx + + sub r8,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rcx] + mov rbp,r12 + sbb r10,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rcx] + mov rsi,r13 + sbb r13,QWORD PTR[40+rcx] + sbb rdx,0 + + cmovc r8,r14 + cmovc r9,r15 + cmovc r10,rax + mov QWORD PTR[rdi],r8 + cmovc r11,rbx + mov QWORD PTR[8+rdi],r9 + cmovc r12,rbp + mov QWORD PTR[16+rdi],r10 + cmovc r13,rsi + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__addx_mod_384 ENDP + + +ALIGN 32 +__subx_mod_384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + +__subx_mod_384_a_is_loaded:: + sub r8,QWORD PTR[rdx] + mov r14,QWORD PTR[rcx] + sbb r9,QWORD PTR[8+rdx] + mov r15,QWORD PTR[8+rcx] + sbb r10,QWORD PTR[16+rdx] + mov rax,QWORD PTR[16+rcx] + sbb r11,QWORD PTR[24+rdx] + mov rbx,QWORD PTR[24+rcx] + sbb r12,QWORD PTR[32+rdx] + mov rbp,QWORD PTR[32+rcx] + sbb r13,QWORD PTR[40+rdx] + mov rsi,QWORD PTR[40+rcx] + sbb rdx,rdx + + and r14,rdx + and r15,rdx + and rax,rdx + and rbx,rdx + and rbp,rdx + and rsi,rdx + + add r8,r14 + adc r9,r15 + mov QWORD PTR[rdi],r8 + adc r10,rax + mov QWORD PTR[8+rdi],r9 + adc r11,rbx + mov QWORD PTR[16+rdi],r10 + adc r12,rbp + mov QWORD PTR[24+rdi],r11 + adc r13,rsi + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + + DB 0F3h,0C3h ;repret +__subx_mod_384 ENDP +PUBLIC mulx_mont_384x + + +ALIGN 32 +mulx_mont_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mulx_mont_384x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] +mul_mont_384x$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,328 + +$L$SEH_body_mulx_mont_384x:: + + + mov rbx,rdx + mov QWORD PTR[32+rsp],rdi + mov QWORD PTR[24+rsp],rsi + mov QWORD PTR[16+rsp],rdx + mov QWORD PTR[8+rsp],rcx + mov QWORD PTR[rsp],r8 + + + + + lea rdi,QWORD PTR[40+rsp] + call __mulx_384 + + + lea rbx,QWORD PTR[48+rbx] + lea rsi,QWORD PTR[((128+48))+rsi] + lea rdi,QWORD PTR[96+rdi] + call __mulx_384 + + + mov rcx,QWORD PTR[8+rsp] + lea rsi,QWORD PTR[rbx] + lea rdx,QWORD PTR[((-48))+rbx] + lea rdi,QWORD PTR[((40+192+48))+rsp] + call __addx_mod_384 + + mov rsi,QWORD PTR[24+rsp] + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[((-48))+rdi] + call __addx_mod_384 + + lea rbx,QWORD PTR[rdi] + lea rsi,QWORD PTR[48+rdi] + call __mulx_384 + + + lea rsi,QWORD PTR[rdi] + lea rdx,QWORD PTR[40+rsp] + mov rcx,QWORD PTR[8+rsp] + call __subx_mod_384x384 + + lea rsi,QWORD PTR[rdi] + lea rdx,QWORD PTR[((-96))+rdi] + call __subx_mod_384x384 + + + lea rsi,QWORD PTR[40+rsp] + lea rdx,QWORD PTR[((40+96))+rsp] + lea rdi,QWORD PTR[40+rsp] + call __subx_mod_384x384 + + lea rbx,QWORD PTR[rcx] + + + lea rsi,QWORD PTR[40+rsp] + mov rcx,QWORD PTR[rsp] + mov rdi,QWORD PTR[32+rsp] + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + + lea rsi,QWORD PTR[((40+192))+rsp] + mov rcx,QWORD PTR[rsp] + lea rdi,QWORD PTR[48+rdi] + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + lea r8,QWORD PTR[328+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_mulx_mont_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mulx_mont_384x:: +mulx_mont_384x ENDP +PUBLIC sqrx_mont_384x + + +ALIGN 32 +sqrx_mont_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_mont_384x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +sqr_mont_384x$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,136 + +$L$SEH_body_sqrx_mont_384x:: + + + mov QWORD PTR[rsp],rcx + mov rcx,rdx + + mov QWORD PTR[16+rsp],rdi + mov QWORD PTR[24+rsp],rsi + + + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[32+rsp] + call __addx_mod_384 + + + mov rsi,QWORD PTR[24+rsp] + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[((32+48))+rsp] + call __subx_mod_384 + + + mov rsi,QWORD PTR[24+rsp] + lea rbx,QWORD PTR[48+rsi] + + mov rdx,QWORD PTR[48+rsi] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rdi,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + lea rsi,QWORD PTR[((-128))+rsi] + lea rcx,QWORD PTR[((-128))+rcx] + + mulx r9,r8,r14 + call __mulx_mont_384 + add rdx,rdx + adc r15,r15 + adc rax,rax + mov r8,rdx + adc r12,r12 + mov r9,r15 + adc rdi,rdi + mov r10,rax + adc rbp,rbp + mov r11,r12 + sbb rsi,rsi + + sub rdx,QWORD PTR[rcx] + sbb r15,QWORD PTR[8+rcx] + mov r13,rdi + sbb rax,QWORD PTR[16+rcx] + sbb r12,QWORD PTR[24+rcx] + sbb rdi,QWORD PTR[32+rcx] + mov r14,rbp + sbb rbp,QWORD PTR[40+rcx] + sbb rsi,0 + + cmovc rdx,r8 + cmovc r15,r9 + cmovc rax,r10 + mov QWORD PTR[48+rbx],rdx + cmovc r12,r11 + mov QWORD PTR[56+rbx],r15 + cmovc rdi,r13 + mov QWORD PTR[64+rbx],rax + cmovc rbp,r14 + mov QWORD PTR[72+rbx],r12 + mov QWORD PTR[80+rbx],rdi + mov QWORD PTR[88+rbx],rbp + + lea rsi,QWORD PTR[32+rsp] + lea rbx,QWORD PTR[((32+48))+rsp] + + mov rdx,QWORD PTR[((32+48))+rsp] + mov r14,QWORD PTR[((32+0))+rsp] + mov r15,QWORD PTR[((32+8))+rsp] + mov rax,QWORD PTR[((32+16))+rsp] + mov r12,QWORD PTR[((32+24))+rsp] + mov rdi,QWORD PTR[((32+32))+rsp] + mov rbp,QWORD PTR[((32+40))+rsp] + lea rsi,QWORD PTR[((-128))+rsi] + lea rcx,QWORD PTR[((-128))+rcx] + + mulx r9,r8,r14 + call __mulx_mont_384 + + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_sqrx_mont_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_mont_384x:: +sqrx_mont_384x ENDP + +PUBLIC mulx_382x + + +ALIGN 32 +mulx_382x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mulx_382x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +mul_382x$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,136 + +$L$SEH_body_mulx_382x:: + + + lea rdi,QWORD PTR[96+rdi] + mov QWORD PTR[rsp],rsi + mov QWORD PTR[8+rsp],rdx + mov QWORD PTR[16+rsp],rdi + mov QWORD PTR[24+rsp],rcx + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + add r8,QWORD PTR[48+rsi] + adc r9,QWORD PTR[56+rsi] + adc r10,QWORD PTR[64+rsi] + adc r11,QWORD PTR[72+rsi] + adc r12,QWORD PTR[80+rsi] + adc r13,QWORD PTR[88+rsi] + + mov QWORD PTR[((32+0))+rsp],r8 + mov QWORD PTR[((32+8))+rsp],r9 + mov QWORD PTR[((32+16))+rsp],r10 + mov QWORD PTR[((32+24))+rsp],r11 + mov QWORD PTR[((32+32))+rsp],r12 + mov QWORD PTR[((32+40))+rsp],r13 + + + mov r8,QWORD PTR[rdx] + mov r9,QWORD PTR[8+rdx] + mov r10,QWORD PTR[16+rdx] + mov r11,QWORD PTR[24+rdx] + mov r12,QWORD PTR[32+rdx] + mov r13,QWORD PTR[40+rdx] + + add r8,QWORD PTR[48+rdx] + adc r9,QWORD PTR[56+rdx] + adc r10,QWORD PTR[64+rdx] + adc r11,QWORD PTR[72+rdx] + adc r12,QWORD PTR[80+rdx] + adc r13,QWORD PTR[88+rdx] + + mov QWORD PTR[((32+48))+rsp],r8 + mov QWORD PTR[((32+56))+rsp],r9 + mov QWORD PTR[((32+64))+rsp],r10 + mov QWORD PTR[((32+72))+rsp],r11 + mov QWORD PTR[((32+80))+rsp],r12 + mov QWORD PTR[((32+88))+rsp],r13 + + + lea rsi,QWORD PTR[((32+0))+rsp] + lea rbx,QWORD PTR[((32+48))+rsp] + call __mulx_384 + + + mov rsi,QWORD PTR[rsp] + mov rbx,QWORD PTR[8+rsp] + lea rdi,QWORD PTR[((-96))+rdi] + call __mulx_384 + + + lea rsi,QWORD PTR[((48+128))+rsi] + lea rbx,QWORD PTR[48+rbx] + lea rdi,QWORD PTR[32+rsp] + call __mulx_384 + + + mov rsi,QWORD PTR[16+rsp] + lea rdx,QWORD PTR[32+rsp] + mov rcx,QWORD PTR[24+rsp] + mov rdi,rsi + call __subx_mod_384x384 + + + lea rsi,QWORD PTR[rdi] + lea rdx,QWORD PTR[((-96))+rdi] + call __subx_mod_384x384 + + + lea rsi,QWORD PTR[((-96))+rdi] + lea rdx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[((-96))+rdi] + call __subx_mod_384x384 + + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_mulx_382x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mulx_382x:: +mulx_382x ENDP +PUBLIC sqrx_382x + + +ALIGN 32 +sqrx_382x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_382x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +sqr_382x$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rsi + +$L$SEH_body_sqrx_382x:: + + + mov rcx,rdx + + + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov rbx,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rdx,QWORD PTR[40+rsi] + + mov r8,r14 + add r14,QWORD PTR[48+rsi] + mov r9,r15 + adc r15,QWORD PTR[56+rsi] + mov r10,rax + adc rax,QWORD PTR[64+rsi] + mov r11,rbx + adc rbx,QWORD PTR[72+rsi] + mov r12,rbp + adc rbp,QWORD PTR[80+rsi] + mov r13,rdx + adc rdx,QWORD PTR[88+rsi] + + mov QWORD PTR[rdi],r14 + mov QWORD PTR[8+rdi],r15 + mov QWORD PTR[16+rdi],rax + mov QWORD PTR[24+rdi],rbx + mov QWORD PTR[32+rdi],rbp + mov QWORD PTR[40+rdi],rdx + + + lea rdx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[48+rdi] + call __subx_mod_384_a_is_loaded + + + lea rsi,QWORD PTR[rdi] + lea rbx,QWORD PTR[((-48))+rdi] + lea rdi,QWORD PTR[((-48))+rdi] + call __mulx_384 + + + mov rsi,QWORD PTR[rsp] + lea rbx,QWORD PTR[48+rsi] + lea rdi,QWORD PTR[96+rdi] + call __mulx_384 + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] + mov rax,QWORD PTR[64+rdi] + mov rbx,QWORD PTR[72+rdi] + mov rbp,QWORD PTR[80+rdi] + add r8,r8 + mov rdx,QWORD PTR[88+rdi] + adc r9,r9 + mov QWORD PTR[rdi],r8 + adc r10,r10 + mov QWORD PTR[8+rdi],r9 + adc r11,r11 + mov QWORD PTR[16+rdi],r10 + adc r12,r12 + mov QWORD PTR[24+rdi],r11 + adc r13,r13 + mov QWORD PTR[32+rdi],r12 + adc r14,r14 + mov QWORD PTR[40+rdi],r13 + adc r15,r15 + mov QWORD PTR[48+rdi],r14 + adc rax,rax + mov QWORD PTR[56+rdi],r15 + adc rbx,rbx + mov QWORD PTR[64+rdi],rax + adc rbp,rbp + mov QWORD PTR[72+rdi],rbx + adc rdx,rdx + mov QWORD PTR[80+rdi],rbp + mov QWORD PTR[88+rdi],rdx + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sqrx_382x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_382x:: +sqrx_382x ENDP +PUBLIC mulx_384 + + +ALIGN 32 +mulx_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mulx_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +mul_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$SEH_body_mulx_384:: + + + mov rbx,rdx + call __mulx_384 + + mov r15,QWORD PTR[rsp] + + mov r14,QWORD PTR[8+rsp] + + mov r13,QWORD PTR[16+rsp] + + mov r12,QWORD PTR[24+rsp] + + mov rbx,QWORD PTR[32+rsp] + + mov rbp,QWORD PTR[40+rsp] + + lea rsp,QWORD PTR[48+rsp] + +$L$SEH_epilogue_mulx_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mulx_384:: +mulx_384 ENDP + + +ALIGN 32 +__mulx_384 PROC PRIVATE + DB 243,15,30,250 + + mov rdx,QWORD PTR[rbx] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + lea rsi,QWORD PTR[((-128))+rsi] + + mulx rcx,r9,r14 + xor rbp,rbp + + mulx rax,r8,r15 + adcx r8,rcx + mov QWORD PTR[rdi],r9 + + mulx rcx,r9,r10 + adcx r9,rax + + mulx rax,r10,r11 + adcx r10,rcx + + mulx rcx,r11,r12 + adcx r11,rax + + mulx r13,r12,r13 + mov rdx,QWORD PTR[8+rbx] + adcx r12,rcx + adcx r13,rbp + mulx rcx,rax,r14 + adcx rax,r8 + adox r9,rcx + mov QWORD PTR[8+rdi],rax + + mulx rcx,r8,r15 + adcx r8,r9 + adox r10,rcx + + mulx rax,r9,QWORD PTR[((128+16))+rsi] + adcx r9,r10 + adox r11,rax + + mulx rcx,r10,QWORD PTR[((128+24))+rsi] + adcx r10,r11 + adox r12,rcx + + mulx rax,r11,QWORD PTR[((128+32))+rsi] + adcx r11,r12 + adox rax,r13 + + mulx r13,r12,QWORD PTR[((128+40))+rsi] + mov rdx,QWORD PTR[16+rbx] + adcx r12,rax + adox r13,rbp + adcx r13,rbp + mulx rcx,rax,r14 + adcx rax,r8 + adox r9,rcx + mov QWORD PTR[16+rdi],rax + + mulx rcx,r8,r15 + adcx r8,r9 + adox r10,rcx + + mulx rax,r9,QWORD PTR[((128+16))+rsi] + adcx r9,r10 + adox r11,rax + + mulx rcx,r10,QWORD PTR[((128+24))+rsi] + adcx r10,r11 + adox r12,rcx + + mulx rax,r11,QWORD PTR[((128+32))+rsi] + adcx r11,r12 + adox rax,r13 + + mulx r13,r12,QWORD PTR[((128+40))+rsi] + mov rdx,QWORD PTR[24+rbx] + adcx r12,rax + adox r13,rbp + adcx r13,rbp + mulx rcx,rax,r14 + adcx rax,r8 + adox r9,rcx + mov QWORD PTR[24+rdi],rax + + mulx rcx,r8,r15 + adcx r8,r9 + adox r10,rcx + + mulx rax,r9,QWORD PTR[((128+16))+rsi] + adcx r9,r10 + adox r11,rax + + mulx rcx,r10,QWORD PTR[((128+24))+rsi] + adcx r10,r11 + adox r12,rcx + + mulx rax,r11,QWORD PTR[((128+32))+rsi] + adcx r11,r12 + adox rax,r13 + + mulx r13,r12,QWORD PTR[((128+40))+rsi] + mov rdx,QWORD PTR[32+rbx] + adcx r12,rax + adox r13,rbp + adcx r13,rbp + mulx rcx,rax,r14 + adcx rax,r8 + adox r9,rcx + mov QWORD PTR[32+rdi],rax + + mulx rcx,r8,r15 + adcx r8,r9 + adox r10,rcx + + mulx rax,r9,QWORD PTR[((128+16))+rsi] + adcx r9,r10 + adox r11,rax + + mulx rcx,r10,QWORD PTR[((128+24))+rsi] + adcx r10,r11 + adox r12,rcx + + mulx rax,r11,QWORD PTR[((128+32))+rsi] + adcx r11,r12 + adox rax,r13 + + mulx r13,r12,QWORD PTR[((128+40))+rsi] + mov rdx,QWORD PTR[40+rbx] + adcx r12,rax + adox r13,rbp + adcx r13,rbp + mulx rcx,rax,r14 + adcx rax,r8 + adox r9,rcx + mov QWORD PTR[40+rdi],rax + + mulx rcx,r8,r15 + adcx r8,r9 + adox r10,rcx + + mulx rax,r9,QWORD PTR[((128+16))+rsi] + adcx r9,r10 + adox r11,rax + + mulx rcx,r10,QWORD PTR[((128+24))+rsi] + adcx r10,r11 + adox r12,rcx + + mulx rax,r11,QWORD PTR[((128+32))+rsi] + adcx r11,r12 + adox rax,r13 + + mulx r13,r12,QWORD PTR[((128+40))+rsi] + mov rdx,rax + adcx r12,rax + adox r13,rbp + adcx r13,rbp + mov QWORD PTR[48+rdi],r8 + mov QWORD PTR[56+rdi],r9 + mov QWORD PTR[64+rdi],r10 + mov QWORD PTR[72+rdi],r11 + mov QWORD PTR[80+rdi],r12 + mov QWORD PTR[88+rdi],r13 + + DB 0F3h,0C3h ;repret +__mulx_384 ENDP +PUBLIC sqrx_384 + + +ALIGN 32 +sqrx_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_384:: + + + mov rdi,rcx + mov rsi,rdx +sqr_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + +$L$SEH_body_sqrx_384:: + + + call __sqrx_384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sqrx_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_384:: +sqrx_384 ENDP + +ALIGN 32 +__sqrx_384 PROC PRIVATE + DB 243,15,30,250 + + mov rdx,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r15,QWORD PTR[16+rsi] + mov rcx,QWORD PTR[24+rsi] + mov rbx,QWORD PTR[32+rsi] + + + mulx rdi,r8,r14 + mov rbp,QWORD PTR[40+rsi] + mulx rax,r9,r15 + add r9,rdi + mulx rdi,r10,rcx + adc r10,rax + mulx rax,r11,rbx + adc r11,rdi + mulx r13,r12,rbp + mov rdx,r14 + adc r12,rax + adc r13,0 + + + xor r14,r14 + mulx rax,rdi,r15 + adcx r10,rdi + adox r11,rax + + mulx rax,rdi,rcx + adcx r11,rdi + adox r12,rax + + mulx rax,rdi,rbx + adcx r12,rdi + adox r13,rax + + mulx rax,rdi,rbp + mov rdx,r15 + adcx r13,rdi + adox rax,r14 + adcx r14,rax + + + xor r15,r15 + mulx rax,rdi,rcx + adcx r12,rdi + adox r13,rax + + mulx rax,rdi,rbx + adcx r13,rdi + adox r14,rax + + mulx rax,rdi,rbp + mov rdx,rcx + adcx r14,rdi + adox rax,r15 + adcx r15,rax + + + xor rcx,rcx + mulx rax,rdi,rbx + adcx r14,rdi + adox r15,rax + + mulx rax,rdi,rbp + mov rdx,rbx + adcx r15,rdi + adox rax,rcx + adcx rcx,rax + + + mulx rbx,rdi,rbp + mov rdx,QWORD PTR[rsi] + add rcx,rdi + mov rdi,QWORD PTR[8+rsp] + adc rbx,0 + + + xor rbp,rbp + adcx r8,r8 + adcx r9,r9 + adcx r10,r10 + adcx r11,r11 + adcx r12,r12 + + + mulx rax,rdx,rdx + mov QWORD PTR[rdi],rdx + mov rdx,QWORD PTR[8+rsi] + adox r8,rax + mov QWORD PTR[8+rdi],r8 + + mulx rax,r8,rdx + mov rdx,QWORD PTR[16+rsi] + adox r9,r8 + adox r10,rax + mov QWORD PTR[16+rdi],r9 + mov QWORD PTR[24+rdi],r10 + + mulx r9,r8,rdx + mov rdx,QWORD PTR[24+rsi] + adox r11,r8 + adox r12,r9 + adcx r13,r13 + adcx r14,r14 + mov QWORD PTR[32+rdi],r11 + mov QWORD PTR[40+rdi],r12 + + mulx r9,r8,rdx + mov rdx,QWORD PTR[32+rsi] + adox r13,r8 + adox r14,r9 + adcx r15,r15 + adcx rcx,rcx + mov QWORD PTR[48+rdi],r13 + mov QWORD PTR[56+rdi],r14 + + mulx r9,r8,rdx + mov rdx,QWORD PTR[40+rsi] + adox r15,r8 + adox rcx,r9 + adcx rbx,rbx + adcx rbp,rbp + mov QWORD PTR[64+rdi],r15 + mov QWORD PTR[72+rdi],rcx + + mulx r9,r8,rdx + adox rbx,r8 + adox rbp,r9 + + mov QWORD PTR[80+rdi],rbx + mov QWORD PTR[88+rdi],rbp + + DB 0F3h,0C3h ;repret +__sqrx_384 ENDP + + + +PUBLIC redcx_mont_384 + + +ALIGN 32 +redcx_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_redcx_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +redc_mont_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_redcx_mont_384:: + + + mov rbx,rdx + call __mulx_by_1_mont_384 + call __redx_tail_mont_384 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_redcx_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_redcx_mont_384:: +redcx_mont_384 ENDP + + + + +PUBLIC fromx_mont_384 + + +ALIGN 32 +fromx_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_fromx_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +from_mont_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_fromx_mont_384:: + + + mov rbx,rdx + call __mulx_by_1_mont_384 + + + + + mov rax,r14 + mov rcx,r15 + mov rdx,r8 + mov rbp,r9 + + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + mov r13,r10 + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + mov rsi,r11 + sbb r11,QWORD PTR[40+rbx] + + cmovc r14,rax + cmovc r15,rcx + cmovc r8,rdx + mov QWORD PTR[rdi],r14 + cmovc r9,rbp + mov QWORD PTR[8+rdi],r15 + cmovc r10,r13 + mov QWORD PTR[16+rdi],r8 + cmovc r11,rsi + mov QWORD PTR[24+rdi],r9 + mov QWORD PTR[32+rdi],r10 + mov QWORD PTR[40+rdi],r11 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_fromx_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_fromx_mont_384:: +fromx_mont_384 ENDP + +ALIGN 32 +__mulx_by_1_mont_384 PROC PRIVATE + DB 243,15,30,250 + + mov r8,QWORD PTR[rsi] + mov rdx,rcx + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + imul rdx,r8 + + + xor r14,r14 + mulx rbp,rax,QWORD PTR[rbx] + adcx r8,rax + adox r9,rbp + + mulx rbp,rax,QWORD PTR[8+rbx] + adcx r9,rax + adox r10,rbp + + mulx rbp,rax,QWORD PTR[16+rbx] + adcx r10,rax + adox r11,rbp + + mulx rbp,rax,QWORD PTR[24+rbx] + adcx r11,rax + adox r12,rbp + + mulx rbp,rax,QWORD PTR[32+rbx] + adcx r12,rax + adox r13,rbp + + mulx rbp,rax,QWORD PTR[40+rbx] + mov rdx,rcx + adcx r13,rax + adox rbp,r14 + adcx r14,rbp + imul rdx,r9 + + + xor r15,r15 + mulx rbp,rax,QWORD PTR[rbx] + adcx r9,rax + adox r10,rbp + + mulx rbp,rax,QWORD PTR[8+rbx] + adcx r10,rax + adox r11,rbp + + mulx rbp,rax,QWORD PTR[16+rbx] + adcx r11,rax + adox r12,rbp + + mulx rbp,rax,QWORD PTR[24+rbx] + adcx r12,rax + adox r13,rbp + + mulx rbp,rax,QWORD PTR[32+rbx] + adcx r13,rax + adox r14,rbp + + mulx rbp,rax,QWORD PTR[40+rbx] + mov rdx,rcx + adcx r14,rax + adox rbp,r15 + adcx r15,rbp + imul rdx,r10 + + + xor r8,r8 + mulx rbp,rax,QWORD PTR[rbx] + adcx r10,rax + adox r11,rbp + + mulx rbp,rax,QWORD PTR[8+rbx] + adcx r11,rax + adox r12,rbp + + mulx rbp,rax,QWORD PTR[16+rbx] + adcx r12,rax + adox r13,rbp + + mulx rbp,rax,QWORD PTR[24+rbx] + adcx r13,rax + adox r14,rbp + + mulx rbp,rax,QWORD PTR[32+rbx] + adcx r14,rax + adox r15,rbp + + mulx rbp,rax,QWORD PTR[40+rbx] + mov rdx,rcx + adcx r15,rax + adox rbp,r8 + adcx r8,rbp + imul rdx,r11 + + + xor r9,r9 + mulx rbp,rax,QWORD PTR[rbx] + adcx r11,rax + adox r12,rbp + + mulx rbp,rax,QWORD PTR[8+rbx] + adcx r12,rax + adox r13,rbp + + mulx rbp,rax,QWORD PTR[16+rbx] + adcx r13,rax + adox r14,rbp + + mulx rbp,rax,QWORD PTR[24+rbx] + adcx r14,rax + adox r15,rbp + + mulx rbp,rax,QWORD PTR[32+rbx] + adcx r15,rax + adox r8,rbp + + mulx rbp,rax,QWORD PTR[40+rbx] + mov rdx,rcx + adcx r8,rax + adox rbp,r9 + adcx r9,rbp + imul rdx,r12 + + + xor r10,r10 + mulx rbp,rax,QWORD PTR[rbx] + adcx r12,rax + adox r13,rbp + + mulx rbp,rax,QWORD PTR[8+rbx] + adcx r13,rax + adox r14,rbp + + mulx rbp,rax,QWORD PTR[16+rbx] + adcx r14,rax + adox r15,rbp + + mulx rbp,rax,QWORD PTR[24+rbx] + adcx r15,rax + adox r8,rbp + + mulx rbp,rax,QWORD PTR[32+rbx] + adcx r8,rax + adox r9,rbp + + mulx rbp,rax,QWORD PTR[40+rbx] + mov rdx,rcx + adcx r9,rax + adox rbp,r10 + adcx r10,rbp + imul rdx,r13 + + + xor r11,r11 + mulx rbp,rax,QWORD PTR[rbx] + adcx r13,rax + adox r14,rbp + + mulx rbp,rax,QWORD PTR[8+rbx] + adcx r14,rax + adox r15,rbp + + mulx rbp,rax,QWORD PTR[16+rbx] + adcx r15,rax + adox r8,rbp + + mulx rbp,rax,QWORD PTR[24+rbx] + adcx r8,rax + adox r9,rbp + + mulx rbp,rax,QWORD PTR[32+rbx] + adcx r9,rax + adox r10,rbp + + mulx rbp,rax,QWORD PTR[40+rbx] + mov rdx,rcx + adcx r10,rax + adox rbp,r11 + adcx r11,rbp + DB 0F3h,0C3h ;repret +__mulx_by_1_mont_384 ENDP + + +ALIGN 32 +__redx_tail_mont_384 PROC PRIVATE + DB 243,15,30,250 + + add r14,QWORD PTR[48+rsi] + mov rax,r14 + adc r15,QWORD PTR[56+rsi] + adc r8,QWORD PTR[64+rsi] + adc r9,QWORD PTR[72+rsi] + mov rcx,r15 + adc r10,QWORD PTR[80+rsi] + adc r11,QWORD PTR[88+rsi] + sbb r12,r12 + + + + + mov rdx,r8 + mov rbp,r9 + + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + mov r13,r10 + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + mov rsi,r11 + sbb r11,QWORD PTR[40+rbx] + sbb r12,0 + + cmovc r14,rax + cmovc r15,rcx + cmovc r8,rdx + mov QWORD PTR[rdi],r14 + cmovc r9,rbp + mov QWORD PTR[8+rdi],r15 + cmovc r10,r13 + mov QWORD PTR[16+rdi],r8 + cmovc r11,rsi + mov QWORD PTR[24+rdi],r9 + mov QWORD PTR[32+rdi],r10 + mov QWORD PTR[40+rdi],r11 + + DB 0F3h,0C3h ;repret +__redx_tail_mont_384 ENDP + +PUBLIC sgn0x_pty_mont_384 + + +ALIGN 32 +sgn0x_pty_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sgn0x_pty_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +sgn0_pty_mont_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sgn0x_pty_mont_384:: + + + mov rbx,rsi + lea rsi,QWORD PTR[rdi] + mov rcx,rdx + call __mulx_by_1_mont_384 + + xor rax,rax + mov r13,r14 + add r14,r14 + adc r15,r15 + adc r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rax,0 + + sub r14,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + sbb r11,QWORD PTR[40+rbx] + sbb rax,0 + + not rax + and r13,1 + and rax,2 + or rax,r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sgn0x_pty_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sgn0x_pty_mont_384:: +sgn0x_pty_mont_384 ENDP + +PUBLIC sgn0x_pty_mont_384x + + +ALIGN 32 +sgn0x_pty_mont_384x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sgn0x_pty_mont_384x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +sgn0_pty_mont_384x$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,8 + +$L$SEH_body_sgn0x_pty_mont_384x:: + + + mov rbx,rsi + lea rsi,QWORD PTR[48+rdi] + mov rcx,rdx + call __mulx_by_1_mont_384 + + mov r12,r14 + or r14,r15 + or r14,r8 + or r14,r9 + or r14,r10 + or r14,r11 + + lea rsi,QWORD PTR[rdi] + xor rdi,rdi + mov r13,r12 + add r12,r12 + adc r15,r15 + adc r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rdi,0 + + sub r12,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + sbb r11,QWORD PTR[40+rbx] + sbb rdi,0 + + mov QWORD PTR[rsp],r14 + not rdi + and r13,1 + and rdi,2 + or rdi,r13 + + call __mulx_by_1_mont_384 + + mov r12,r14 + or r14,r15 + or r14,r8 + or r14,r9 + or r14,r10 + or r14,r11 + + xor rax,rax + mov r13,r12 + add r12,r12 + adc r15,r15 + adc r8,r8 + adc r9,r9 + adc r10,r10 + adc r11,r11 + adc rax,0 + + sub r12,QWORD PTR[rbx] + sbb r15,QWORD PTR[8+rbx] + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + sbb r10,QWORD PTR[32+rbx] + sbb r11,QWORD PTR[40+rbx] + sbb rax,0 + + mov r12,QWORD PTR[rsp] + + not rax + + test r14,r14 + cmovz r13,rdi + + test r12,r12 + cmovnz rax,rdi + + and r13,1 + and rax,2 + or rax,r13 + + mov r15,QWORD PTR[8+rsp] + + mov r14,QWORD PTR[16+rsp] + + mov r13,QWORD PTR[24+rsp] + + mov r12,QWORD PTR[32+rsp] + + mov rbx,QWORD PTR[40+rsp] + + mov rbp,QWORD PTR[48+rsp] + + lea rsp,QWORD PTR[56+rsp] + +$L$SEH_epilogue_sgn0x_pty_mont_384x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sgn0x_pty_mont_384x:: +sgn0x_pty_mont_384x ENDP +PUBLIC mulx_mont_384 + + +ALIGN 32 +mulx_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_mulx_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] +mul_mont_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,QWORD PTR[((-24))+rsp] + +$L$SEH_body_mulx_mont_384:: + + + mov rbx,rdx + mov rdx,QWORD PTR[rdx] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov QWORD PTR[16+rsp],rdi + mov rdi,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + lea rsi,QWORD PTR[((-128))+rsi] + lea rcx,QWORD PTR[((-128))+rcx] + mov QWORD PTR[rsp],r8 + + mulx r9,r8,r14 + call __mulx_mont_384 + + mov r15,QWORD PTR[24+rsp] + + mov r14,QWORD PTR[32+rsp] + + mov r13,QWORD PTR[40+rsp] + + mov r12,QWORD PTR[48+rsp] + + mov rbx,QWORD PTR[56+rsp] + + mov rbp,QWORD PTR[64+rsp] + + lea rsp,QWORD PTR[72+rsp] + +$L$SEH_epilogue_mulx_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_mulx_mont_384:: +mulx_mont_384 ENDP + +ALIGN 32 +__mulx_mont_384 PROC PRIVATE + DB 243,15,30,250 + + + mulx r10,r14,r15 + mulx r11,r15,rax + add r9,r14 + mulx r12,rax,r12 + adc r10,r15 + mulx r13,rdi,rdi + adc r11,rax + mulx r14,rbp,rbp + mov rdx,QWORD PTR[8+rbx] + adc r12,rdi + adc r13,rbp + adc r14,0 + xor r15,r15 + + mov QWORD PTR[16+rsp],r8 + imul r8,QWORD PTR[8+rsp] + + + xor rax,rax + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r9,rdi + adcx r10,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r10,rdi + adcx r11,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r11,rdi + adcx r12,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r8 + adox r14,rdi + adcx r15,rbp + adox r15,rax + adox rax,rax + + + xor r8,r8 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx rdi,QWORD PTR[16+rsp] + adox r9,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r9,rdi + adox r10,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r10,rdi + adox r11,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r11,rdi + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[16+rbx] + adcx r13,rdi + adox r14,rbp + adcx r14,r8 + adox r15,r8 + adcx r15,r8 + adox rax,r8 + adcx rax,r8 + mov QWORD PTR[16+rsp],r9 + imul r9,QWORD PTR[8+rsp] + + + xor r8,r8 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r10,rdi + adcx r11,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r11,rdi + adcx r12,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r9 + adox r15,rdi + adcx rax,rbp + adox rax,r8 + adox r8,r8 + + + xor r9,r9 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx rdi,QWORD PTR[16+rsp] + adox r10,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r10,rdi + adox r11,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r11,rdi + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[24+rbx] + adcx r14,rdi + adox r15,rbp + adcx r15,r9 + adox rax,r9 + adcx rax,r9 + adox r8,r9 + adcx r8,r9 + mov QWORD PTR[16+rsp],r10 + imul r10,QWORD PTR[8+rsp] + + + xor r9,r9 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r11,rdi + adcx r12,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r15,rdi + adcx rax,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r10 + adox rax,rdi + adcx r8,rbp + adox r8,r9 + adox r9,r9 + + + xor r10,r10 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx rdi,QWORD PTR[16+rsp] + adox r11,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r11,rdi + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[32+rbx] + adcx r15,rdi + adox rax,rbp + adcx rax,r10 + adox r8,r10 + adcx r8,r10 + adox r9,r10 + adcx r9,r10 + mov QWORD PTR[16+rsp],r11 + imul r11,QWORD PTR[8+rsp] + + + xor r10,r10 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r15,rdi + adcx rax,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox rax,rdi + adcx r8,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r11 + adox r8,rdi + adcx r9,rbp + adox r9,r10 + adox r10,r10 + + + xor r11,r11 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx rdi,QWORD PTR[16+rsp] + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r15,rdi + adox rax,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[40+rbx] + adcx rax,rdi + adox r8,rbp + adcx r8,r11 + adox r9,r11 + adcx r9,r11 + adox r10,r11 + adcx r10,r11 + mov QWORD PTR[16+rsp],r12 + imul r12,QWORD PTR[8+rsp] + + + xor r11,r11 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r15,rdi + adcx rax,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox rax,rdi + adcx r8,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r8,rdi + adcx r9,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r12 + adox r9,rdi + adcx r10,rbp + adox r10,r11 + adox r11,r11 + + + xor r12,r12 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx rdi,QWORD PTR[16+rsp] + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r15,rdi + adox rax,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx rax,rdi + adox r8,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,r13 + adcx r8,rdi + adox r9,rbp + adcx r9,r12 + adox r10,r12 + adcx r10,r12 + adox r11,r12 + adcx r11,r12 + imul rdx,QWORD PTR[8+rsp] + mov rbx,QWORD PTR[24+rsp] + + + xor r12,r12 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r15,rdi + adox rax,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx rax,rdi + adox r8,rbp + mov r13,r15 + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r8,rdi + adox r9,rbp + mov rsi,rax + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + adcx r9,rdi + adox r10,rbp + mov rdx,r14 + adcx r10,r12 + adox r11,r12 + lea rcx,QWORD PTR[128+rcx] + mov r12,r8 + adc r11,0 + + + + + sub r14,QWORD PTR[rcx] + sbb r15,QWORD PTR[8+rcx] + mov rdi,r9 + sbb rax,QWORD PTR[16+rcx] + sbb r8,QWORD PTR[24+rcx] + sbb r9,QWORD PTR[32+rcx] + mov rbp,r10 + sbb r10,QWORD PTR[40+rcx] + sbb r11,0 + + cmovnc rdx,r14 + cmovc r15,r13 + cmovc rax,rsi + cmovnc r12,r8 + mov QWORD PTR[rbx],rdx + cmovnc rdi,r9 + mov QWORD PTR[8+rbx],r15 + cmovnc rbp,r10 + mov QWORD PTR[16+rbx],rax + mov QWORD PTR[24+rbx],r12 + mov QWORD PTR[32+rbx],rdi + mov QWORD PTR[40+rbx],rbp + + DB 0F3h,0C3h ;repret + +__mulx_mont_384 ENDP +PUBLIC sqrx_mont_384 + + +ALIGN 32 +sqrx_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +sqr_mont_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,QWORD PTR[((-24))+rsp] + +$L$SEH_body_sqrx_mont_384:: + + + mov r8,rcx + lea rcx,QWORD PTR[((-128))+rdx] + mov rdx,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov QWORD PTR[16+rsp],rdi + mov rdi,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + + lea rbx,QWORD PTR[rsi] + mov QWORD PTR[rsp],r8 + lea rsi,QWORD PTR[((-128))+rsi] + + mulx r9,r8,rdx + call __mulx_mont_384 + + mov r15,QWORD PTR[24+rsp] + + mov r14,QWORD PTR[32+rsp] + + mov r13,QWORD PTR[40+rsp] + + mov r12,QWORD PTR[48+rsp] + + mov rbx,QWORD PTR[56+rsp] + + mov rbp,QWORD PTR[64+rsp] + + lea rsp,QWORD PTR[72+rsp] + +$L$SEH_epilogue_sqrx_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_mont_384:: +sqrx_mont_384 ENDP + +PUBLIC sqrx_n_mul_mont_384 + + +ALIGN 32 +sqrx_n_mul_mont_384 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_n_mul_mont_384:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] +sqr_n_mul_mont_384$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,QWORD PTR[((-40))+rsp] + +$L$SEH_body_sqrx_n_mul_mont_384:: + + + mov r10,rdx + mov rdx,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov rbx,rsi + mov r12,QWORD PTR[24+rsi] + mov QWORD PTR[16+rsp],rdi + mov rdi,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + + mov QWORD PTR[rsp],r8 + mov QWORD PTR[24+rsp],r9 + movq xmm2,QWORD PTR[r9] + +$L$oop_sqrx_384:: + movd xmm1,r10d + lea rsi,QWORD PTR[((-128))+rbx] + lea rcx,QWORD PTR[((-128))+rcx] + + mulx r9,r8,rdx + call __mulx_mont_384 + + movd r10d,xmm1 + dec r10d + jnz $L$oop_sqrx_384 + + mov r14,rdx +DB 102,72,15,126,210 + lea rsi,QWORD PTR[((-128))+rbx] + mov rbx,QWORD PTR[24+rsp] + lea rcx,QWORD PTR[((-128))+rcx] + + mulx r9,r8,r14 + call __mulx_mont_384 + + mov r15,QWORD PTR[40+rsp] + + mov r14,QWORD PTR[48+rsp] + + mov r13,QWORD PTR[56+rsp] + + mov r12,QWORD PTR[64+rsp] + + mov rbx,QWORD PTR[72+rsp] + + mov rbp,QWORD PTR[80+rsp] + + lea rsp,QWORD PTR[88+rsp] + +$L$SEH_epilogue_sqrx_n_mul_mont_384:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_n_mul_mont_384:: +sqrx_n_mul_mont_384 ENDP + +PUBLIC sqrx_n_mul_mont_383 + + +ALIGN 32 +sqrx_n_mul_mont_383 PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_n_mul_mont_383:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] +sqr_n_mul_mont_383$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,QWORD PTR[((-40))+rsp] + +$L$SEH_body_sqrx_n_mul_mont_383:: + + + mov r10,rdx + mov rdx,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov rbx,rsi + mov r12,QWORD PTR[24+rsi] + mov QWORD PTR[16+rsp],rdi + mov rdi,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + + mov QWORD PTR[rsp],r8 + mov QWORD PTR[24+rsp],r9 + movq xmm2,QWORD PTR[r9] + lea rcx,QWORD PTR[((-128))+rcx] + +$L$oop_sqrx_383:: + movd xmm1,r10d + lea rsi,QWORD PTR[((-128))+rbx] + + mulx r9,r8,rdx + call __mulx_mont_383_nonred + + movd r10d,xmm1 + dec r10d + jnz $L$oop_sqrx_383 + + mov r14,rdx +DB 102,72,15,126,210 + lea rsi,QWORD PTR[((-128))+rbx] + mov rbx,QWORD PTR[24+rsp] + + mulx r9,r8,r14 + call __mulx_mont_384 + + mov r15,QWORD PTR[40+rsp] + + mov r14,QWORD PTR[48+rsp] + + mov r13,QWORD PTR[56+rsp] + + mov r12,QWORD PTR[64+rsp] + + mov rbx,QWORD PTR[72+rsp] + + mov rbp,QWORD PTR[80+rsp] + + lea rsp,QWORD PTR[88+rsp] + +$L$SEH_epilogue_sqrx_n_mul_mont_383:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_n_mul_mont_383:: +sqrx_n_mul_mont_383 ENDP + +ALIGN 32 +__mulx_mont_383_nonred PROC PRIVATE + DB 243,15,30,250 + + + mulx r10,r14,r15 + mulx r11,r15,rax + add r9,r14 + mulx r12,rax,r12 + adc r10,r15 + mulx r13,rdi,rdi + adc r11,rax + mulx r14,rbp,rbp + mov rdx,QWORD PTR[8+rbx] + adc r12,rdi + adc r13,rbp + adc r14,0 + mov rax,r8 + imul r8,QWORD PTR[8+rsp] + + + xor r15,r15 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r9,rdi + adcx r10,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r10,rdi + adcx r11,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r11,rdi + adcx r12,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r8 + adox r14,rdi + adcx rbp,r15 + adox r15,rbp + + + xor r8,r8 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx rax,rdi + adox r9,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r9,rdi + adox r10,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r10,rdi + adox r11,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r11,rdi + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[16+rbx] + adcx r13,rdi + adox r14,rbp + adcx r14,rax + adox r15,rax + adcx r15,rax + mov r8,r9 + imul r9,QWORD PTR[8+rsp] + + + xor rax,rax + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r10,rdi + adcx r11,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r11,rdi + adcx r12,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r9 + adox r15,rdi + adcx rbp,rax + adox rax,rbp + + + xor r9,r9 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx r8,rdi + adox r10,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r10,rdi + adox r11,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r11,rdi + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[24+rbx] + adcx r14,rdi + adox r15,rbp + adcx r15,r8 + adox rax,r8 + adcx rax,r8 + mov r9,r10 + imul r10,QWORD PTR[8+rsp] + + + xor r8,r8 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r11,rdi + adcx r12,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r15,rdi + adcx rax,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r10 + adox rax,rdi + adcx rbp,r8 + adox r8,rbp + + + xor r10,r10 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx r9,rdi + adox r11,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r11,rdi + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[32+rbx] + adcx r15,rdi + adox rax,rbp + adcx rax,r9 + adox r8,r9 + adcx r8,r9 + mov r10,r11 + imul r11,QWORD PTR[8+rsp] + + + xor r9,r9 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r12,rdi + adcx r13,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox r15,rdi + adcx rax,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox rax,rdi + adcx r8,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r11 + adox r8,rdi + adcx rbp,r9 + adox r9,rbp + + + xor r11,r11 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx r10,rdi + adox r12,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r12,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r15,rdi + adox rax,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,QWORD PTR[40+rbx] + adcx rax,rdi + adox r8,rbp + adcx r8,r10 + adox r9,r10 + adcx r9,r10 + mov r11,r12 + imul r12,QWORD PTR[8+rsp] + + + xor r10,r10 + mulx rbp,rdi,QWORD PTR[((0+128))+rsi] + adox r13,rdi + adcx r14,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rsi] + adox r14,rdi + adcx r15,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rsi] + adox r15,rdi + adcx rax,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rsi] + adox rax,rdi + adcx r8,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rsi] + adox r8,rdi + adcx r9,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rsi] + mov rdx,r12 + adox r9,rdi + adcx rbp,r10 + adox r10,rbp + + + xor r12,r12 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx r11,rdi + adox r13,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx r15,rdi + adox rax,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx rax,rdi + adox r8,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,r13 + adcx r8,rdi + adox r9,rbp + adcx r9,r11 + adox r10,r11 + adcx r10,r11 + imul rdx,QWORD PTR[8+rsp] + mov rbx,QWORD PTR[24+rsp] + + + xor r12,r12 + mulx rbp,rdi,QWORD PTR[((0+128))+rcx] + adcx r13,rdi + adox r14,rbp + + mulx rbp,rdi,QWORD PTR[((8+128))+rcx] + adcx r14,rdi + adox r15,rbp + + mulx rbp,rdi,QWORD PTR[((16+128))+rcx] + adcx r15,rdi + adox rax,rbp + + mulx rbp,rdi,QWORD PTR[((24+128))+rcx] + adcx rax,rdi + adox r8,rbp + + mulx rbp,rdi,QWORD PTR[((32+128))+rcx] + adcx r8,rdi + adox r9,rbp + + mulx rbp,rdi,QWORD PTR[((40+128))+rcx] + mov rdx,r14 + adcx r9,rdi + adox r10,rbp + adc r10,0 + mov r12,r8 + + mov QWORD PTR[rbx],r14 + mov QWORD PTR[8+rbx],r15 + mov QWORD PTR[16+rbx],rax + mov rdi,r9 + mov QWORD PTR[24+rbx],r8 + mov QWORD PTR[32+rbx],r9 + mov QWORD PTR[40+rbx],r10 + mov rbp,r10 + + DB 0F3h,0C3h ;repret + +__mulx_mont_383_nonred ENDP +PUBLIC sqrx_mont_382x + + +ALIGN 32 +sqrx_mont_382x PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_sqrx_mont_382x:: + + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 +sqr_mont_382x$1:: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,136 + +$L$SEH_body_sqrx_mont_382x:: + + + mov QWORD PTR[rsp],rcx + mov rcx,rdx + mov QWORD PTR[16+rsp],rdi + mov QWORD PTR[24+rsp],rsi + + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + + mov r14,r8 + add r8,QWORD PTR[48+rsi] + mov r15,r9 + adc r9,QWORD PTR[56+rsi] + mov rax,r10 + adc r10,QWORD PTR[64+rsi] + mov rdx,r11 + adc r11,QWORD PTR[72+rsi] + mov rbx,r12 + adc r12,QWORD PTR[80+rsi] + mov rbp,r13 + adc r13,QWORD PTR[88+rsi] + + sub r14,QWORD PTR[48+rsi] + sbb r15,QWORD PTR[56+rsi] + sbb rax,QWORD PTR[64+rsi] + sbb rdx,QWORD PTR[72+rsi] + sbb rbx,QWORD PTR[80+rsi] + sbb rbp,QWORD PTR[88+rsi] + sbb rdi,rdi + + mov QWORD PTR[((32+0))+rsp],r8 + mov QWORD PTR[((32+8))+rsp],r9 + mov QWORD PTR[((32+16))+rsp],r10 + mov QWORD PTR[((32+24))+rsp],r11 + mov QWORD PTR[((32+32))+rsp],r12 + mov QWORD PTR[((32+40))+rsp],r13 + + mov QWORD PTR[((32+48))+rsp],r14 + mov QWORD PTR[((32+56))+rsp],r15 + mov QWORD PTR[((32+64))+rsp],rax + mov QWORD PTR[((32+72))+rsp],rdx + mov QWORD PTR[((32+80))+rsp],rbx + mov QWORD PTR[((32+88))+rsp],rbp + mov QWORD PTR[((32+96))+rsp],rdi + + + + lea rbx,QWORD PTR[48+rsi] + + mov rdx,QWORD PTR[48+rsi] + mov r14,QWORD PTR[rsi] + mov r15,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rdi,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + lea rsi,QWORD PTR[((-128))+rsi] + lea rcx,QWORD PTR[((-128))+rcx] + + mulx r9,r8,r14 + call __mulx_mont_383_nonred + add rdx,rdx + adc r15,r15 + adc rax,rax + adc r12,r12 + adc rdi,rdi + adc rbp,rbp + + mov QWORD PTR[48+rbx],rdx + mov QWORD PTR[56+rbx],r15 + mov QWORD PTR[64+rbx],rax + mov QWORD PTR[72+rbx],r12 + mov QWORD PTR[80+rbx],rdi + mov QWORD PTR[88+rbx],rbp + + lea rsi,QWORD PTR[((32-128))+rsp] + lea rbx,QWORD PTR[((32+48))+rsp] + + mov rdx,QWORD PTR[((32+48))+rsp] + mov r14,QWORD PTR[((32+0))+rsp] + mov r15,QWORD PTR[((32+8))+rsp] + mov rax,QWORD PTR[((32+16))+rsp] + mov r12,QWORD PTR[((32+24))+rsp] + mov rdi,QWORD PTR[((32+32))+rsp] + mov rbp,QWORD PTR[((32+40))+rsp] + + + + mulx r9,r8,r14 + call __mulx_mont_383_nonred + mov r14,QWORD PTR[((32+96))+rsp] + lea rcx,QWORD PTR[128+rcx] + mov r8,QWORD PTR[((32+0))+rsp] + and r8,r14 + mov r9,QWORD PTR[((32+8))+rsp] + and r9,r14 + mov r10,QWORD PTR[((32+16))+rsp] + and r10,r14 + mov r11,QWORD PTR[((32+24))+rsp] + and r11,r14 + mov r13,QWORD PTR[((32+32))+rsp] + and r13,r14 + and r14,QWORD PTR[((32+40))+rsp] + + sub rdx,r8 + mov r8,QWORD PTR[rcx] + sbb r15,r9 + mov r9,QWORD PTR[8+rcx] + sbb rax,r10 + mov r10,QWORD PTR[16+rcx] + sbb r12,r11 + mov r11,QWORD PTR[24+rcx] + sbb rdi,r13 + mov r13,QWORD PTR[32+rcx] + sbb rbp,r14 + sbb r14,r14 + + and r8,r14 + and r9,r14 + and r10,r14 + and r11,r14 + and r13,r14 + and r14,QWORD PTR[40+rcx] + + add rdx,r8 + adc r15,r9 + adc rax,r10 + adc r12,r11 + adc rdi,r13 + adc rbp,r14 + + mov QWORD PTR[rbx],rdx + mov QWORD PTR[8+rbx],r15 + mov QWORD PTR[16+rbx],rax + mov QWORD PTR[24+rbx],r12 + mov QWORD PTR[32+rbx],rdi + mov QWORD PTR[40+rbx],rbp + lea r8,QWORD PTR[136+rsp] + mov r15,QWORD PTR[r8] + + mov r14,QWORD PTR[8+r8] + + mov r13,QWORD PTR[16+r8] + + mov r12,QWORD PTR[24+r8] + + mov rbx,QWORD PTR[32+r8] + + mov rbp,QWORD PTR[40+r8] + + lea rsp,QWORD PTR[48+r8] + +$L$SEH_epilogue_sqrx_mont_382x:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_sqrx_mont_382x:: +sqrx_mont_382x ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_mulx_mont_384x + DD imagerel $L$SEH_body_mulx_mont_384x + DD imagerel $L$SEH_info_mulx_mont_384x_prologue + + DD imagerel $L$SEH_body_mulx_mont_384x + DD imagerel $L$SEH_epilogue_mulx_mont_384x + DD imagerel $L$SEH_info_mulx_mont_384x_body + + DD imagerel $L$SEH_epilogue_mulx_mont_384x + DD imagerel $L$SEH_end_mulx_mont_384x + DD imagerel $L$SEH_info_mulx_mont_384x_epilogue + + DD imagerel $L$SEH_begin_sqrx_mont_384x + DD imagerel $L$SEH_body_sqrx_mont_384x + DD imagerel $L$SEH_info_sqrx_mont_384x_prologue + + DD imagerel $L$SEH_body_sqrx_mont_384x + DD imagerel $L$SEH_epilogue_sqrx_mont_384x + DD imagerel $L$SEH_info_sqrx_mont_384x_body + + DD imagerel $L$SEH_epilogue_sqrx_mont_384x + DD imagerel $L$SEH_end_sqrx_mont_384x + DD imagerel $L$SEH_info_sqrx_mont_384x_epilogue + + DD imagerel $L$SEH_begin_mulx_382x + DD imagerel $L$SEH_body_mulx_382x + DD imagerel $L$SEH_info_mulx_382x_prologue + + DD imagerel $L$SEH_body_mulx_382x + DD imagerel $L$SEH_epilogue_mulx_382x + DD imagerel $L$SEH_info_mulx_382x_body + + DD imagerel $L$SEH_epilogue_mulx_382x + DD imagerel $L$SEH_end_mulx_382x + DD imagerel $L$SEH_info_mulx_382x_epilogue + + DD imagerel $L$SEH_begin_sqrx_382x + DD imagerel $L$SEH_body_sqrx_382x + DD imagerel $L$SEH_info_sqrx_382x_prologue + + DD imagerel $L$SEH_body_sqrx_382x + DD imagerel $L$SEH_epilogue_sqrx_382x + DD imagerel $L$SEH_info_sqrx_382x_body + + DD imagerel $L$SEH_epilogue_sqrx_382x + DD imagerel $L$SEH_end_sqrx_382x + DD imagerel $L$SEH_info_sqrx_382x_epilogue + + DD imagerel $L$SEH_begin_mulx_384 + DD imagerel $L$SEH_body_mulx_384 + DD imagerel $L$SEH_info_mulx_384_prologue + + DD imagerel $L$SEH_body_mulx_384 + DD imagerel $L$SEH_epilogue_mulx_384 + DD imagerel $L$SEH_info_mulx_384_body + + DD imagerel $L$SEH_epilogue_mulx_384 + DD imagerel $L$SEH_end_mulx_384 + DD imagerel $L$SEH_info_mulx_384_epilogue + + DD imagerel $L$SEH_begin_sqrx_384 + DD imagerel $L$SEH_body_sqrx_384 + DD imagerel $L$SEH_info_sqrx_384_prologue + + DD imagerel $L$SEH_body_sqrx_384 + DD imagerel $L$SEH_epilogue_sqrx_384 + DD imagerel $L$SEH_info_sqrx_384_body + + DD imagerel $L$SEH_epilogue_sqrx_384 + DD imagerel $L$SEH_end_sqrx_384 + DD imagerel $L$SEH_info_sqrx_384_epilogue + + DD imagerel $L$SEH_begin_redcx_mont_384 + DD imagerel $L$SEH_body_redcx_mont_384 + DD imagerel $L$SEH_info_redcx_mont_384_prologue + + DD imagerel $L$SEH_body_redcx_mont_384 + DD imagerel $L$SEH_epilogue_redcx_mont_384 + DD imagerel $L$SEH_info_redcx_mont_384_body + + DD imagerel $L$SEH_epilogue_redcx_mont_384 + DD imagerel $L$SEH_end_redcx_mont_384 + DD imagerel $L$SEH_info_redcx_mont_384_epilogue + + DD imagerel $L$SEH_begin_fromx_mont_384 + DD imagerel $L$SEH_body_fromx_mont_384 + DD imagerel $L$SEH_info_fromx_mont_384_prologue + + DD imagerel $L$SEH_body_fromx_mont_384 + DD imagerel $L$SEH_epilogue_fromx_mont_384 + DD imagerel $L$SEH_info_fromx_mont_384_body + + DD imagerel $L$SEH_epilogue_fromx_mont_384 + DD imagerel $L$SEH_end_fromx_mont_384 + DD imagerel $L$SEH_info_fromx_mont_384_epilogue + + DD imagerel $L$SEH_begin_sgn0x_pty_mont_384 + DD imagerel $L$SEH_body_sgn0x_pty_mont_384 + DD imagerel $L$SEH_info_sgn0x_pty_mont_384_prologue + + DD imagerel $L$SEH_body_sgn0x_pty_mont_384 + DD imagerel $L$SEH_epilogue_sgn0x_pty_mont_384 + DD imagerel $L$SEH_info_sgn0x_pty_mont_384_body + + DD imagerel $L$SEH_epilogue_sgn0x_pty_mont_384 + DD imagerel $L$SEH_end_sgn0x_pty_mont_384 + DD imagerel $L$SEH_info_sgn0x_pty_mont_384_epilogue + + DD imagerel $L$SEH_begin_sgn0x_pty_mont_384x + DD imagerel $L$SEH_body_sgn0x_pty_mont_384x + DD imagerel $L$SEH_info_sgn0x_pty_mont_384x_prologue + + DD imagerel $L$SEH_body_sgn0x_pty_mont_384x + DD imagerel $L$SEH_epilogue_sgn0x_pty_mont_384x + DD imagerel $L$SEH_info_sgn0x_pty_mont_384x_body + + DD imagerel $L$SEH_epilogue_sgn0x_pty_mont_384x + DD imagerel $L$SEH_end_sgn0x_pty_mont_384x + DD imagerel $L$SEH_info_sgn0x_pty_mont_384x_epilogue + + DD imagerel $L$SEH_begin_mulx_mont_384 + DD imagerel $L$SEH_body_mulx_mont_384 + DD imagerel $L$SEH_info_mulx_mont_384_prologue + + DD imagerel $L$SEH_body_mulx_mont_384 + DD imagerel $L$SEH_epilogue_mulx_mont_384 + DD imagerel $L$SEH_info_mulx_mont_384_body + + DD imagerel $L$SEH_epilogue_mulx_mont_384 + DD imagerel $L$SEH_end_mulx_mont_384 + DD imagerel $L$SEH_info_mulx_mont_384_epilogue + + DD imagerel $L$SEH_begin_sqrx_mont_384 + DD imagerel $L$SEH_body_sqrx_mont_384 + DD imagerel $L$SEH_info_sqrx_mont_384_prologue + + DD imagerel $L$SEH_body_sqrx_mont_384 + DD imagerel $L$SEH_epilogue_sqrx_mont_384 + DD imagerel $L$SEH_info_sqrx_mont_384_body + + DD imagerel $L$SEH_epilogue_sqrx_mont_384 + DD imagerel $L$SEH_end_sqrx_mont_384 + DD imagerel $L$SEH_info_sqrx_mont_384_epilogue + + DD imagerel $L$SEH_begin_sqrx_n_mul_mont_384 + DD imagerel $L$SEH_body_sqrx_n_mul_mont_384 + DD imagerel $L$SEH_info_sqrx_n_mul_mont_384_prologue + + DD imagerel $L$SEH_body_sqrx_n_mul_mont_384 + DD imagerel $L$SEH_epilogue_sqrx_n_mul_mont_384 + DD imagerel $L$SEH_info_sqrx_n_mul_mont_384_body + + DD imagerel $L$SEH_epilogue_sqrx_n_mul_mont_384 + DD imagerel $L$SEH_end_sqrx_n_mul_mont_384 + DD imagerel $L$SEH_info_sqrx_n_mul_mont_384_epilogue + + DD imagerel $L$SEH_begin_sqrx_n_mul_mont_383 + DD imagerel $L$SEH_body_sqrx_n_mul_mont_383 + DD imagerel $L$SEH_info_sqrx_n_mul_mont_383_prologue + + DD imagerel $L$SEH_body_sqrx_n_mul_mont_383 + DD imagerel $L$SEH_epilogue_sqrx_n_mul_mont_383 + DD imagerel $L$SEH_info_sqrx_n_mul_mont_383_body + + DD imagerel $L$SEH_epilogue_sqrx_n_mul_mont_383 + DD imagerel $L$SEH_end_sqrx_n_mul_mont_383 + DD imagerel $L$SEH_info_sqrx_n_mul_mont_383_epilogue + + DD imagerel $L$SEH_begin_sqrx_mont_382x + DD imagerel $L$SEH_body_sqrx_mont_382x + DD imagerel $L$SEH_info_sqrx_mont_382x_prologue + + DD imagerel $L$SEH_body_sqrx_mont_382x + DD imagerel $L$SEH_epilogue_sqrx_mont_382x + DD imagerel $L$SEH_info_sqrx_mont_382x_body + + DD imagerel $L$SEH_epilogue_sqrx_mont_382x + DD imagerel $L$SEH_end_sqrx_mont_382x + DD imagerel $L$SEH_info_sqrx_mont_382x_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_mulx_mont_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mulx_mont_384x_body:: +DB 1,0,18,0 +DB 000h,0f4h,029h,000h +DB 000h,0e4h,02ah,000h +DB 000h,0d4h,02bh,000h +DB 000h,0c4h,02ch,000h +DB 000h,034h,02dh,000h +DB 000h,054h,02eh,000h +DB 000h,074h,030h,000h +DB 000h,064h,031h,000h +DB 000h,001h,02fh,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mulx_mont_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_mont_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_mont_384x_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_mont_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mulx_382x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mulx_382x_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mulx_382x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_382x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_382x_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_382x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mulx_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mulx_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,000h,000h +DB 000h,0e4h,001h,000h +DB 000h,0d4h,002h,000h +DB 000h,0c4h,003h,000h +DB 000h,034h,004h,000h +DB 000h,054h,005h,000h +DB 000h,074h,007h,000h +DB 000h,064h,008h,000h +DB 000h,052h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mulx_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_redcx_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_redcx_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_redcx_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_fromx_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_fromx_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_fromx_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sgn0x_pty_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sgn0x_pty_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sgn0x_pty_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sgn0x_pty_mont_384x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sgn0x_pty_mont_384x_body:: +DB 1,0,17,0 +DB 000h,0f4h,001h,000h +DB 000h,0e4h,002h,000h +DB 000h,0d4h,003h,000h +DB 000h,0c4h,004h,000h +DB 000h,034h,005h,000h +DB 000h,054h,006h,000h +DB 000h,074h,008h,000h +DB 000h,064h,009h,000h +DB 000h,062h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sgn0x_pty_mont_384x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_mulx_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_mulx_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,003h,000h +DB 000h,0e4h,004h,000h +DB 000h,0d4h,005h,000h +DB 000h,0c4h,006h,000h +DB 000h,034h,007h,000h +DB 000h,054h,008h,000h +DB 000h,074h,00ah,000h +DB 000h,064h,00bh,000h +DB 000h,082h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_mulx_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,003h,000h +DB 000h,0e4h,004h,000h +DB 000h,0d4h,005h,000h +DB 000h,0c4h,006h,000h +DB 000h,034h,007h,000h +DB 000h,054h,008h,000h +DB 000h,074h,00ah,000h +DB 000h,064h,00bh,000h +DB 000h,082h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_n_mul_mont_384_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_n_mul_mont_384_body:: +DB 1,0,17,0 +DB 000h,0f4h,005h,000h +DB 000h,0e4h,006h,000h +DB 000h,0d4h,007h,000h +DB 000h,0c4h,008h,000h +DB 000h,034h,009h,000h +DB 000h,054h,00ah,000h +DB 000h,074h,00ch,000h +DB 000h,064h,00dh,000h +DB 000h,0a2h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_n_mul_mont_384_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_n_mul_mont_383_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_n_mul_mont_383_body:: +DB 1,0,17,0 +DB 000h,0f4h,005h,000h +DB 000h,0e4h,006h,000h +DB 000h,0d4h,007h,000h +DB 000h,0c4h,008h,000h +DB 000h,034h,009h,000h +DB 000h,054h,00ah,000h +DB 000h,074h,00ch,000h +DB 000h,064h,00dh,000h +DB 000h,0a2h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_n_mul_mont_383_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_sqrx_mont_382x_prologue:: +DB 1,0,5,00bh +DB 0,074h,1,0 +DB 0,064h,2,0 +DB 0,0b3h +DB 0,0 + DD 0,0 +$L$SEH_info_sqrx_mont_382x_body:: +DB 1,0,18,0 +DB 000h,0f4h,011h,000h +DB 000h,0e4h,012h,000h +DB 000h,0d4h,013h,000h +DB 000h,0c4h,014h,000h +DB 000h,034h,015h,000h +DB 000h,054h,016h,000h +DB 000h,074h,018h,000h +DB 000h,064h,019h,000h +DB 000h,001h,017h,000h +DB 000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_sqrx_mont_382x_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/build/win64/sha256-armv8.asm b/crypto/blst_src/build/win64/sha256-armv8.asm new file mode 100644 index 00000000000..31e74219c19 --- /dev/null +++ b/crypto/blst_src/build/win64/sha256-armv8.asm @@ -0,0 +1,1084 @@ +// +// Copyright Supranational LLC +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// ==================================================================== +// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +// project. +// ==================================================================== +// +// sha256_block procedure for ARMv8. +// +// This module is stripped of scalar code paths, with rationale that all +// known processors are NEON-capable. +// +// See original module at CRYPTOGAMS for further details. + + COMMON |__blst_platform_cap|,4 + AREA |.text|,CODE,ALIGN=8,ARM64 + + ALIGN 64 + +|$LK256| + DCDU 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DCDU 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DCDU 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DCDU 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DCDU 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DCDU 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DCDU 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DCDU 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DCDU 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DCDU 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DCDU 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DCDU 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DCDU 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DCDU 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DCDU 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DCDU 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + DCDU 0 //terminator + + DCB "SHA256 block transform for ARMv8, CRYPTOGAMS by @dot-asm",0 + ALIGN 4 + ALIGN 4 + + EXPORT |blst_sha256_block_armv8|[FUNC] + ALIGN 64 +|blst_sha256_block_armv8| PROC +|$Lv8_entry| + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,|$LK256| + +|$Loop_hw| + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + DCDU 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + DCDU 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + DCDU 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + DCDU 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + DCDU 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + DCDU 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + DCDU 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + DCDU 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + DCDU 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + DCDU 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + DCDU 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + DCDU 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + DCDU 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + DCDU 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + DCDU 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + DCDU 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + DCDU 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + DCDU 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + DCDU 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + DCDU 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + DCDU 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + DCDU 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + DCDU 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + DCDU 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + DCDU 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b + DCDU 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + DCDU 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,|$Loop_hw| + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret + ENDP + + EXPORT |blst_sha256_block_data_order|[FUNC] + ALIGN 16 +|blst_sha256_block_data_order| PROC + adrp x16,__blst_platform_cap + ldr w16,[x16,__blst_platform_cap] + tst w16,#1 + bne |$Lv8_entry| + + stp x29, x30, [sp, #-16]! + mov x29, sp + sub sp,sp,#16*4 + + adr x16,|$LK256| + add x2,x1,x2,lsl#6 // len to point at the end of inp + + ld1 {v0.16b},[x1], #16 + ld1 {v1.16b},[x1], #16 + ld1 {v2.16b},[x1], #16 + ld1 {v3.16b},[x1], #16 + ld1 {v4.4s},[x16], #16 + ld1 {v5.4s},[x16], #16 + ld1 {v6.4s},[x16], #16 + ld1 {v7.4s},[x16], #16 + rev32 v0.16b,v0.16b // yes, even on + rev32 v1.16b,v1.16b // big-endian + rev32 v2.16b,v2.16b + rev32 v3.16b,v3.16b + mov x17,sp + add v4.4s,v4.4s,v0.4s + add v5.4s,v5.4s,v1.4s + add v6.4s,v6.4s,v2.4s + st1 {v4.4s,v5.4s},[x17], #32 + add v7.4s,v7.4s,v3.4s + st1 {v6.4s,v7.4s},[x17] + sub x17,x17,#32 + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#8] + ldp w7,w8,[x0,#16] + ldp w9,w10,[x0,#24] + ldr w12,[sp,#0] + mov w13,wzr + eor w14,w4,w5 + mov w15,wzr + b |$L_00_48| + + ALIGN 16 +|$L_00_48| + ext8 v4.16b,v0.16b,v1.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext8 v7.16b,v2.16b,v3.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v3.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v0.4s,v0.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v0.4s,v0.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v0.4s,v0.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v0.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v0.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v0.4s,#15 + add w8,w8,w12 + ushr v17.4s,v0.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v0.4s,#13 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v0.4s,v0.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v0.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext8 v4.16b,v1.16b,v2.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext8 v7.16b,v3.16b,v0.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v0.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v1.4s,v1.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v1.4s,v1.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v1.4s,v1.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v1.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v1.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v1.4s,#15 + add w4,w4,w12 + ushr v17.4s,v1.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v1.4s,#13 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v1.4s,v1.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v1.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + ext8 v4.16b,v2.16b,v3.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext8 v7.16b,v0.16b,v1.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v1.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v2.4s,v2.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v2.4s,v2.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v2.4s,v2.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v2.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v2.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v2.4s,#15 + add w8,w8,w12 + ushr v17.4s,v2.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v2.4s,#13 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v2.4s,v2.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v2.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext8 v4.16b,v3.16b,v0.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext8 v7.16b,v1.16b,v2.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v2.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v3.4s,v3.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v3.4s,v3.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v3.4s,v3.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v3.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v3.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v3.4s,#15 + add w4,w4,w12 + ushr v17.4s,v3.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v3.4s,#13 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v3.4s,v3.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v3.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[x16] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + cmp w12,#0 // check for K256 terminator + ldr w12,[sp,#0] + sub x17,x17,#64 + bne |$L_00_48| + + sub x16,x16,#256 // rewind x16 + cmp x1,x2 + mov x17, #64 + cseleq x17,x17,xzr + sub x1,x1,x17 // avoid SEGV + mov x17,sp + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v0.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v0.16b,v0.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v0.4s + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v1.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v1.16b,v1.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v1.4s + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v2.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v2.16b,v2.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v2.4s + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v3.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v3.16b,v3.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v3.4s + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w3,w3,w15 // h+=Sigma0(a) from the past + ldp w11,w12,[x0,#0] + add w3,w3,w13 // h+=Maj(a,b,c) from the past + ldp w13,w14,[x0,#8] + add w3,w3,w11 // accumulate + add w4,w4,w12 + ldp w11,w12,[x0,#16] + add w5,w5,w13 + add w6,w6,w14 + ldp w13,w14,[x0,#24] + add w7,w7,w11 + add w8,w8,w12 + ldr w12,[sp,#0] + stp w3,w4,[x0,#0] + add w9,w9,w13 + mov w13,wzr + stp w5,w6,[x0,#8] + add w10,w10,w14 + stp w7,w8,[x0,#16] + eor w14,w4,w5 + stp w9,w10,[x0,#24] + mov w15,wzr + mov x17,sp + bne |$L_00_48| + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret + ENDP + + + EXPORT |blst_sha256_emit|[FUNC] + ALIGN 16 +|blst_sha256_emit| PROC + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] +#ifndef __AARCH64EB__ + rev x4,x4 + rev x5,x5 + rev x6,x6 + rev x7,x7 +#endif + str w4,[x0,#4] + lsr x4,x4,#32 + str w5,[x0,#12] + lsr x5,x5,#32 + str w6,[x0,#20] + lsr x6,x6,#32 + str w7,[x0,#28] + lsr x7,x7,#32 + str w4,[x0,#0] + str w5,[x0,#8] + str w6,[x0,#16] + str w7,[x0,#24] + ret + ENDP + + + + EXPORT |blst_sha256_bcopy|[FUNC] + ALIGN 16 +|blst_sha256_bcopy| PROC +|$Loop_bcopy| + ldrb w3,[x1],#1 + sub x2,x2,#1 + strb w3,[x0],#1 + cbnz x2,|$Loop_bcopy| + ret + ENDP + + + + EXPORT |blst_sha256_hcopy|[FUNC] + ALIGN 16 +|blst_sha256_hcopy| PROC + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + ret + ENDP + END diff --git a/crypto/blst_src/build/win64/sha256-x86_64.asm b/crypto/blst_src/build/win64/sha256-x86_64.asm new file mode 100644 index 00000000000..a502a75ecaf --- /dev/null +++ b/crypto/blst_src/build/win64/sha256-x86_64.asm @@ -0,0 +1,1575 @@ +OPTION DOTNAME +_DATA SEGMENT +COMM __blst_platform_cap:DWORD:1 +_DATA ENDS +.text$ SEGMENT ALIGN(256) 'CODE' + +ALIGN 64 + +K256:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh + DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h +DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,64,100,111,116,45,97,115,109,0 +PUBLIC blst_sha256_block_data_order_shaext + + +ALIGN 64 +blst_sha256_block_data_order_shaext PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_blst_sha256_block_data_order_shaext:: + + + push rbp + + mov rbp,rsp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 +$L$blst_sha256_block_data_order$2:: + sub rsp,050h + + movaps XMMWORD PTR[(-80)+rbp],xmm6 + movaps XMMWORD PTR[(-64)+rbp],xmm7 + movaps XMMWORD PTR[(-48)+rbp],xmm8 + movaps XMMWORD PTR[(-32)+rbp],xmm9 + movaps XMMWORD PTR[(-16)+rbp],xmm10 + +$L$SEH_body_blst_sha256_block_data_order_shaext:: + + lea rcx,QWORD PTR[((K256+128))] + movdqu xmm1,XMMWORD PTR[rdi] + movdqu xmm2,XMMWORD PTR[16+rdi] + movdqa xmm7,XMMWORD PTR[((256-128))+rcx] + + pshufd xmm0,xmm1,01bh + pshufd xmm1,xmm1,0b1h + pshufd xmm2,xmm2,01bh + movdqa xmm8,xmm7 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + movdqu xmm3,XMMWORD PTR[rsi] + movdqu xmm4,XMMWORD PTR[16+rsi] + movdqu xmm5,XMMWORD PTR[32+rsi] +DB 102,15,56,0,223 + movdqu xmm6,XMMWORD PTR[48+rsi] + + movdqa xmm0,XMMWORD PTR[((0-128))+rcx] + paddd xmm0,xmm3 +DB 102,15,56,0,231 + movdqa xmm10,xmm2 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + nop + movdqa xmm9,xmm1 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((16-128))+rcx] + paddd xmm0,xmm4 +DB 102,15,56,0,239 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + lea rsi,QWORD PTR[64+rsi] +DB 15,56,204,220 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((32-128))+rcx] + paddd xmm0,xmm5 +DB 102,15,56,0,247 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((48-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((64-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((80-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((96-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((112-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((128-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((144-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((160-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((176-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((192-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((208-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 +DB 15,56,203,202 + paddd xmm6,xmm7 + + movdqa xmm0,XMMWORD PTR[((224-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh +DB 15,56,205,245 + movdqa xmm7,xmm8 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((240-128))+rcx] + paddd xmm0,xmm6 + nop +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + dec rdx + nop +DB 15,56,203,202 + + paddd xmm2,xmm10 + paddd xmm1,xmm9 + jnz $L$oop_shaext + + pshufd xmm2,xmm2,0b1h + pshufd xmm7,xmm1,01bh + pshufd xmm1,xmm1,0b1h + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,215,8 + + movdqu XMMWORD PTR[rdi],xmm1 + movdqu XMMWORD PTR[16+rdi],xmm2 + movaps xmm6,XMMWORD PTR[((-80))+rbp] + movaps xmm7,XMMWORD PTR[((-64))+rbp] + movaps xmm8,XMMWORD PTR[((-48))+rbp] + movaps xmm9,XMMWORD PTR[((-32))+rbp] + movaps xmm10,XMMWORD PTR[((-16))+rbp] + mov rsp,rbp + + pop rbp + +$L$SEH_epilogue_blst_sha256_block_data_order_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_blst_sha256_block_data_order_shaext:: +blst_sha256_block_data_order_shaext ENDP +PUBLIC blst_sha256_block_data_order + + +ALIGN 64 +blst_sha256_block_data_order PROC PUBLIC + DB 243,15,30,250 + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov r11,rsp +$L$SEH_begin_blst_sha256_block_data_order:: + + + push rbp + + mov rbp,rsp + + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + test DWORD PTR[__blst_platform_cap],2 + jnz $L$blst_sha256_block_data_order$2 + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,88 + + lea rdx,QWORD PTR[rdx*4+rsi] + mov QWORD PTR[((-64))+rbp],rdi + + mov QWORD PTR[((-48))+rbp],rdx + movaps XMMWORD PTR[(-128)+rbp],xmm6 + movaps XMMWORD PTR[(-112)+rbp],xmm7 + movaps XMMWORD PTR[(-96)+rbp],xmm8 + movaps XMMWORD PTR[(-80)+rbp],xmm9 + +$L$SEH_body_blst_sha256_block_data_order:: + + + lea rsp,QWORD PTR[((-64))+rsp] + mov eax,DWORD PTR[rdi] + and rsp,-64 + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + + + jmp $L$loop_ssse3 +ALIGN 16 +$L$loop_ssse3:: + movdqa xmm7,XMMWORD PTR[((K256+256))] + mov QWORD PTR[((-56))+rbp],rsi + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] +DB 102,15,56,0,199 + movdqu xmm3,XMMWORD PTR[48+rsi] + lea rsi,QWORD PTR[K256] +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD PTR[rsi] + movdqa xmm5,XMMWORD PTR[16+rsi] +DB 102,15,56,0,215 + paddd xmm4,xmm0 + movdqa xmm6,XMMWORD PTR[32+rsi] +DB 102,15,56,0,223 + movdqa xmm7,XMMWORD PTR[48+rsi] + paddd xmm5,xmm1 + paddd xmm6,xmm2 + paddd xmm7,xmm3 + movdqa XMMWORD PTR[rsp],xmm4 + mov r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm5 + mov edi,ebx + movdqa XMMWORD PTR[32+rsp],xmm6 + xor edi,ecx + movdqa XMMWORD PTR[48+rsp],xmm7 + mov r13d,r8d + jmp $L$ssse3_00_47 + +ALIGN 16 +$L$ssse3_00_47:: + sub rsi,-64 + ror r13d,14 + movdqa xmm4,xmm1 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm3 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,224,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,250,4 + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm0,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm3,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm0,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm0,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm0,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[rsi] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm0,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm0 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm2 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm0 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,225,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,251,4 + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm1,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm0,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[20+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm1,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm1,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm1,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[16+rsi] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm1,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm1 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm3 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm1 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,226,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,248,4 + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm2,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm1,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm2,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm2,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm2,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[32+rsi] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm2,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm2 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[32+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm0 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm2 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,227,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,249,4 + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm3,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm2,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[52+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm3,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm3,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm3,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[48+rsi] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm3,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm3 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[48+rsp],xmm6 + cmp BYTE PTR[67+rsi],0 + jne $L$ssse3_00_47 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[20+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[52+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD PTR[((-64))+rbp] + mov eax,r14d + mov rsi,QWORD PTR[((-56))+rbp] + + add eax,DWORD PTR[rdi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + lea rsi,QWORD PTR[64+rsi] + cmp rsi,QWORD PTR[((-48))+rbp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop_ssse3 + + xorps xmm0,xmm0 + movaps XMMWORD PTR[rsp],xmm0 + movaps XMMWORD PTR[16+rsp],xmm0 + movaps XMMWORD PTR[32+rsp],xmm0 + movaps XMMWORD PTR[48+rsp],xmm0 + movaps xmm6,XMMWORD PTR[((-128))+rbp] + movaps xmm7,XMMWORD PTR[((-112))+rbp] + movaps xmm8,XMMWORD PTR[((-96))+rbp] + movaps xmm9,XMMWORD PTR[((-80))+rbp] + mov r15,QWORD PTR[((-40))+rbp] + mov r14,QWORD PTR[((-32))+rbp] + mov r13,QWORD PTR[((-24))+rbp] + mov r12,QWORD PTR[((-16))+rbp] + mov rbx,QWORD PTR[((-8))+rbp] + mov rsp,rbp + + pop rbp + +$L$SEH_epilogue_blst_sha256_block_data_order:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + + DB 0F3h,0C3h ;repret + +$L$SEH_end_blst_sha256_block_data_order:: +blst_sha256_block_data_order ENDP +PUBLIC blst_sha256_emit + + +ALIGN 16 +blst_sha256_emit PROC PUBLIC + DB 243,15,30,250 + + mov r8,QWORD PTR[rdx] + mov r9,QWORD PTR[8+rdx] + mov r10,QWORD PTR[16+rdx] + bswap r8 + mov r11,QWORD PTR[24+rdx] + bswap r9 + mov DWORD PTR[4+rcx],r8d + bswap r10 + mov DWORD PTR[12+rcx],r9d + bswap r11 + mov DWORD PTR[20+rcx],r10d + shr r8,32 + mov DWORD PTR[28+rcx],r11d + shr r9,32 + mov DWORD PTR[rcx],r8d + shr r10,32 + mov DWORD PTR[8+rcx],r9d + shr r11,32 + mov DWORD PTR[16+rcx],r10d + mov DWORD PTR[24+rcx],r11d + DB 0F3h,0C3h ;repret +blst_sha256_emit ENDP + +PUBLIC blst_sha256_bcopy + + +ALIGN 16 +blst_sha256_bcopy PROC PUBLIC + DB 243,15,30,250 + + sub rcx,rdx +$L$oop_bcopy:: + movzx eax,BYTE PTR[rdx] + lea rdx,QWORD PTR[1+rdx] + mov BYTE PTR[((-1))+rdx*1+rcx],al + dec r8 + jnz $L$oop_bcopy + DB 0F3h,0C3h ;repret +blst_sha256_bcopy ENDP + +PUBLIC blst_sha256_hcopy + + +ALIGN 16 +blst_sha256_hcopy PROC PUBLIC + DB 243,15,30,250 + + mov r8,QWORD PTR[rdx] + mov r9,QWORD PTR[8+rdx] + mov r10,QWORD PTR[16+rdx] + mov r11,QWORD PTR[24+rdx] + mov QWORD PTR[rcx],r8 + mov QWORD PTR[8+rcx],r9 + mov QWORD PTR[16+rcx],r10 + mov QWORD PTR[24+rcx],r11 + DB 0F3h,0C3h ;repret +blst_sha256_hcopy ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_blst_sha256_block_data_order_shaext + DD imagerel $L$SEH_body_blst_sha256_block_data_order_shaext + DD imagerel $L$SEH_info_blst_sha256_block_data_order_shaext_prologue + + DD imagerel $L$SEH_body_blst_sha256_block_data_order_shaext + DD imagerel $L$SEH_epilogue_blst_sha256_block_data_order_shaext + DD imagerel $L$SEH_info_blst_sha256_block_data_order_shaext_body + + DD imagerel $L$SEH_epilogue_blst_sha256_block_data_order_shaext + DD imagerel $L$SEH_end_blst_sha256_block_data_order_shaext + DD imagerel $L$SEH_info_blst_sha256_block_data_order_shaext_epilogue + + DD imagerel $L$SEH_begin_blst_sha256_block_data_order + DD imagerel $L$SEH_body_blst_sha256_block_data_order + DD imagerel $L$SEH_info_blst_sha256_block_data_order_prologue + + DD imagerel $L$SEH_body_blst_sha256_block_data_order + DD imagerel $L$SEH_epilogue_blst_sha256_block_data_order + DD imagerel $L$SEH_info_blst_sha256_block_data_order_body + + DD imagerel $L$SEH_epilogue_blst_sha256_block_data_order + DD imagerel $L$SEH_end_blst_sha256_block_data_order + DD imagerel $L$SEH_info_blst_sha256_block_data_order_epilogue + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_blst_sha256_block_data_order_shaext_prologue:: +DB 1,4,6,005h +DB 4,074h,2,0 +DB 4,064h,3,0 +DB 4,053h +DB 1,050h + DD 0,0 +$L$SEH_info_blst_sha256_block_data_order_shaext_body:: +DB 1,0,17,85 +DB 000h,068h,000h,000h +DB 000h,078h,001h,000h +DB 000h,088h,002h,000h +DB 000h,098h,003h,000h +DB 000h,0a8h,004h,000h +DB 000h,074h,00ch,000h +DB 000h,064h,00dh,000h +DB 000h,053h +DB 000h,092h +DB 000h,050h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_blst_sha256_block_data_order_shaext_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + +$L$SEH_info_blst_sha256_block_data_order_prologue:: +DB 1,4,6,005h +DB 4,074h,2,0 +DB 4,064h,3,0 +DB 4,053h +DB 1,050h + DD 0,0 +$L$SEH_info_blst_sha256_block_data_order_body:: +DB 1,0,25,133 +DB 000h,068h,000h,000h +DB 000h,078h,001h,000h +DB 000h,088h,002h,000h +DB 000h,098h,003h,000h +DB 000h,0f4h,00bh,000h +DB 000h,0e4h,00ch,000h +DB 000h,0d4h,00dh,000h +DB 000h,0c4h,00eh,000h +DB 000h,034h,00fh,000h +DB 000h,074h,012h,000h +DB 000h,064h,013h,000h +DB 000h,053h +DB 000h,0f2h +DB 000h,050h +DB 000h,000h,000h,000h,000h,000h +DB 000h,000h,000h,000h +$L$SEH_info_blst_sha256_block_data_order_epilogue:: +DB 1,0,4,0 +DB 000h,074h,001h,000h +DB 000h,064h,002h,000h +DB 000h,000h,000h,000h + + +.xdata ENDS +END diff --git a/crypto/blst_src/bulk_addition.c b/crypto/blst_src/bulk_addition.c new file mode 100644 index 00000000000..4d36f405b64 --- /dev/null +++ b/crypto/blst_src/bulk_addition.c @@ -0,0 +1,176 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fields.h" +#include "point.h" + +/* + * This implementation uses explicit addition formula: + * + * λ = (Y₂-Y₁)/(X₂-X₁) + * X₃ = λ²-(X₁+X₂) + * Y₃ = λ⋅(X₁-X₃)-Y₁ + * + * But since we don't know if we'll have to add point to itself, we need + * to eventually resort to corresponding doubling formula: + * + * λ = 3X₁²/2Y₁ + * X₃ = λ²-2X₁ + * Y₃ = λ⋅(X₁-X₃)-Y₁ + * + * The formulae use prohibitively expensive inversion, but whenever we + * have a lot of affine points to accumulate, we can amortize the cost + * by applying Montgomery's batch inversion approach. As a result, + * asymptotic[!] per-point cost for addition is as small as 5M+1S. For + * comparison, ptype##_dadd_affine takes 8M+5S. In practice, all things + * considered, the improvement coefficient varies from 60% to 85% + * depending on platform and curve. + * + * THIS IMPLEMENTATION IS *NOT* CONSTANT-TIME. [But if there is an + * application that requires constant time-ness, speak up!] + */ + +/* + * Calculate λ's numerator and denominator. + * + * input: A x1 y1 - + * B x2 y2 - + * output: + * if A!=B: A x1 y1 (x2-x1)*mul_acc + * B x2+x1 y2-y1 (x2-x1) + * + * if A==B: A x y 2y*mul_acc + * B 2x 3*x^2 2y + * + * if A==-B: A 0 0 1*mul_acc + * B 0 3*x^2 0 + */ +#define HEAD(ptype, bits, field, one) \ +static void ptype##_head(ptype AB[2], const vec##bits mul_acc) \ +{ \ + ptype *A = AB, *B = AB+1; \ + limb_t inf = vec_is_zero(A, sizeof(ptype##_affine)) | \ + vec_is_zero(B, sizeof(ptype##_affine)); \ + static const vec##bits zero = { 0 }; \ +\ + sub_##field(B->Z, B->X, A->X); /* X2-X1 */ \ + add_##field(B->X, B->X, A->X); /* X2+X1 */ \ + add_##field(A->Z, B->Y, A->Y); /* Y2+Y1 */ \ + sub_##field(B->Y, B->Y, A->Y); /* Y2-Y1 */ \ + if (vec_is_zero(B->Z, sizeof(B->Z))) { /* X2==X1 */ \ + inf = vec_is_zero(A->Z, sizeof(A->Z)); \ + vec_select(B->X, A->Z, B->X, sizeof(B->X), inf); \ + sqr_##field(B->Y, A->X); \ + mul_by_3_##field(B->Y, B->Y); /* 3*X1^2 */ \ + vec_copy(B->Z, A->Z, sizeof(B->Z)); /* 2*Y1 */ \ + } /* B->Y is numenator */ \ + /* B->Z is denominator */ \ + vec_select(A->X, B->X, A->X, sizeof(A->X), inf); \ + vec_select(A->Y, A->Z, A->Y, sizeof(A->Y), inf); \ + vec_select(A->Z, one, B->Z, sizeof(A->Z), inf); \ + vec_select(B->Z, zero, B->Z, sizeof(B->Z), inf); \ + if (mul_acc != NULL) \ + mul_##field(A->Z, A->Z, mul_acc); /* chain multiplication */\ +} + +/* + * Calculate λ and resulting coordinates. + * + * input: A x1 y1 - + * B x2+x1 nominator - + * lambda 1/denominator + * output: D x3=(nom/den)^2-(x2+x1) y3=(nom/den)(x1-x3)-y1 + */ +#define TAIL(ptype, bits, field, one) \ +static void ptype##_tail(ptype *D, ptype AB[2], vec##bits lambda) \ +{ \ + ptype *A = AB, *B = AB+1; \ + vec##bits llambda; \ + limb_t inf = vec_is_zero(B->Z, sizeof(B->Z)); \ +\ + mul_##field(lambda, lambda, B->Y); /* λ = (Y2-Y1)/(X2-X1) */ \ + /* alt. 3*X1^2/2*Y1 */ \ + sqr_##field(llambda, lambda); \ + sub_##field(D->X, llambda, B->X); /* X3 = λ^2-X1-X2 */ \ +\ + sub_##field(D->Y, A->X, D->X); \ + mul_##field(D->Y, D->Y, lambda); \ + sub_##field(D->Y, D->Y, A->Y); /* Y3 = λ*(X1-X3)-Y1 */ \ +\ + vec_select(D->X, A->X, D->X, 2*sizeof(D->X), inf); \ + vec_select(B->Z, one, B->Z, sizeof(B->Z), inf); \ +} + +/* + * |points[]| is volatile buffer with |X|s and |Y|s initially holding + * input affine coordinates, and with |Z|s being used as additional + * temporary storage [unrelated to Jacobian coordinates]. |sum| is + * in-/output, initialize to infinity accordingly. + */ +#define ADDITION_BTREE(prefix, ptype, bits, field, one) \ +HEAD(ptype, bits, field, one) \ +TAIL(ptype, bits, field, one) \ +static void ptype##s_accumulate(ptype *sum, ptype points[], size_t n) \ +{ \ + ptype *dst; \ + void *mul_acc; \ + size_t i; \ +\ + while (n >= 16) { \ + if (n & 1) \ + ptype##_dadd_affine(sum, sum, (const ptype##_affine *)points++); \ + n /= 2; \ + for (mul_acc = NULL, i = n; i--; mul_acc = points->Z, points += 2) \ + ptype##_head(points, mul_acc); \ +\ + reciprocal_##field(points[-2].Z, points[-2].Z); /* 1/∏ Zi */ \ +\ + for (dst = points, i = n; --i;) { \ + dst--; points -= 2; \ + mul_##field(points[-2].Z, points[0].Z, points[-2].Z); \ + ptype##_tail(dst, points, points[-2].Z); \ + mul_##field(points[-2].Z, points[0].Z, points[1].Z); \ + } \ + dst--; points -= 2; \ + ptype##_tail(dst, points, points[0].Z); \ + points = dst; \ + } \ + while (n--) \ + ptype##_dadd_affine(sum, sum, (const ptype##_affine *)points++); \ +} \ +\ +void prefix##s_add(ptype *sum, const ptype##_affine *const points[], \ + size_t npoints) \ +{ \ + const size_t stride = SCRATCH_LIMIT / sizeof(ptype); \ + ptype *scratch = alloca((npoints > stride ? stride : npoints) * \ + sizeof(ptype)); \ + const ptype##_affine *point = NULL; \ +\ + vec_zero(sum, sizeof(*sum)); \ + while (npoints) { \ + size_t i, j = npoints > stride ? stride : npoints; \ + for (i=0; i> (8 * (n % sizeof(limb_t)))); + } +} + +static inline void limbs_from_le_bytes(limb_t *restrict ret, + const unsigned char *in, size_t n) +{ + limb_t limb = 0; + + while(n--) { + limb <<= 8; + limb |= in[n]; + /* + * 'if (n % sizeof(limb_t) == 0)' is omitted because it's cheaper + * to perform redundant stores than to pay penalty for + * mispredicted branch. Besides, some compilers unroll the + * loop and remove redundant stores to 'restrict'-ed storage... + */ + ret[n / sizeof(limb_t)] = limb; + } +} + +static inline void le_bytes_from_limbs(unsigned char *out, const limb_t *in, + size_t n) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + limb_t limb; + size_t i, j, r; + + if ((uptr_t)out == (uptr_t)in && is_endian.little) + return; + + r = n % sizeof(limb_t); + n /= sizeof(limb_t); + + for(i = 0; i < n; i++) { + for (limb = in[i], j = 0; j < sizeof(limb_t); j++, limb >>= 8) + *out++ = (unsigned char)limb; + } + if (r) { + for (limb = in[i], j = 0; j < r; j++, limb >>= 8) + *out++ = (unsigned char)limb; + } +} + +static inline char hex_from_nibble(unsigned char nibble) +{ + int mask = (9 - (nibble &= 0xf)) >> 31; + return (char)(nibble + ((('a'-10) & mask) | ('0' & ~mask))); +} + +static unsigned char nibble_from_hex(char c) +{ + int mask, ret; + + mask = (('a'-c-1) & (c-1-'f')) >> 31; + ret = (10 + c - 'a') & mask; + mask = (('A'-c-1) & (c-1-'F')) >> 31; + ret |= (10 + c - 'A') & mask; + mask = (('0'-c-1) & (c-1-'9')) >> 31; + ret |= (c - '0') & mask; + mask = ((ret-1) & ~mask) >> 31; + ret |= 16 & mask; + + return (unsigned char)ret; +} + +static void bytes_from_hexascii(unsigned char *ret, size_t sz, const char *hex) +{ + size_t len; + unsigned char b = 0; + + if (hex[0]=='0' && (hex[1]=='x' || hex[1]=='X')) + hex += 2; + + for (len = 0; len<2*sz && nibble_from_hex(hex[len])<16; len++) ; + + bytes_zero(ret, sz); + + while(len--) { + b <<= 4; + b |= nibble_from_hex(*hex++); + if (len % 2 == 0) + ret[len / 2] = b; + } +} + +static void limbs_from_hexascii(limb_t *ret, size_t sz, const char *hex) +{ + size_t len; + limb_t limb = 0; + + if (hex[0]=='0' && (hex[1]=='x' || hex[1]=='X')) + hex += 2; + + for (len = 0; len<2*sz && nibble_from_hex(hex[len])<16; len++) ; + + vec_zero(ret, sz); + + while(len--) { + limb <<= 4; + limb |= nibble_from_hex(*hex++); + if (len % (2*sizeof(limb_t)) == 0) + ret[len / (2*sizeof(limb_t))] = limb; + } +} + +#endif diff --git a/crypto/blst_src/client_min_pk.c b/crypto/blst_src/client_min_pk.c new file mode 100644 index 00000000000..0fcf563f502 --- /dev/null +++ b/crypto/blst_src/client_min_pk.c @@ -0,0 +1,17 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "keygen.c" +#include "e2.c" +#include "hash_to_field.c" +#include "map_to_g2.c" +#include "e1.c" +#include "exp.c" +#include "sqrt.c" +#include "recip.c" +#include "consts.c" +#include "vect.c" +#include "exports.c" diff --git a/crypto/blst_src/client_min_sig.c b/crypto/blst_src/client_min_sig.c new file mode 100644 index 00000000000..8e4663daede --- /dev/null +++ b/crypto/blst_src/client_min_sig.c @@ -0,0 +1,17 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "keygen.c" +#include "e1.c" +#include "hash_to_field.c" +#include "map_to_g1.c" +#include "e2.c" +#include "exp.c" +#include "sqrt.c" +#include "recip.c" +#include "consts.c" +#include "vect.c" +#include "exports.c" diff --git a/crypto/blst_src/consts.c b/crypto/blst_src/consts.c new file mode 100644 index 00000000000..021c878a258 --- /dev/null +++ b/crypto/blst_src/consts.c @@ -0,0 +1,36 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "consts.h" + +/* z = -0xd201000000010000 */ +const vec384 BLS12_381_P = { /* (z-1)^2 * (z^4 - z^2 + 1)/3 + z */ + TO_LIMB_T(0xb9feffffffffaaab), TO_LIMB_T(0x1eabfffeb153ffff), + TO_LIMB_T(0x6730d2a0f6b0f624), TO_LIMB_T(0x64774b84f38512bf), + TO_LIMB_T(0x4b1ba7b6434bacd7), TO_LIMB_T(0x1a0111ea397fe69a) +}; +const limb_t BLS12_381_p0 = (limb_t)0x89f3fffcfffcfffd; /* -1/P */ + +const radix384 BLS12_381_Rx = { /* (1<<384)%P, "radix", one-in-Montgomery */ + { { ONE_MONT_P }, + { 0 } } +}; + +const vec384 BLS12_381_RR = { /* (1<<768)%P, "radix"^2, to-Montgomery */ + TO_LIMB_T(0xf4df1f341c341746), TO_LIMB_T(0x0a76e6a609d104f1), + TO_LIMB_T(0x8de5476c4c95b6d5), TO_LIMB_T(0x67eb88a9939d83c0), + TO_LIMB_T(0x9a793e85b519952d), TO_LIMB_T(0x11988fe592cae3aa) +}; + +const vec256 BLS12_381_r = { /* z^4 - z^2 + 1, group order */ + TO_LIMB_T(0xffffffff00000001), TO_LIMB_T(0x53bda402fffe5bfe), + TO_LIMB_T(0x3339d80809a1d805), TO_LIMB_T(0x73eda753299d7d48) +}; + +const vec256 BLS12_381_rRR = { /* (1<<512)%r, "radix"^2, to-Montgomery */ + TO_LIMB_T(0xc999e990f3f29c6d), TO_LIMB_T(0x2b6cedcb87925c23), + TO_LIMB_T(0x05d314967254398f), TO_LIMB_T(0x0748d9d99f59ff11) +}; diff --git a/crypto/blst_src/consts.h b/crypto/blst_src/consts.h new file mode 100644 index 00000000000..cb391b817df --- /dev/null +++ b/crypto/blst_src/consts.h @@ -0,0 +1,30 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_381_ASM_CONST_H__ +#define __BLS12_381_ASM_CONST_H__ +#include "vect.h" + +extern const vec384 BLS12_381_P; +extern const limb_t BLS12_381_p0; +static const limb_t p0 = (limb_t)0x89f3fffcfffcfffd; /* -1/P */ +typedef union { vec384 p12[12]; vec384x p2; vec384 p; } radix384; +extern const radix384 BLS12_381_Rx; /* (1<<384)%P, "radix", one-in-Montgomery */ +extern const vec384 BLS12_381_RR; /* (1<<768)%P, "radix"^2, to-Montgomery */ + +#define ONE_MONT_P TO_LIMB_T(0x760900000002fffd), \ + TO_LIMB_T(0xebf4000bc40c0002), \ + TO_LIMB_T(0x5f48985753c758ba), \ + TO_LIMB_T(0x77ce585370525745), \ + TO_LIMB_T(0x5c071a97a256ec6d), \ + TO_LIMB_T(0x15f65ec3fa80e493) + +#define ZERO_384 (BLS12_381_Rx.p2[1]) + +extern const vec256 BLS12_381_r; /* order */ +static const limb_t r0 = (limb_t)0xfffffffeffffffff; /* -1/r */ +extern const vec256 BLS12_381_rRR; /* (1<<512)%r, "radix"^2, to-Montgomery */ + +#endif diff --git a/crypto/blst_src/cpuid.c b/crypto/blst_src/cpuid.c new file mode 100644 index 00000000000..43b9229d341 --- /dev/null +++ b/crypto/blst_src/cpuid.c @@ -0,0 +1,85 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#if (defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_C)) && !defined(_WIN32) +__attribute__((visibility("hidden"))) +#endif +int __blst_platform_cap = 0; + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) + +# if defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_C) +static void __cpuidex(int info[4], int func, int sub) +{ + int eax, ebx, ecx, edx; + + __asm__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(func), "c"(sub)); + + info[0] = eax; + info[1] = ebx; + info[2] = ecx; + info[3] = edx; +} +# else +# include +# endif + +# if defined(__GNUC__) || defined(__clang__) +__attribute__((constructor)) +# endif +static int __blst_cpuid(void) +{ + int info[4], cap = 0; + + __cpuidex(info, 0, 0); + if (info[0] > 6) { + __cpuidex(info, 7, 0); + cap |= (info[1]>>19) & 1; /* ADX */ + cap |= (info[1]>>28) & 2; /* SHA */ + } + + __blst_platform_cap = cap; + + return 0; +} + +# if defined(_MSC_VER) && !defined(__clang__) +# pragma section(".CRT$XCU",read) +__declspec(allocate(".CRT$XCU")) static int (*p)(void) = __blst_cpuid; +# elif defined(__SUNPRO_C) +# pragma init(__blst_cpuid) +# endif + +#elif defined(__aarch64__) || defined(__aarch64) + +# if defined(__linux__) && (defined(__GNUC__) || defined(__clang__)) +extern unsigned long getauxval(unsigned long type) __attribute__ ((weak)); + +__attribute__((constructor)) +static int __blst_cpuid(void) +{ + int cap = 0; + + if (getauxval) { + unsigned long hwcap_ce = getauxval(16); + cap = (hwcap_ce>>6) & 1; /* SHA256 */ + } + + __blst_platform_cap = cap; + + return 0; +} +# elif defined(__APPLE__) && (defined(__GNUC__) || defined(__clang__)) +__attribute__((constructor)) +static int __blst_cpuid() +{ + __blst_platform_cap = 1; /* SHA256 */ + return 0; +} +# endif + +#endif diff --git a/crypto/blst_src/e1.c b/crypto/blst_src/e1.c new file mode 100644 index 00000000000..f8a7be7bc14 --- /dev/null +++ b/crypto/blst_src/e1.c @@ -0,0 +1,564 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "point.h" +#include "fields.h" +#include "errors.h" + +/* + * y^2 = x^3 + B + */ +static const vec384 B_E1 = { /* (4 << 384) % P */ + TO_LIMB_T(0xaa270000000cfff3), TO_LIMB_T(0x53cc0032fc34000a), + TO_LIMB_T(0x478fe97a6b0a807f), TO_LIMB_T(0xb1d37ebee6ba24d7), + TO_LIMB_T(0x8ec9733bbf78ab2f), TO_LIMB_T(0x09d645513d83de7e) +}; + +const POINTonE1 BLS12_381_G1 = { /* generator point [in Montgomery] */ + /* (0x17f1d3a73197d7942695638c4fa9ac0fc3688c4f9774b905 + * a14e3a3f171bac586c55e83ff97a1aeffb3af00adb22c6bb << 384) % P */ + { TO_LIMB_T(0x5cb38790fd530c16), TO_LIMB_T(0x7817fc679976fff5), + TO_LIMB_T(0x154f95c7143ba1c1), TO_LIMB_T(0xf0ae6acdf3d0e747), + TO_LIMB_T(0xedce6ecc21dbf440), TO_LIMB_T(0x120177419e0bfb75) }, + /* (0x08b3f481e3aaa0f1a09e30ed741d8ae4fcf5e095d5d00af6 + * 00db18cb2c04b3edd03cc744a2888ae40caa232946c5e7e1 << 384) % P */ + { TO_LIMB_T(0xbaac93d50ce72271), TO_LIMB_T(0x8c22631a7918fd8e), + TO_LIMB_T(0xdd595f13570725ce), TO_LIMB_T(0x51ac582950405194), + TO_LIMB_T(0x0e1c8c3fad0059c0), TO_LIMB_T(0x0bbc3efc5008a26a) }, + { ONE_MONT_P } +}; + +const POINTonE1 BLS12_381_NEG_G1 = { /* negative generator [in Montgomery] */ + /* (0x17f1d3a73197d7942695638c4fa9ac0fc3688c4f9774b905 + * a14e3a3f171bac586c55e83ff97a1aeffb3af00adb22c6bb << 384) % P */ + { TO_LIMB_T(0x5cb38790fd530c16), TO_LIMB_T(0x7817fc679976fff5), + TO_LIMB_T(0x154f95c7143ba1c1), TO_LIMB_T(0xf0ae6acdf3d0e747), + TO_LIMB_T(0xedce6ecc21dbf440), TO_LIMB_T(0x120177419e0bfb75) }, + /* (0x114d1d6855d545a8aa7d76c8cf2e21f267816aef1db507c9 + * 6655b9d5caac42364e6f38ba0ecb751bad54dcd6b939c2ca << 384) % P */ + { TO_LIMB_T(0xff526c2af318883a), TO_LIMB_T(0x92899ce4383b0270), + TO_LIMB_T(0x89d7738d9fa9d055), TO_LIMB_T(0x12caf35ba344c12a), + TO_LIMB_T(0x3cff1b76964b5317), TO_LIMB_T(0x0e44d2ede9774430) }, + { ONE_MONT_P } +}; + +static inline void mul_by_b_onE1(vec384 out, const vec384 in) +{ lshift_fp(out, in, 2); } + +static inline void mul_by_4b_onE1(vec384 out, const vec384 in) +{ lshift_fp(out, in, 4); } + +static void POINTonE1_cneg(POINTonE1 *p, bool_t cbit) +{ cneg_fp(p->Y, p->Y, cbit); } + +void blst_p1_cneg(POINTonE1 *a, int cbit) +{ POINTonE1_cneg(a, is_zero(cbit) ^ 1); } + +static void POINTonE1_from_Jacobian(POINTonE1 *out, const POINTonE1 *in) +{ + vec384 Z, ZZ; + limb_t inf = vec_is_zero(in->Z, sizeof(in->Z)); + + reciprocal_fp(Z, in->Z); /* 1/Z */ + + sqr_fp(ZZ, Z); + mul_fp(out->X, in->X, ZZ); /* X = X/Z^2 */ + + mul_fp(ZZ, ZZ, Z); + mul_fp(out->Y, in->Y, ZZ); /* Y = Y/Z^3 */ + + vec_select(out->Z, in->Z, BLS12_381_G1.Z, + sizeof(BLS12_381_G1.Z), inf); /* Z = inf ? 0 : 1 */ +} + +void blst_p1_from_jacobian(POINTonE1 *out, const POINTonE1 *a) +{ POINTonE1_from_Jacobian(out, a); } + +static void POINTonE1_to_affine(POINTonE1_affine *out, const POINTonE1 *in) +{ + POINTonE1 p; + + if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) { + POINTonE1_from_Jacobian(&p, in); + in = &p; + } + vec_copy(out, in, sizeof(*out)); +} + +void blst_p1_to_affine(POINTonE1_affine *out, const POINTonE1 *a) +{ POINTonE1_to_affine(out, a); } + +void blst_p1_from_affine(POINTonE1 *out, const POINTonE1_affine *a) +{ + vec_copy(out, a, sizeof(*a)); + vec_select(out->Z, a->X, BLS12_381_Rx.p, sizeof(out->Z), + vec_is_zero(a, sizeof(*a))); +} + +static bool_t POINTonE1_affine_on_curve(const POINTonE1_affine *p) +{ + vec384 XXX, YY; + + sqr_fp(XXX, p->X); + mul_fp(XXX, XXX, p->X); /* X^3 */ + add_fp(XXX, XXX, B_E1); /* X^3 + B */ + + sqr_fp(YY, p->Y); /* Y^2 */ + + return vec_is_equal(XXX, YY, sizeof(XXX)); +} + +int blst_p1_affine_on_curve(const POINTonE1_affine *p) +{ return (int)(POINTonE1_affine_on_curve(p) | vec_is_zero(p, sizeof(*p))); } + +static bool_t POINTonE1_on_curve(const POINTonE1 *p) +{ + vec384 XXX, YY, BZ6; + limb_t inf = vec_is_zero(p->Z, sizeof(p->Z)); + + sqr_fp(BZ6, p->Z); + mul_fp(BZ6, BZ6, p->Z); + sqr_fp(BZ6, BZ6); /* Z^6 */ + mul_by_b_onE1(BZ6, BZ6); /* B*Z^6 */ + + sqr_fp(XXX, p->X); + mul_fp(XXX, XXX, p->X); /* X^3 */ + add_fp(XXX, XXX, BZ6); /* X^3 + B*Z^6 */ + + sqr_fp(YY, p->Y); /* Y^2 */ + + return vec_is_equal(XXX, YY, sizeof(XXX)) | inf; +} + +int blst_p1_on_curve(const POINTonE1 *p) +{ return (int)POINTonE1_on_curve(p); } + +static limb_t POINTonE1_affine_Serialize_BE(unsigned char out[96], + const POINTonE1_affine *in) +{ + vec384 temp; + + from_fp(temp, in->X); + be_bytes_from_limbs(out, temp, sizeof(temp)); + + from_fp(temp, in->Y); + be_bytes_from_limbs(out + 48, temp, sizeof(temp)); + + return sgn0_pty_mod_384(temp, BLS12_381_P); +} + +void blst_p1_affine_serialize(unsigned char out[96], + const POINTonE1_affine *in) +{ + if (vec_is_zero(in->X, 2*sizeof(in->X))) { + bytes_zero(out, 96); + out[0] = 0x40; /* infinity bit */ + } else { + (void)POINTonE1_affine_Serialize_BE(out, in); + } +} + +static limb_t POINTonE1_Serialize_BE(unsigned char out[96], + const POINTonE1 *in) +{ + POINTonE1 p; + + if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) { + POINTonE1_from_Jacobian(&p, in); + in = &p; + } + + return POINTonE1_affine_Serialize_BE(out, (const POINTonE1_affine *)in); +} + +static void POINTonE1_Serialize(unsigned char out[96], const POINTonE1 *in) +{ + if (vec_is_zero(in->Z, sizeof(in->Z))) { + bytes_zero(out, 96); + out[0] = 0x40; /* infinity bit */ + } else { + (void)POINTonE1_Serialize_BE(out, in); + } +} + +void blst_p1_serialize(unsigned char out[96], const POINTonE1 *in) +{ POINTonE1_Serialize(out, in); } + +static limb_t POINTonE1_affine_Compress_BE(unsigned char out[48], + const POINTonE1_affine *in) +{ + vec384 temp; + + from_fp(temp, in->X); + be_bytes_from_limbs(out, temp, sizeof(temp)); + + return sgn0_pty_mont_384(in->Y, BLS12_381_P, p0); +} + +void blst_p1_affine_compress(unsigned char out[48], const POINTonE1_affine *in) +{ + if (vec_is_zero(in->X, 2*sizeof(in->X))) { + bytes_zero(out, 48); + out[0] = 0xc0; /* compressed and infinity bits */ + } else { + limb_t sign = POINTonE1_affine_Compress_BE(out, in); + out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4)); + } +} + +static limb_t POINTonE1_Compress_BE(unsigned char out[48], + const POINTonE1 *in) +{ + POINTonE1 p; + + if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) { + POINTonE1_from_Jacobian(&p, in); + in = &p; + } + + return POINTonE1_affine_Compress_BE(out, (const POINTonE1_affine *)in); +} + +void blst_p1_compress(unsigned char out[48], const POINTonE1 *in) +{ + if (vec_is_zero(in->Z, sizeof(in->Z))) { + bytes_zero(out, 48); + out[0] = 0xc0; /* compressed and infinity bits */ + } else { + limb_t sign = POINTonE1_Compress_BE(out, in); + out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4)); + } +} + +static limb_t POINTonE1_Uncompress_BE(POINTonE1_affine *out, + const unsigned char in[48]) +{ + POINTonE1_affine ret; + vec384 temp; + + limbs_from_be_bytes(ret.X, in, sizeof(ret.X)); + /* clear top 3 bits in case caller was conveying some information there */ + ret.X[sizeof(ret.X)/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3; + add_fp(temp, ret.X, ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.X, sizeof(temp))) + return (limb_t)0 - BLST_BAD_ENCODING; + mul_fp(ret.X, ret.X, BLS12_381_RR); + + sqr_fp(ret.Y, ret.X); + mul_fp(ret.Y, ret.Y, ret.X); + add_fp(ret.Y, ret.Y, B_E1); /* X^3 + B */ + if (!sqrt_fp(ret.Y, ret.Y)) + return (limb_t)0 - BLST_POINT_NOT_ON_CURVE; + + vec_copy(out, &ret, sizeof(ret)); + + return sgn0_pty_mont_384(out->Y, BLS12_381_P, p0); +} + +static BLST_ERROR POINTonE1_Uncompress_Z(POINTonE1_affine *out, + const unsigned char in[48]) +{ + unsigned char in0 = in[0]; + limb_t sgn0_pty; + + if ((in0 & 0x80) == 0) /* compressed bit */ + return BLST_BAD_ENCODING; + + if (in0 & 0x40) { /* infinity bit */ + if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 47)) { + vec_zero(out, sizeof(*out)); + return BLST_SUCCESS; + } else { + return BLST_BAD_ENCODING; + } + } + + sgn0_pty = POINTonE1_Uncompress_BE(out, in); + + if (sgn0_pty > 3) + return (BLST_ERROR)(0 - sgn0_pty); /* POINT_NOT_ON_CURVE */ + + sgn0_pty >>= 1; /* skip over parity bit */ + sgn0_pty ^= (in0 & 0x20) >> 5; + cneg_fp(out->Y, out->Y, sgn0_pty); + + /* (0,±2) is not in group, but application might want to ignore? */ + return vec_is_zero(out->X, sizeof(out->X)) ? BLST_POINT_NOT_IN_GROUP + : BLST_SUCCESS; +} + +BLST_ERROR blst_p1_uncompress(POINTonE1_affine *out, const unsigned char in[48]) +{ return POINTonE1_Uncompress_Z(out, in); } + +static BLST_ERROR POINTonE1_Deserialize_BE(POINTonE1_affine *out, + const unsigned char in[96]) +{ + POINTonE1_affine ret; + vec384 temp; + + limbs_from_be_bytes(ret.X, in, sizeof(ret.X)); + limbs_from_be_bytes(ret.Y, in + 48, sizeof(ret.Y)); + + /* clear top 3 bits in case caller was conveying some information there */ + ret.X[sizeof(ret.X)/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3; + add_fp(temp, ret.X, ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.X, sizeof(temp))) + return BLST_BAD_ENCODING; + + add_fp(temp, ret.Y, ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.Y, sizeof(temp))) + return BLST_BAD_ENCODING; + + mul_fp(ret.X, ret.X, BLS12_381_RR); + mul_fp(ret.Y, ret.Y, BLS12_381_RR); + + if (!POINTonE1_affine_on_curve(&ret)) + return BLST_POINT_NOT_ON_CURVE; + + vec_copy(out, &ret, sizeof(ret)); + + /* (0,±2) is not in group, but application might want to ignore? */ + return vec_is_zero(out->X, sizeof(out->X)) ? BLST_POINT_NOT_IN_GROUP + : BLST_SUCCESS; +} + +static BLST_ERROR POINTonE1_Deserialize_Z(POINTonE1_affine *out, + const unsigned char in[96]) +{ + unsigned char in0 = in[0]; + + if ((in0 & 0xe0) == 0) + return POINTonE1_Deserialize_BE(out, in); + + if (in0 & 0x80) /* compressed bit */ + return POINTonE1_Uncompress_Z(out, in); + + if (in0 & 0x40) { /* infinity bit */ + if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 95)) { + vec_zero(out, sizeof(*out)); + return BLST_SUCCESS; + } + } + + return BLST_BAD_ENCODING; +} + +BLST_ERROR blst_p1_deserialize(POINTonE1_affine *out, + const unsigned char in[96]) +{ return POINTonE1_Deserialize_Z(out, in); } + +#include "ec_ops.h" +POINT_DADD_IMPL(POINTonE1, 384, fp) +POINT_DADD_AFFINE_IMPL_A0(POINTonE1, 384, fp, BLS12_381_Rx.p) +POINT_ADD_IMPL(POINTonE1, 384, fp) +POINT_ADD_AFFINE_IMPL(POINTonE1, 384, fp, BLS12_381_Rx.p) +POINT_DOUBLE_IMPL_A0(POINTonE1, 384, fp) +POINT_IS_EQUAL_IMPL(POINTonE1, 384, fp) + +void blst_p1_add(POINTonE1 *out, const POINTonE1 *a, const POINTonE1 *b) +{ POINTonE1_add(out, a, b); } + +void blst_p1_add_or_double(POINTonE1 *out, const POINTonE1 *a, + const POINTonE1 *b) +{ POINTonE1_dadd(out, a, b, NULL); } + +void blst_p1_add_affine(POINTonE1 *out, const POINTonE1 *a, + const POINTonE1_affine *b) +{ POINTonE1_add_affine(out, a, b); } + +void blst_p1_add_or_double_affine(POINTonE1 *out, const POINTonE1 *a, + const POINTonE1_affine *b) +{ POINTonE1_dadd_affine(out, a, b); } + +void blst_p1_double(POINTonE1 *out, const POINTonE1 *a) +{ POINTonE1_double(out, a); } + +int blst_p1_is_equal(const POINTonE1 *a, const POINTonE1 *b) +{ return (int)POINTonE1_is_equal(a, b); } + +#include "ec_mult.h" +POINT_MULT_SCALAR_WX_IMPL(POINTonE1, 4) +POINT_MULT_SCALAR_WX_IMPL(POINTonE1, 5) + +#ifdef __BLST_PRIVATE_TESTMODE__ +POINT_AFFINE_MULT_SCALAR_IMPL(POINTonE1) + +DECLARE_PRIVATE_POINTXZ(POINTonE1, 384) +POINT_LADDER_PRE_IMPL(POINTonE1, 384, fp) +POINT_LADDER_STEP_IMPL_A0(POINTonE1, 384, fp, onE1) +POINT_LADDER_POST_IMPL_A0(POINTonE1, 384, fp, onE1) +POINT_MULT_SCALAR_LADDER_IMPL(POINTonE1) +#endif + +static const vec384 beta = { /* such that beta^3 - 1 = 0 */ + /* -1/2 * (1 + sqrt(-3)) = ((P-2)^(P-2)) * (1 + (P-3)^((P+1)/4)) */ + /* (0x1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4 + 897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac << 384) % P */ + TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2), + TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e), + TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741) +}; + +static void sigma(POINTonE1 *out, const POINTonE1 *in) +{ + vec_copy(out->X, in->X, 2*sizeof(out->X)); + mul_fp(out->Z, in->Z, beta); +} + +/* Gallant-Lambert-Vanstone, ~45% faster than POINTonE1_mult_w5 */ +static void POINTonE1_mult_glv(POINTonE1 *out, const POINTonE1 *in, + const pow256 SK) +{ + union { vec256 l; pow256 s; } val; + + /* SK/z^2 [in constant time] */ + + limbs_from_le_bytes(val.l, SK, 32); + div_by_zz(val.l); + le_bytes_from_limbs(val.s, val.l, 32); + + { + const byte *scalars[2] = { val.s+16, val.s }; + POINTonE1 table[2][1<<(5-1)]; /* 4.5KB */ + size_t i; + + POINTonE1_precompute_w5(table[0], in); + for (i = 0; i < 1<<(5-1); i++) { + mul_fp(table[1][i].X, table[0][i].X, beta); + cneg_fp(table[1][i].Y, table[0][i].Y, 1); + vec_copy(table[1][i].Z, table[0][i].Z, sizeof(table[1][i].Z)); + } + + POINTonE1s_mult_w5(out, NULL, 2, scalars, 128, table); + POINTonE1_cneg(out, 1); + mul_fp(out->Z, out->Z, beta); + mul_fp(out->Z, out->Z, beta); + } + + vec_zero(val.l, sizeof(val)); /* scrub the copy of SK */ +} + +static void POINTonE1_sign(POINTonE1 *out, const POINTonE1 *in, const pow256 SK) +{ + vec384 Z, ZZ; + limb_t inf; + + POINTonE1_mult_glv(out, in, SK); + + /* convert to affine to remove possible bias in out->Z */ + inf = vec_is_zero(out->Z, sizeof(out->Z)); +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + flt_reciprocal_fp(Z, out->Z); /* 1/Z */ +#else + reciprocal_fp(Z, out->Z); /* 1/Z */ +#endif + + sqr_fp(ZZ, Z); + mul_fp(out->X, out->X, ZZ); /* X = X/Z^2 */ + + mul_fp(ZZ, ZZ, Z); + mul_fp(out->Y, out->Y, ZZ); /* Y = Y/Z^3 */ + + vec_select(out->Z, out->Z, BLS12_381_G1.Z, sizeof(BLS12_381_G1.Z), + inf); /* Z = inf ? 0 : 1 */ +} + +void blst_sk_to_pk_in_g1(POINTonE1 *out, const pow256 SK) +{ POINTonE1_sign(out, &BLS12_381_G1, SK); } + +void blst_sign_pk_in_g2(POINTonE1 *out, const POINTonE1 *msg, const pow256 SK) +{ POINTonE1_sign(out, msg, SK); } + +void blst_sk_to_pk2_in_g1(unsigned char out[96], POINTonE1_affine *PK, + const pow256 SK) +{ + POINTonE1 P[1]; + + POINTonE1_sign(P, &BLS12_381_G1, SK); + if (PK != NULL) + vec_copy(PK, P, sizeof(*PK)); + if (out != NULL) { + limb_t sgn0_pty = POINTonE1_Serialize_BE(out, P); + out[0] |= (sgn0_pty & 2) << 4; /* pre-decorate */ + out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6; + } +} + +void blst_sign_pk2_in_g2(unsigned char out[96], POINTonE1_affine *sig, + const POINTonE1 *hash, const pow256 SK) +{ + POINTonE1 P[1]; + + POINTonE1_sign(P, hash, SK); + if (sig != NULL) + vec_copy(sig, P, sizeof(*sig)); + if (out != NULL) { + limb_t sgn0_pty = POINTonE1_Serialize_BE(out, P); + out[0] |= (sgn0_pty & 2) << 4; /* pre-decorate */ + out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6; + } +} + +void blst_p1_mult(POINTonE1 *out, const POINTonE1 *a, + const byte *scalar, size_t nbits) +{ + if (nbits < 176) { + if (nbits) + POINTonE1_mult_w4(out, a, scalar, nbits); + else + vec_zero(out, sizeof(*out)); + } else if (nbits <= 256) { + union { vec256 l; pow256 s; } val; + size_t i, j, top, mask = (size_t)0 - 1; + + /* this is not about constant-time-ness, but branch optimization */ + for (top = (nbits + 7)/8, i=0, j=0; i> (8*sizeof(top)-1)); + j += 1 & mask; + } + + if (check_mod_256(val.s, BLS12_381_r)) /* z^4 is the formal limit */ + POINTonE1_mult_glv(out, a, val.s); + else /* should never be the case, added for formal completeness */ + POINTonE1_mult_w5(out, a, scalar, nbits); + + vec_zero(val.l, sizeof(val)); + } else { /* should never be the case, added for formal completeness */ + POINTonE1_mult_w5(out, a, scalar, nbits); + } +} + +void blst_p1_unchecked_mult(POINTonE1 *out, const POINTonE1 *a, + const byte *scalar, size_t nbits) +{ + if (nbits) + POINTonE1_mult_w4(out, a, scalar, nbits); + else + vec_zero(out, sizeof(*out)); +} + +int blst_p1_affine_is_equal(const POINTonE1_affine *a, + const POINTonE1_affine *b) +{ return (int)vec_is_equal(a, b, sizeof(*a)); } + +int blst_p1_is_inf(const POINTonE1 *p) +{ return (int)vec_is_zero(p->Z, sizeof(p->Z)); } + +const POINTonE1 *blst_p1_generator(void) +{ return &BLS12_381_G1; } + +int blst_p1_affine_is_inf(const POINTonE1_affine *p) +{ return (int)vec_is_zero(p, sizeof(*p)); } + +const POINTonE1_affine *blst_p1_affine_generator(void) +{ return (const POINTonE1_affine *)&BLS12_381_G1; } + +size_t blst_p1_sizeof(void) +{ return sizeof(POINTonE1); } + +size_t blst_p1_affine_sizeof(void) +{ return sizeof(POINTonE1_affine); } diff --git a/crypto/blst_src/e2.c b/crypto/blst_src/e2.c new file mode 100644 index 00000000000..77f8064bce2 --- /dev/null +++ b/crypto/blst_src/e2.c @@ -0,0 +1,638 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "point.h" +#include "fields.h" +#include "errors.h" + +/* + * y^2 = x^3 + B + */ +static const vec384x B_E2 = { /* 4 + 4*i */ + { TO_LIMB_T(0xaa270000000cfff3), TO_LIMB_T(0x53cc0032fc34000a), + TO_LIMB_T(0x478fe97a6b0a807f), TO_LIMB_T(0xb1d37ebee6ba24d7), + TO_LIMB_T(0x8ec9733bbf78ab2f), TO_LIMB_T(0x09d645513d83de7e) }, + { TO_LIMB_T(0xaa270000000cfff3), TO_LIMB_T(0x53cc0032fc34000a), + TO_LIMB_T(0x478fe97a6b0a807f), TO_LIMB_T(0xb1d37ebee6ba24d7), + TO_LIMB_T(0x8ec9733bbf78ab2f), TO_LIMB_T(0x09d645513d83de7e) } +}; + +const POINTonE2 BLS12_381_G2 = { /* generator point [in Montgomery] */ +{ /* (0x024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02 + b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8 << 384) % P */ + { TO_LIMB_T(0xf5f28fa202940a10), TO_LIMB_T(0xb3f5fb2687b4961a), + TO_LIMB_T(0xa1a893b53e2ae580), TO_LIMB_T(0x9894999d1a3caee9), + TO_LIMB_T(0x6f67b7631863366b), TO_LIMB_T(0x058191924350bcd7) }, + /* (0x13e02b6052719f607dacd3a088274f65596bd0d09920b61a + b5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e << 384) % P */ + { TO_LIMB_T(0xa5a9c0759e23f606), TO_LIMB_T(0xaaa0c59dbccd60c3), + TO_LIMB_T(0x3bb17e18e2867806), TO_LIMB_T(0x1b1ab6cc8541b367), + TO_LIMB_T(0xc2b6ed0ef2158547), TO_LIMB_T(0x11922a097360edf3) } +}, +{ /* (0x0ce5d527727d6e118cc9cdc6da2e351aadfd9baa8cbdd3a7 + 6d429a695160d12c923ac9cc3baca289e193548608b82801 << 384) % P */ + { TO_LIMB_T(0x4c730af860494c4a), TO_LIMB_T(0x597cfa1f5e369c5a), + TO_LIMB_T(0xe7e6856caa0a635a), TO_LIMB_T(0xbbefb5e96e0d495f), + TO_LIMB_T(0x07d3a975f0ef25a2), TO_LIMB_T(0x0083fd8e7e80dae5) }, + /* (0x0606c4a02ea734cc32acd2b02bc28b99cb3e287e85a763af + 267492ab572e99ab3f370d275cec1da1aaa9075ff05f79be << 384) % P */ + { TO_LIMB_T(0xadc0fc92df64b05d), TO_LIMB_T(0x18aa270a2b1461dc), + TO_LIMB_T(0x86adac6a3be4eba0), TO_LIMB_T(0x79495c4ec93da33a), + TO_LIMB_T(0xe7175850a43ccaed), TO_LIMB_T(0x0b2bc2a163de1bf2) }, +}, +{ { ONE_MONT_P }, { 0 } } +}; + +const POINTonE2 BLS12_381_NEG_G2 = { /* negative generator [in Montgomery] */ +{ /* (0x024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02 + b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8 << 384) % P */ + { TO_LIMB_T(0xf5f28fa202940a10), TO_LIMB_T(0xb3f5fb2687b4961a), + TO_LIMB_T(0xa1a893b53e2ae580), TO_LIMB_T(0x9894999d1a3caee9), + TO_LIMB_T(0x6f67b7631863366b), TO_LIMB_T(0x058191924350bcd7) }, + /* (0x13e02b6052719f607dacd3a088274f65596bd0d09920b61a + b5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e << 384) % P */ + { TO_LIMB_T(0xa5a9c0759e23f606), TO_LIMB_T(0xaaa0c59dbccd60c3), + TO_LIMB_T(0x3bb17e18e2867806), TO_LIMB_T(0x1b1ab6cc8541b367), + TO_LIMB_T(0xc2b6ed0ef2158547), TO_LIMB_T(0x11922a097360edf3) } +}, +{ /* (0x0d1b3cc2c7027888be51d9ef691d77bcb679afda66c73f17 + f9ee3837a55024f78c71363275a75d75d86bab79f74782aa << 384) % P */ + { TO_LIMB_T(0x6d8bf5079fb65e61), TO_LIMB_T(0xc52f05df531d63a5), + TO_LIMB_T(0x7f4a4d344ca692c9), TO_LIMB_T(0xa887959b8577c95f), + TO_LIMB_T(0x4347fe40525c8734), TO_LIMB_T(0x197d145bbaff0bb5) }, + /* (0x13fa4d4a0ad8b1ce186ed5061789213d993923066dddaf10 + 40bc3ff59f825c78df74f2d75467e25e0f55f8a00fa030ed << 384) % P */ + { TO_LIMB_T(0x0c3e036d209afa4e), TO_LIMB_T(0x0601d8f4863f9e23), + TO_LIMB_T(0xe0832636bacc0a84), TO_LIMB_T(0xeb2def362a476f84), + TO_LIMB_T(0x64044f659f0ee1e9), TO_LIMB_T(0x0ed54f48d5a1caa7) } +}, +{ { ONE_MONT_P }, { 0 } } +}; + +static void mul_by_b_onE2(vec384x out, const vec384x in) +{ + sub_fp(out[0], in[0], in[1]); + add_fp(out[1], in[0], in[1]); + lshift_fp(out[0], out[0], 2); + lshift_fp(out[1], out[1], 2); +} + +static void mul_by_4b_onE2(vec384x out, const vec384x in) +{ + sub_fp(out[0], in[0], in[1]); + add_fp(out[1], in[0], in[1]); + lshift_fp(out[0], out[0], 4); + lshift_fp(out[1], out[1], 4); +} + +static void POINTonE2_cneg(POINTonE2 *p, bool_t cbit) +{ cneg_fp2(p->Y, p->Y, cbit); } + +void blst_p2_cneg(POINTonE2 *a, int cbit) +{ POINTonE2_cneg(a, is_zero(cbit) ^ 1); } + +static void POINTonE2_from_Jacobian(POINTonE2 *out, const POINTonE2 *in) +{ + vec384x Z, ZZ; + limb_t inf = vec_is_zero(in->Z, sizeof(in->Z)); + + reciprocal_fp2(Z, in->Z); /* 1/Z */ + + sqr_fp2(ZZ, Z); + mul_fp2(out->X, in->X, ZZ); /* X = X/Z^2 */ + + mul_fp2(ZZ, ZZ, Z); + mul_fp2(out->Y, in->Y, ZZ); /* Y = Y/Z^3 */ + + vec_select(out->Z, in->Z, BLS12_381_G2.Z, + sizeof(BLS12_381_G2.Z), inf); /* Z = inf ? 0 : 1 */ +} + +void blst_p2_from_jacobian(POINTonE2 *out, const POINTonE2 *a) +{ POINTonE2_from_Jacobian(out, a); } + +static void POINTonE2_to_affine(POINTonE2_affine *out, const POINTonE2 *in) +{ + POINTonE2 p; + + if (!vec_is_equal(in->Z, BLS12_381_Rx.p2, sizeof(in->Z))) { + POINTonE2_from_Jacobian(&p, in); + in = &p; + } + vec_copy(out, in, sizeof(*out)); +} + +void blst_p2_to_affine(POINTonE2_affine *out, const POINTonE2 *a) +{ POINTonE2_to_affine(out, a); } + +void blst_p2_from_affine(POINTonE2 *out, const POINTonE2_affine *a) +{ + vec_copy(out, a, sizeof(*a)); + vec_select(out->Z, a->X, BLS12_381_Rx.p2, sizeof(out->Z), + vec_is_zero(a, sizeof(*a))); +} + +static bool_t POINTonE2_affine_on_curve(const POINTonE2_affine *p) +{ + vec384x XXX, YY; + + sqr_fp2(XXX, p->X); + mul_fp2(XXX, XXX, p->X); /* X^3 */ + add_fp2(XXX, XXX, B_E2); /* X^3 + B */ + + sqr_fp2(YY, p->Y); /* Y^2 */ + + return vec_is_equal(XXX, YY, sizeof(XXX)); +} + +int blst_p2_affine_on_curve(const POINTonE2_affine *p) +{ return (int)(POINTonE2_affine_on_curve(p) | vec_is_zero(p, sizeof(*p))); } + +static bool_t POINTonE2_on_curve(const POINTonE2 *p) +{ + vec384x XXX, YY, BZ6; + limb_t inf = vec_is_zero(p->Z, sizeof(p->Z)); + + sqr_fp2(BZ6, p->Z); + mul_fp2(BZ6, BZ6, p->Z); + sqr_fp2(XXX, BZ6); /* Z^6 */ + mul_by_b_onE2(BZ6, XXX); /* B*Z^6 */ + + sqr_fp2(XXX, p->X); + mul_fp2(XXX, XXX, p->X); /* X^3 */ + add_fp2(XXX, XXX, BZ6); /* X^3 + B*Z^6 */ + + sqr_fp2(YY, p->Y); /* Y^2 */ + + return vec_is_equal(XXX, YY, sizeof(XXX)) | inf; +} + +int blst_p2_on_curve(const POINTonE2 *p) +{ return (int)POINTonE2_on_curve(p); } + +static limb_t POINTonE2_affine_Serialize_BE(unsigned char out[192], + const POINTonE2_affine *in) +{ + vec384x temp; + + from_fp(temp[1], in->X[1]); + be_bytes_from_limbs(out, temp[1], sizeof(temp[1])); + from_fp(temp[0], in->X[0]); + be_bytes_from_limbs(out + 48, temp[0], sizeof(temp[0])); + + from_fp(temp[1], in->Y[1]); + be_bytes_from_limbs(out + 96, temp[1], sizeof(temp[1])); + from_fp(temp[0], in->Y[0]); + be_bytes_from_limbs(out + 144, temp[0], sizeof(temp[0])); + + return sgn0_pty_mod_384x(temp, BLS12_381_P); +} + +void blst_p2_affine_serialize(unsigned char out[192], + const POINTonE2_affine *in) +{ + if (vec_is_zero(in->X, 2*sizeof(in->X))) { + bytes_zero(out, 192); + out[0] = 0x40; /* infinity bit */ + } else { + (void)POINTonE2_affine_Serialize_BE(out, in); + } +} + +static limb_t POINTonE2_Serialize_BE(unsigned char out[192], + const POINTonE2 *in) +{ + POINTonE2 p; + + if (!vec_is_equal(in->Z, BLS12_381_Rx.p2, sizeof(in->Z))) { + POINTonE2_from_Jacobian(&p, in); + in = &p; + } + + return POINTonE2_affine_Serialize_BE(out, (const POINTonE2_affine *)in); +} + +static void POINTonE2_Serialize(unsigned char out[192], const POINTonE2 *in) +{ + if (vec_is_zero(in->Z, sizeof(in->Z))) { + bytes_zero(out, 192); + out[0] = 0x40; /* infinity bit */ + } else { + (void)POINTonE2_Serialize_BE(out, in); + } +} + +void blst_p2_serialize(unsigned char out[192], const POINTonE2 *in) +{ POINTonE2_Serialize(out, in); } + +static limb_t POINTonE2_affine_Compress_BE(unsigned char out[96], + const POINTonE2_affine *in) +{ + vec384 temp; + + from_fp(temp, in->X[1]); + be_bytes_from_limbs(out, temp, sizeof(temp)); + from_fp(temp, in->X[0]); + be_bytes_from_limbs(out + 48, temp, sizeof(temp)); + + return sgn0_pty_mont_384x(in->Y, BLS12_381_P, p0); +} + +void blst_p2_affine_compress(unsigned char out[96], const POINTonE2_affine *in) +{ + if (vec_is_zero(in->X, 2*sizeof(in->X))) { + bytes_zero(out, 96); + out[0] = 0xc0; /* compressed and infinity bits */ + } else { + limb_t sign = POINTonE2_affine_Compress_BE(out, in); + out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4)); + } +} + +static limb_t POINTonE2_Compress_BE(unsigned char out[96], + const POINTonE2 *in) +{ + POINTonE2 p; + + if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) { + POINTonE2_from_Jacobian(&p, in); + in = &p; + } + + return POINTonE2_affine_Compress_BE(out, (const POINTonE2_affine *)in); +} + +void blst_p2_compress(unsigned char out[96], const POINTonE2 *in) +{ + if (vec_is_zero(in->Z, sizeof(in->Z))) { + bytes_zero(out, 96); + out[0] = 0xc0; /* compressed and infinity bits */ + } else { + limb_t sign = POINTonE2_Compress_BE(out, in); + out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4)); + } +} + +static limb_t POINTonE2_Uncompress_BE(POINTonE2_affine *out, + const unsigned char in[96]) +{ + POINTonE2_affine ret; + vec384 temp; + + limbs_from_be_bytes(ret.X[1], in, sizeof(ret.X[1])); + limbs_from_be_bytes(ret.X[0], in + 48, sizeof(ret.X[0])); + + /* clear top 3 bits in case caller was conveying some information there */ + ret.X[1][sizeof(ret.X[1])/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3; + add_fp(temp, ret.X[1], ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.X[1], sizeof(temp))) + return (limb_t)0 - BLST_BAD_ENCODING; + + add_fp(temp, ret.X[0], ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.X[0], sizeof(temp))) + return (limb_t)0 - BLST_BAD_ENCODING; + + mul_fp(ret.X[0], ret.X[0], BLS12_381_RR); + mul_fp(ret.X[1], ret.X[1], BLS12_381_RR); + + sqr_fp2(ret.Y, ret.X); + mul_fp2(ret.Y, ret.Y, ret.X); + add_fp2(ret.Y, ret.Y, B_E2); /* X^3 + B */ + if (!sqrt_fp2(ret.Y, ret.Y)) + return (limb_t)0 - BLST_POINT_NOT_ON_CURVE; + + vec_copy(out, &ret, sizeof(ret)); + + return sgn0_pty_mont_384x(out->Y, BLS12_381_P, p0); +} + +static BLST_ERROR POINTonE2_Uncompress_Z(POINTonE2_affine *out, + const unsigned char in[96]) +{ + unsigned char in0 = in[0]; + limb_t sgn0_pty; + + if ((in0 & 0x80) == 0) /* compressed bit */ + return BLST_BAD_ENCODING; + + if (in0 & 0x40) { /* infinity bit */ + if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 95)) { + vec_zero(out, sizeof(*out)); + return BLST_SUCCESS; + } else { + return BLST_BAD_ENCODING; + } + } + + sgn0_pty = POINTonE2_Uncompress_BE(out, in); + + if (sgn0_pty > 3) + return (BLST_ERROR)(0 - sgn0_pty); /* POINT_NOT_ON_CURVE */ + + sgn0_pty >>= 1; /* skip over parity bit */ + sgn0_pty ^= (in0 & 0x20) >> 5; + cneg_fp2(out->Y, out->Y, sgn0_pty); + + return BLST_SUCCESS; +} + +BLST_ERROR blst_p2_uncompress(POINTonE2_affine *out, const unsigned char in[96]) +{ return POINTonE2_Uncompress_Z(out, in); } + +static BLST_ERROR POINTonE2_Deserialize_BE(POINTonE2_affine *out, + const unsigned char in[192]) +{ + POINTonE2_affine ret; + vec384 temp; + + limbs_from_be_bytes(ret.X[1], in, sizeof(ret.X[1])); + limbs_from_be_bytes(ret.X[0], in + 48, sizeof(ret.X[0])); + limbs_from_be_bytes(ret.Y[1], in + 96, sizeof(ret.Y[1])); + limbs_from_be_bytes(ret.Y[0], in + 144, sizeof(ret.Y[0])); + + /* clear top 3 bits in case caller was conveying some information there */ + ret.X[1][sizeof(ret.X[1])/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3; + add_fp(temp, ret.X[1], ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.X[1], sizeof(temp))) + return BLST_BAD_ENCODING; + + add_fp(temp, ret.X[0], ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.X[0], sizeof(temp))) + return BLST_BAD_ENCODING; + + add_fp(temp, ret.Y[1], ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.Y[1], sizeof(temp))) + return BLST_BAD_ENCODING; + + add_fp(temp, ret.Y[0], ZERO_384); /* less than modulus? */ + if (!vec_is_equal(temp, ret.Y[0], sizeof(temp))) + return BLST_BAD_ENCODING; + + mul_fp(ret.X[0], ret.X[0], BLS12_381_RR); + mul_fp(ret.X[1], ret.X[1], BLS12_381_RR); + mul_fp(ret.Y[0], ret.Y[0], BLS12_381_RR); + mul_fp(ret.Y[1], ret.Y[1], BLS12_381_RR); + + if (!POINTonE2_affine_on_curve(&ret)) + return BLST_POINT_NOT_ON_CURVE; + + vec_copy(out, &ret, sizeof(ret)); + + return BLST_SUCCESS; +} + +static BLST_ERROR POINTonE2_Deserialize_Z(POINTonE2_affine *out, + const unsigned char in[192]) +{ + unsigned char in0 = in[0]; + + if ((in0 & 0xe0) == 0) + return POINTonE2_Deserialize_BE(out, in); + + if (in0 & 0x80) /* compressed bit */ + return POINTonE2_Uncompress_Z(out, in); + + if (in0 & 0x40) { /* infinity bit */ + if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 191)) { + vec_zero(out, sizeof(*out)); + return BLST_SUCCESS; + } + } + + return BLST_BAD_ENCODING; +} + +BLST_ERROR blst_p2_deserialize(POINTonE2_affine *out, + const unsigned char in[192]) +{ return POINTonE2_Deserialize_Z(out, in); } + +#include "ec_ops.h" +POINT_DADD_IMPL(POINTonE2, 384x, fp2) +POINT_DADD_AFFINE_IMPL_A0(POINTonE2, 384x, fp2, BLS12_381_Rx.p2) +POINT_ADD_IMPL(POINTonE2, 384x, fp2) +POINT_ADD_AFFINE_IMPL(POINTonE2, 384x, fp2, BLS12_381_Rx.p2) +POINT_DOUBLE_IMPL_A0(POINTonE2, 384x, fp2) +POINT_IS_EQUAL_IMPL(POINTonE2, 384x, fp2) + +void blst_p2_add(POINTonE2 *out, const POINTonE2 *a, const POINTonE2 *b) +{ POINTonE2_add(out, a, b); } + +void blst_p2_add_or_double(POINTonE2 *out, const POINTonE2 *a, + const POINTonE2 *b) +{ POINTonE2_dadd(out, a, b, NULL); } + +void blst_p2_add_affine(POINTonE2 *out, const POINTonE2 *a, + const POINTonE2_affine *b) +{ POINTonE2_add_affine(out, a, b); } + +void blst_p2_add_or_double_affine(POINTonE2 *out, const POINTonE2 *a, + const POINTonE2_affine *b) +{ POINTonE2_dadd_affine(out, a, b); } + +void blst_p2_double(POINTonE2 *out, const POINTonE2 *a) +{ POINTonE2_double(out, a); } + +int blst_p2_is_equal(const POINTonE2 *a, const POINTonE2 *b) +{ return (int)POINTonE2_is_equal(a, b); } + +#include "ec_mult.h" +POINT_MULT_SCALAR_WX_IMPL(POINTonE2, 4) +POINT_MULT_SCALAR_WX_IMPL(POINTonE2, 5) + +#ifdef __BLST_PRIVATE_TESTMODE__ +POINT_AFFINE_MULT_SCALAR_IMPL(POINTonE2) + +DECLARE_PRIVATE_POINTXZ(POINTonE2, 384x) +POINT_LADDER_PRE_IMPL(POINTonE2, 384x, fp2) +POINT_LADDER_STEP_IMPL_A0(POINTonE2, 384x, fp2, onE2) +POINT_LADDER_POST_IMPL_A0(POINTonE2, 384x, fp2, onE2) +POINT_MULT_SCALAR_LADDER_IMPL(POINTonE2) +#endif + +static void psi(POINTonE2 *out, const POINTonE2 *in) +{ + static const vec384x frobenius_x = { /* 1/(1 + i)^((P-1)/3) */ + { 0 }, + { /* (0x1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4 + 897d29650fb85f9b409427eb4f49fffd8bfd00000000aaad << 384) % P */ + TO_LIMB_T(0x890dc9e4867545c3), TO_LIMB_T(0x2af322533285a5d5), + TO_LIMB_T(0x50880866309b7e2c), TO_LIMB_T(0xa20d1b8c7e881024), + TO_LIMB_T(0x14e4f04fe2db9068), TO_LIMB_T(0x14e56d3f1564853a) } + }; + static const vec384x frobenius_y = { /* 1/(1 + i)^((P-1)/2) */ + { /* (0x135203e60180a68ee2e9c448d77a2cd91c3dedd930b1cf60 + ef396489f61eb45e304466cf3e67fa0af1ee7b04121bdea2 << 384) % P */ + TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183), + TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18), + TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) }, + { /* (0x06af0e0437ff400b6831e36d6bd17ffe48395dabc2d3435e + 77f76e17009241c5ee67992f72ec05f4c81084fbede3cc09 << 384) % P */ + TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), + TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), + TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) }, + }; + + vec_copy(out, in, sizeof(*out)); + cneg_fp(out->X[1], out->X[1], 1); mul_fp2(out->X, out->X, frobenius_x); + cneg_fp(out->Y[1], out->Y[1], 1); mul_fp2(out->Y, out->Y, frobenius_y); + cneg_fp(out->Z[1], out->Z[1], 1); +} + +/* Galbraith-Lin-Scott, ~67% faster than POINTonE2_mul_w5 */ +static void POINTonE2_mult_gls(POINTonE2 *out, const POINTonE2 *in, + const pow256 SK) +{ + union { vec256 l; pow256 s; } val; + + /* break down SK to "digits" with |z| as radix [in constant time] */ + + limbs_from_le_bytes(val.l, SK, 32); + div_by_zz(val.l); + div_by_z(val.l); + div_by_z(val.l + NLIMBS(256)/2); + le_bytes_from_limbs(val.s, val.l, 32); + + { + const byte *scalars[2] = { val.s, NULL }; + POINTonE2 table[4][1<<(5-1)]; /* 18KB */ + size_t i; + + POINTonE2_precompute_w5(table[0], in); + for (i = 0; i < 1<<(5-1); i++) { + psi(&table[1][i], &table[0][i]); + psi(&table[2][i], &table[1][i]); + psi(&table[3][i], &table[2][i]); + POINTonE2_cneg(&table[1][i], 1); /* account for z being negative */ + POINTonE2_cneg(&table[3][i], 1); + } + + POINTonE2s_mult_w5(out, NULL, 4, scalars, 64, table); + } + + vec_zero(val.l, sizeof(val)); /* scrub the copy of SK */ +} + +static void POINTonE2_sign(POINTonE2 *out, const POINTonE2 *in, const pow256 SK) +{ + vec384x Z, ZZ; + limb_t inf; + + POINTonE2_mult_gls(out, in, SK); + + /* convert to affine to remove possible bias in out->Z */ + inf = vec_is_zero(out->Z, sizeof(out->Z)); +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + flt_reciprocal_fp2(Z, out->Z); /* 1/Z */ +#else + reciprocal_fp2(Z, out->Z); /* 1/Z */ +#endif + + sqr_fp2(ZZ, Z); + mul_fp2(out->X, out->X, ZZ); /* X = X/Z^2 */ + + mul_fp2(ZZ, ZZ, Z); + mul_fp2(out->Y, out->Y, ZZ); /* Y = Y/Z^3 */ + + vec_select(out->Z, out->Z, BLS12_381_G2.Z, sizeof(BLS12_381_G2.Z), + inf); /* Z = inf ? 0 : 1 */ +} + +void blst_sk_to_pk_in_g2(POINTonE2 *out, const pow256 SK) +{ POINTonE2_sign(out, &BLS12_381_G2, SK); } + +void blst_sign_pk_in_g1(POINTonE2 *out, const POINTonE2 *msg, const pow256 SK) +{ POINTonE2_sign(out, msg, SK); } + +void blst_sk_to_pk2_in_g2(unsigned char out[192], POINTonE2_affine *PK, + const pow256 SK) +{ + POINTonE2 P[1]; + + POINTonE2_sign(P, &BLS12_381_G2, SK); + if (PK != NULL) + vec_copy(PK, P, sizeof(*PK)); + if (out != NULL) { + limb_t sgn0_pty = POINTonE2_Serialize_BE(out, P); + out[0] |= (sgn0_pty & 2) << 4; /* pre-decorate */ + out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6; + } +} + +void blst_sign_pk2_in_g1(unsigned char out[192], POINTonE2_affine *sig, + const POINTonE2 *hash, const pow256 SK) +{ + POINTonE2 P[1]; + + POINTonE2_sign(P, hash, SK); + if (sig != NULL) + vec_copy(sig, P, sizeof(*sig)); + if (out != NULL) { + limb_t sgn0_pty = POINTonE2_Serialize_BE(out, P); + out[0] |= (sgn0_pty & 2) << 4; /* pre-decorate */ + out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6; + } +} + +void blst_p2_mult(POINTonE2 *out, const POINTonE2 *a, + const byte *scalar, size_t nbits) +{ + if (nbits < 144) { + if (nbits) + POINTonE2_mult_w4(out, a, scalar, nbits); + else + vec_zero(out, sizeof(*out)); + } else if (nbits <= 256) { + union { vec256 l; pow256 s; } val; + size_t i, j, top, mask = (size_t)0 - 1; + + /* this is not about constant-time-ness, but branch optimization */ + for (top = (nbits + 7)/8, i=0, j=0; i> (8*sizeof(top)-1)); + j += 1 & mask; + } + + if (check_mod_256(val.s, BLS12_381_r)) /* z^4 is the formal limit */ + POINTonE2_mult_gls(out, a, val.s); + else /* should never be the case, added for formal completeness */ + POINTonE2_mult_w5(out, a, scalar, nbits); + + vec_zero(val.l, sizeof(val)); + } else { /* should never be the case, added for formal completeness */ + POINTonE2_mult_w5(out, a, scalar, nbits); + } +} + +void blst_p2_unchecked_mult(POINTonE2 *out, const POINTonE2 *a, + const byte *scalar, size_t nbits) +{ + if (nbits) + POINTonE2_mult_w4(out, a, scalar, nbits); + else + vec_zero(out, sizeof(*out)); +} + +int blst_p2_affine_is_equal(const POINTonE2_affine *a, + const POINTonE2_affine *b) +{ return (int)vec_is_equal(a, b, sizeof(*a)); } + +int blst_p2_is_inf(const POINTonE2 *p) +{ return (int)vec_is_zero(p->Z, sizeof(p->Z)); } + +const POINTonE2 *blst_p2_generator(void) +{ return &BLS12_381_G2; } + +int blst_p2_affine_is_inf(const POINTonE2_affine *p) +{ return (int)vec_is_zero(p, sizeof(*p)); } + +const POINTonE2_affine *blst_p2_affine_generator(void) +{ return (const POINTonE2_affine *)&BLS12_381_G2; } + +size_t blst_p2_sizeof(void) +{ return sizeof(POINTonE2); } + +size_t blst_p2_affine_sizeof(void) +{ return sizeof(POINTonE2_affine); } diff --git a/crypto/blst_src/ec_mult.h b/crypto/blst_src/ec_mult.h new file mode 100644 index 00000000000..3c23489570c --- /dev/null +++ b/crypto/blst_src/ec_mult.h @@ -0,0 +1,290 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_381_ASM_EC_MULT_H__ +#define __BLS12_381_ASM_EC_MULT_H__ + +#include "point.h" + +/* Works up to 9 bits */ +static limb_t get_wval(const byte *d, size_t off, size_t bits) +{ + size_t top = off + bits - 1; + limb_t ret; + + ret = ((limb_t)d[top / 8] << 8) | d[off / 8]; + + return ret >> (off%8); +} + +/* Works up to 25 bits. */ +static limb_t get_wval_limb(const byte *d, size_t off, size_t bits) +{ + size_t i, top = (off + bits - 1)/8; + limb_t ret, mask = (limb_t)0 - 1; + + d += off/8; + top -= off/8-1; + + /* this is not about constant-time-ness, but branch optimization */ + for (ret=0, i=0; i<4;) { + ret |= (*d & mask) << (8*i); + mask = (limb_t)0 - ((++i - top) >> (8*sizeof(top)-1)); + d += 1 & mask; + } + + return ret >> (off%8); +} + +/* + * Window value encoding that utilizes the fact that -P is trivially + * calculated, which allows to halve the size of pre-computed table, + * is attributed to A. D. Booth, hence the name of the subroutines... + */ +static limb_t booth_encode(limb_t wval, size_t sz) +{ + limb_t mask = 0 - (wval >> sz); /* "sign" bit -> mask */ + launder(mask); + + wval = (wval + 1) >> 1; + wval = (wval ^ mask) - mask; + + /* &0x1f, but <=0x10, is index in table, rest is extended "sign" bit */ + return wval; +} + +/* + * Key feature of these constant-time subroutines is that they tolerate + * zeros in most significant bit positions of the scalar[s], or in other + * words, zero-padded scalar values. This means that one can and should + * pass order's bit-length, which is customarily publicly known, instead + * of the factual scalars' bit-lengths. This is facilitated by point + * addition subroutines implemented to handle points at infinity, which + * are encoded as Z==0. [Doubling algorithms handle such points at + * infinity "naturally," since resulting Z is product of original Z.] + */ +#define POINT_MULT_SCALAR_WX_IMPL(ptype, SZ) \ +static void ptype##_gather_booth_w##SZ(ptype *restrict p, \ + const ptype table[1<<(SZ-1)], \ + limb_t booth_idx) \ +{ \ + size_t i; \ + bool_t booth_sign = (booth_idx >> SZ) & 1; \ +\ + booth_idx &= (1< 0) \ + wval = get_wval(scalar, bits - 1, window + 1) & wmask; \ + else \ + wval = (scalar[0] << 1) & wmask; \ +\ + wval = booth_encode(wval, SZ); \ + ptype##_gather_booth_w##SZ(ret, table[0], wval); \ +\ + i = 1; \ + while (bits > 0) { \ + for (; i < npoints; i++) { \ + scalar = *scalar_s ? *scalar_s++ : scalar+nbytes; \ + wval = get_wval(scalar, bits - 1, window + 1) & wmask; \ + wval = booth_encode(wval, SZ); \ + ptype##_gather_booth_w##SZ(temp, table[i], wval); \ + ptype##_dadd(ret, ret, temp, NULL); \ + } \ +\ + for (j = 0; j < SZ; j++) \ + ptype##_double(ret, ret); \ +\ + window = SZ; \ + wmask = ((limb_t)1 << (window + 1)) - 1; \ + bits -= window; \ + i = 0; scalar_s = scalars; \ + } \ +\ + for (; i < npoints; i++) { \ + scalar = *scalar_s ? *scalar_s++ : scalar+nbytes; \ + wval = (scalar[0] << 1) & wmask; \ + wval = booth_encode(wval, SZ); \ + ptype##_gather_booth_w##SZ(temp, table[i], wval); \ + ptype##_dadd(ret, ret, temp, NULL); \ + } \ +} \ +\ +static void ptype##_mult_w##SZ(ptype *ret, const ptype *point, \ + const byte *scalar, size_t bits) \ +{ \ + limb_t wmask, wval; \ + size_t j, window; \ + ptype temp[1]; \ + ptype table[1<<(SZ-1)]; \ +\ + ptype##_precompute_w##SZ(table, point); \ +\ + /* top excess bits modulo target window size */ \ + window = bits % SZ; /* yes, it may be zero */ \ + wmask = ((limb_t)1 << (window + 1)) - 1; \ +\ + bits -= window; \ + wval = bits ? get_wval(scalar, bits - 1, window + 1) \ + : (limb_t)scalar[0] << 1; \ + wval &= wmask; \ + wval = booth_encode(wval, SZ); \ + ptype##_gather_booth_w##SZ(ret, table, wval); \ +\ + while (bits > 0) { \ + for (j = 0; j < SZ; j++) \ + ptype##_double(ret, ret); \ +\ + window = SZ; \ + wmask = ((limb_t)1 << (window + 1)) - 1; \ + bits -= window; \ +\ + wval = bits ? get_wval(scalar, bits - 1, window + 1) \ + : (limb_t)scalar[0] << 1; \ + wval &= wmask; \ + wval = booth_encode(wval, SZ); \ + ptype##_gather_booth_w##SZ(temp, table, wval); \ + if (bits > 0) ptype##_add(ret, ret, temp); \ + else ptype##_dadd(ret, ret, temp, NULL); \ + } \ +} + +#if 0 +/* ~50%, or ~2x[!] slower than w5... */ +#define POINT_MULT_SCALAR_LADDER_IMPL(ptype) \ +static void ptype##_mult_ladder(ptype *ret, const ptype *p, \ + const byte *scalar, size_t bits) \ +{ \ + ptype sum[1]; \ + bool_t bit, pbit = 0; \ +\ + vec_copy(sum, p, sizeof(ptype)); \ + vec_zero(ret, sizeof(ptype)); /* infinity */ \ +\ + while (bits--) { \ + bit = is_bit_set(scalar, bits); \ + bit ^= pbit; \ + ptype##_cswap(ret, sum, bit); \ + ptype##_add(sum, sum, ret); \ + ptype##_double(ret, ret); \ + pbit ^= bit; \ + } \ + ptype##_cswap(ret, sum, pbit); \ +} +#else +/* >40% better performance than above, [and ~30% slower than w5]... */ +#define POINT_MULT_SCALAR_LADDER_IMPL(ptype) \ +static void ptype##_mult_ladder(ptype *out, const ptype *p, \ + const byte *scalar, size_t bits) \ +{ \ + ptype##xz sum[1]; \ + ptype##xz pxz[1]; \ + ptype##xz ret[1]; \ + bool_t bit, pbit = 0; \ +\ + ptype##xz_ladder_pre(pxz, p); \ + vec_copy(sum, pxz, sizeof(ptype##xz)); \ + vec_zero(ret, sizeof(ptype##xz)); /* infinity */ \ +\ + while (bits--) { \ + bit = is_bit_set(scalar, bits); \ + bit ^= pbit; \ + ptype##xz_cswap(ret, sum, bit); \ + ptype##xz_ladder_step(ret, sum, pxz); \ + pbit ^= bit; \ + } \ + ptype##xz_cswap(ret, sum, pbit); \ + ptype##xz_ladder_post(out, ret, sum, pxz, p->Y); \ +} +#endif + +/* + * Sole reason for existence of this implementation is that addition + * with affine point renders a share of multiplications redundant by + * virtue of Z==1. And since pre-defined generator point can be and + * customarily is instantiated affine, it would be hardly appropriate + * to pass on this opportunity. Though while it's faster than the + * generic ladder implementation, by ~25%, it's not faster than XZ one + * above, <15% slower. Just in case, it's faster than generic ladder + * even if one accounts for prior conversion to affine coordinates, + * so that choice [for resource-constrained case] is actually between + * this plus said conversion and XZ ladder... + * + * To summarize, if ptype##_mult_w5 executed in one unit of time, then + * - naive ptype##_mult_ladder would execute in ~2; + * - XZ version above - in ~1.4; + * - ptype##_affine_mult_ladder below - in ~1.65; + * - [small-footprint ptype##_to_affine would run in ~0.18]. + * + * Caveat lector, |p_affine|*(order+2) produces wrong result, because + * addition doesn't handle doubling. Indeed, P*(order+1) is P and it + * fails to add with itself producing infinity in last addition. But + * as long as |scalar| is reduced modulo order, as it should be, it's + * not a problem... + */ +#define POINT_AFFINE_MULT_SCALAR_IMPL(ptype) \ +static void ptype##_affine_mult_ladder(ptype *ret, \ + const ptype##_affine *p_affine, \ + const byte *scalar, size_t bits) \ +{ \ + ptype sum[1]; \ + bool_t bit; \ +\ + vec_zero(ret, sizeof(ptype)); /* infinity */ \ +\ + while (bits--) { \ + ptype##_double(ret, ret); \ + ptype##_add_affine(sum, ret, p_affine); \ + bit = (scalar[bits / LIMB_T_BITS] >> (bits % LIMB_T_BITS)) & 1; \ + ptype##_ccopy(ret, sum, bit); \ + } \ +} +#endif diff --git a/crypto/blst_src/ec_ops.h b/crypto/blst_src/ec_ops.h new file mode 100644 index 00000000000..0d531f816e2 --- /dev/null +++ b/crypto/blst_src/ec_ops.h @@ -0,0 +1,787 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_384_ASM_EC_OPS_H__ +#define __BLS12_384_ASM_EC_OPS_H__ +/* + * Addition that can handle doubling [as well as points at infinity, + * which are encoded as Z==0] in constant time. It naturally comes at + * cost, but this subroutine should be called only when independent + * points are processed, which is considered reasonable compromise. + * For example, ptype##s_mult_w5 calls it, but since *major* gain is + * result of pure doublings being effectively divided by amount of + * points, slightly slower addition can be tolerated. But what is the + * additional cost more specifically? Best addition result is 11M+5S, + * while this routine takes 13M+5S (+1M+1S if a4!=0), as per + * + * -------------+------------- + * addition | doubling + * -------------+------------- + * U1 = X1*Z2^2 | U1 = X1 + * U2 = X2*Z1^2 | + * S1 = Y1*Z2^3 | S1 = Y1 + * S2 = Y2*Z1^3 | + * zz = Z1*Z2 | zz = Z1 + * H = U2-U1 | H' = 2*Y1 + * R = S2-S1 | R' = 3*X1^2[+a*Z1^4] + * sx = U1+U2 | sx = X1+X1 + * -------------+------------- + * H!=0 || R!=0 | H==0 && R==0 + * + * X3 = R^2-H^2*sx + * Y3 = R*(H^2*U1-X3)-H^3*S1 + * Z3 = H*zz + * + * As for R!=0 condition in context of H==0, a.k.a. P-P. The result is + * infinity by virtue of Z3 = (U2-U1)*zz = H*zz = 0*zz == 0. + */ +#define POINT_DADD_IMPL(ptype, bits, field) \ +static void ptype##_dadd(ptype *out, const ptype *p1, const ptype *p2, \ + const vec##bits a4) \ +{ \ + ptype p3; /* starts as (U1, S1, zz) from addition side */\ + struct { vec##bits H, R, sx; } add, dbl; \ + bool_t p1inf, p2inf, is_dbl; \ +\ + add_##field(dbl.sx, p1->X, p1->X); /* sx = X1+X1 */\ + sqr_##field(dbl.R, p1->X); /* X1^2 */\ + mul_by_3_##field(dbl.R, dbl.R); /* R = 3*X1^2 */\ + add_##field(dbl.H, p1->Y, p1->Y); /* H = 2*Y1 */\ +\ + p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \ + sqr_##field(p3.X, p2->Z); /* Z2^2 */\ + mul_##field(p3.Z, p1->Z, p2->Z); /* Z1*Z2 */\ + p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \ + sqr_##field(add.H, p1->Z); /* Z1^2 */\ +\ + if (a4 != NULL) { \ + sqr_##field(p3.Y, add.H); /* Z1^4, [borrow p3.Y] */\ + mul_##field(p3.Y, p3.Y, a4); \ + add_##field(dbl.R, dbl.R, p3.Y);/* R = 3*X1^2+a*Z1^4 */\ + } \ +\ + mul_##field(p3.Y, p1->Y, p2->Z); \ + mul_##field(p3.Y, p3.Y, p3.X); /* S1 = Y1*Z2^3 */\ + mul_##field(add.R, p2->Y, p1->Z); \ + mul_##field(add.R, add.R, add.H); /* S2 = Y2*Z1^3 */\ + sub_##field(add.R, add.R, p3.Y); /* R = S2-S1 */\ +\ + mul_##field(p3.X, p3.X, p1->X); /* U1 = X1*Z2^2 */\ + mul_##field(add.H, add.H, p2->X); /* U2 = X2*Z1^2 */\ +\ + add_##field(add.sx, add.H, p3.X); /* sx = U1+U2 */\ + sub_##field(add.H, add.H, p3.X); /* H = U2-U1 */\ +\ + /* make the choice between addition and doubling */\ + is_dbl = vec_is_zero(add.H, 2*sizeof(add.H)); \ + vec_select(&p3, p1, &p3, sizeof(p3), is_dbl); \ + vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \ + /* |p3| and |add| hold all inputs now, |p3| will hold output */\ +\ + mul_##field(p3.Z, p3.Z, add.H); /* Z3 = H*Z1*Z2 */\ +\ + sqr_##field(dbl.H, add.H); /* H^2 */\ + mul_##field(dbl.R, dbl.H, add.H); /* H^3 */\ + mul_##field(dbl.R, dbl.R, p3.Y); /* H^3*S1 */\ + mul_##field(p3.Y, dbl.H, p3.X); /* H^2*U1 */\ +\ + mul_##field(dbl.H, dbl.H, add.sx); /* H^2*sx */\ + sqr_##field(p3.X, add.R); /* R^2 */\ + sub_##field(p3.X, p3.X, dbl.H); /* X3 = R^2-H^2*sx */\ +\ + sub_##field(p3.Y, p3.Y, p3.X); /* H^2*U1-X3 */\ + mul_##field(p3.Y, p3.Y, add.R); /* R*(H^2*U1-X3) */\ + sub_##field(p3.Y, p3.Y, dbl.R); /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\ +\ + vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \ + vec_select(out, p2, &p3, sizeof(ptype), p1inf); \ +} + +/* + * Addition with affine point that can handle doubling [as well as + * points at infinity, with |p1| being encoded as Z==0 and |p2| as + * X,Y==0] in constant time. But at what additional cost? Best + * addition result is 7M+4S, while this routine takes 8M+5S, as per + * + * -------------+------------- + * addition | doubling + * -------------+------------- + * U1 = X1 | U1 = X2 + * U2 = X2*Z1^2 | + * S1 = Y1 | S1 = Y2 + * S2 = Y2*Z1^3 | + * H = U2-X1 | H' = 2*Y2 + * R = S2-Y1 | R' = 3*X2^2[+a] + * sx = X1+U2 | sx = X2+X2 + * zz = H*Z1 | zz = H' + * -------------+------------- + * H!=0 || R!=0 | H==0 && R==0 + * + * X3 = R^2-H^2*sx + * Y3 = R*(H^2*U1-X3)-H^3*S1 + * Z3 = zz + * + * As for R!=0 condition in context of H==0, a.k.a. P-P. The result is + * infinity by virtue of Z3 = (U2-U1)*zz = H*zz = 0*zz == 0. + */ +#define POINT_DADD_AFFINE_IMPL_A0(ptype, bits, field, one) \ +static void ptype##_dadd_affine(ptype *out, const ptype *p1, \ + const ptype##_affine *p2) \ +{ \ + ptype p3; /* starts as (,, H*Z1) from addition side */\ + struct { vec##bits H, R, sx; } add, dbl; \ + bool_t p1inf, p2inf, is_dbl; \ +\ + p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \ + add_##field(dbl.sx, p2->X, p2->X); /* sx = X2+X2 */\ + sqr_##field(dbl.R, p2->X); /* X2^2 */\ + mul_by_3_##field(dbl.R, dbl.R); /* R = 3*X2^2 */\ + add_##field(dbl.H, p2->Y, p2->Y); /* H = 2*Y2 */\ +\ + p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \ + sqr_##field(add.H, p1->Z); /* Z1^2 */\ + mul_##field(add.R, add.H, p1->Z); /* Z1^3 */\ + mul_##field(add.R, add.R, p2->Y); /* S2 = Y2*Z1^3 */\ + sub_##field(add.R, add.R, p1->Y); /* R = S2-Y1 */\ +\ + mul_##field(add.H, add.H, p2->X); /* U2 = X2*Z1^2 */\ +\ + add_##field(add.sx, add.H, p1->X); /* sx = X1+U2 */\ + sub_##field(add.H, add.H, p1->X); /* H = U2-X1 */\ +\ + mul_##field(p3.Z, add.H, p1->Z); /* Z3 = H*Z1 */\ +\ + /* make the choice between addition and doubling */ \ + is_dbl = vec_is_zero(add.H, 2*sizeof(add.H)); \ + vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl); \ + vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);\ + vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \ + /* |p3| and |add| hold all inputs now, |p3| will hold output */\ +\ + sqr_##field(dbl.H, add.H); /* H^2 */\ + mul_##field(dbl.R, dbl.H, add.H); /* H^3 */\ + mul_##field(dbl.R, dbl.R, p3.Y); /* H^3*S1 */\ + mul_##field(p3.Y, dbl.H, p3.X); /* H^2*U1 */\ +\ + mul_##field(dbl.H, dbl.H, add.sx); /* H^2*sx */\ + sqr_##field(p3.X, add.R); /* R^2 */\ + sub_##field(p3.X, p3.X, dbl.H); /* X3 = R^2-H^2*sx */\ +\ + sub_##field(p3.Y, p3.Y, p3.X); /* H^2*U1-X3 */\ + mul_##field(p3.Y, p3.Y, add.R); /* R*(H^2*U1-X3) */\ + sub_##field(p3.Y, p3.Y, dbl.R); /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\ +\ + vec_select(p3.X, p2, p3.X, 2*sizeof(p3.X), p1inf); \ + vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \ + vec_select(out, p1, &p3, sizeof(ptype), p2inf); \ +} + +/* + * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl + * with twist to handle either input at infinity, which are encoded as Z==0. + */ +#define POINT_ADD_IMPL(ptype, bits, field) \ +static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \ +{ \ + ptype p3; \ + vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \ + bool_t p1inf, p2inf; \ +\ + p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \ + sqr_##field(Z1Z1, p1->Z); /* Z1Z1 = Z1^2 */\ +\ + mul_##field(p3.Z, Z1Z1, p1->Z); /* Z1*Z1Z1 */\ + mul_##field(p3.Z, p3.Z, p2->Y); /* S2 = Y2*Z1*Z1Z1 */\ +\ + p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \ + sqr_##field(Z2Z2, p2->Z); /* Z2Z2 = Z2^2 */\ +\ + mul_##field(S1, Z2Z2, p2->Z); /* Z2*Z2Z2 */\ + mul_##field(S1, S1, p1->Y); /* S1 = Y1*Z2*Z2Z2 */\ +\ + sub_##field(p3.Z, p3.Z, S1); /* S2-S1 */\ + add_##field(p3.Z, p3.Z, p3.Z); /* r = 2*(S2-S1) */\ +\ + mul_##field(U1, p1->X, Z2Z2); /* U1 = X1*Z2Z2 */\ + mul_##field(H, p2->X, Z1Z1); /* U2 = X2*Z1Z1 */\ +\ + sub_##field(H, H, U1); /* H = U2-U1 */\ +\ + add_##field(I, H, H); /* 2*H */\ + sqr_##field(I, I); /* I = (2*H)^2 */\ +\ + mul_##field(J, H, I); /* J = H*I */\ + mul_##field(S1, S1, J); /* S1*J */\ +\ + mul_##field(p3.Y, U1, I); /* V = U1*I */\ +\ + sqr_##field(p3.X, p3.Z); /* r^2 */\ + sub_##field(p3.X, p3.X, J); /* r^2-J */\ + sub_##field(p3.X, p3.X, p3.Y); \ + sub_##field(p3.X, p3.X, p3.Y); /* X3 = r^2-J-2*V */\ +\ + sub_##field(p3.Y, p3.Y, p3.X); /* V-X3 */\ + mul_##field(p3.Y, p3.Y, p3.Z); /* r*(V-X3) */\ + sub_##field(p3.Y, p3.Y, S1); \ + sub_##field(p3.Y, p3.Y, S1); /* Y3 = r*(V-X3)-2*S1*J */\ +\ + add_##field(p3.Z, p1->Z, p2->Z); /* Z1+Z2 */\ + sqr_##field(p3.Z, p3.Z); /* (Z1+Z2)^2 */\ + sub_##field(p3.Z, p3.Z, Z1Z1); /* (Z1+Z2)^2-Z1Z1 */\ + sub_##field(p3.Z, p3.Z, Z2Z2); /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\ + mul_##field(p3.Z, p3.Z, H); /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\ +\ + vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \ + vec_select(out, p2, &p3, sizeof(ptype), p1inf); \ +} + +/* + * https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl + * with twist to handle either input at infinity, with |p1| encoded as Z==0, + * and |p2| as X==Y==0. + */ +#define POINT_ADD_AFFINE_IMPL(ptype, bits, field, one) \ +static void ptype##_add_affine(ptype *out, const ptype *p1, \ + const ptype##_affine *p2) \ +{ \ + ptype p3; \ + vec##bits Z1Z1, H, HH, I, J; \ + bool_t p1inf, p2inf; \ +\ + p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \ +\ + sqr_##field(Z1Z1, p1->Z); /* Z1Z1 = Z1^2 */\ +\ + mul_##field(p3.Z, Z1Z1, p1->Z); /* Z1*Z1Z1 */\ + mul_##field(p3.Z, p3.Z, p2->Y); /* S2 = Y2*Z1*Z1Z1 */\ +\ + p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \ +\ + mul_##field(H, p2->X, Z1Z1); /* U2 = X2*Z1Z1 */\ + sub_##field(H, H, p1->X); /* H = U2-X1 */\ +\ + sqr_##field(HH, H); /* HH = H^2 */\ + add_##field(I, HH, HH); \ + add_##field(I, I, I); /* I = 4*HH */\ +\ + mul_##field(p3.Y, p1->X, I); /* V = X1*I */\ + mul_##field(J, H, I); /* J = H*I */\ + mul_##field(I, J, p1->Y); /* Y1*J */\ +\ + sub_##field(p3.Z, p3.Z, p1->Y); /* S2-Y1 */\ + add_##field(p3.Z, p3.Z, p3.Z); /* r = 2*(S2-Y1) */\ +\ + sqr_##field(p3.X, p3.Z); /* r^2 */\ + sub_##field(p3.X, p3.X, J); /* r^2-J */\ + sub_##field(p3.X, p3.X, p3.Y); \ + sub_##field(p3.X, p3.X, p3.Y); /* X3 = r^2-J-2*V */\ +\ + sub_##field(p3.Y, p3.Y, p3.X); /* V-X3 */\ + mul_##field(p3.Y, p3.Y, p3.Z); /* r*(V-X3) */\ + sub_##field(p3.Y, p3.Y, I); \ + sub_##field(p3.Y, p3.Y, I); /* Y3 = r*(V-X3)-2*Y1*J */\ +\ + add_##field(p3.Z, p1->Z, H); /* Z1+H */\ + sqr_##field(p3.Z, p3.Z); /* (Z1+H)^2 */\ + sub_##field(p3.Z, p3.Z, Z1Z1); /* (Z1+H)^2-Z1Z1 */\ + sub_##field(p3.Z, p3.Z, HH); /* Z3 = (Z1+H)^2-Z1Z1-HH */\ +\ + vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \ + vec_select(p3.X, p2, p3.X, 2*sizeof(p3.X), p1inf); \ + vec_select(out, p1, &p3, sizeof(ptype), p2inf); \ +} + +/* + * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l + */ +#define POINT_DOUBLE_IMPL_A0(ptype, bits, field) \ +static void ptype##_double(ptype *p3, const ptype *p1) \ +{ \ + vec##bits A, B, C; \ +\ + sqr_##field(A, p1->X); /* A = X1^2 */\ + sqr_##field(B, p1->Y); /* B = Y1^2 */\ + sqr_##field(C, B); /* C = B^2 */\ +\ + add_##field(B, B, p1->X); /* X1+B */\ + sqr_##field(B, B); /* (X1+B)^2 */\ + sub_##field(B, B, A); /* (X1+B)^2-A */\ + sub_##field(B, B, C); /* (X1+B)^2-A-C */\ + add_##field(B, B, B); /* D = 2*((X1+B)^2-A-C) */\ +\ + mul_by_3_##field(A, A); /* E = 3*A */\ +\ + sqr_##field(p3->X, A); /* F = E^2 */\ + sub_##field(p3->X, p3->X, B); \ + sub_##field(p3->X, p3->X, B); /* X3 = F-2*D */\ +\ + add_##field(p3->Z, p1->Z, p1->Z); /* 2*Z1 */\ + mul_##field(p3->Z, p3->Z, p1->Y); /* Z3 = 2*Z1*Y1 */\ +\ + mul_by_8_##field(C, C); /* 8*C */\ + sub_##field(p3->Y, B, p3->X); /* D-X3 */\ + mul_##field(p3->Y, p3->Y, A); /* E*(D-X3) */\ + sub_##field(p3->Y, p3->Y, C); /* Y3 = E*(D-X3)-8*C */\ +} + +#define POINT_LADDER_PRE_IMPL(ptype, bits, field) \ +static void ptype##xz_ladder_pre(ptype##xz *pxz, const ptype *p) \ +{ \ + mul_##field(pxz->X, p->X, p->Z); /* X2 = X1*Z1 */\ + sqr_##field(pxz->Z, p->Z); \ + mul_##field(pxz->Z, pxz->Z, p->Z); /* Z2 = Z1^3 */\ +} + +/* + * https://hyperelliptic.org/EFD/g1p/auto-shortw-xz.html#ladder-ladd-2002-it-3 + * with twist to handle either input at infinity, which are encoded as Z==0. + * Just in case, order of doubling and addition is reverse in comparison to + * hyperelliptic.org entry. This was done to minimize temporary storage. + * + * XZ1 is |p|, XZ2&XZ4 are in&out |r|, XZ3&XZ5 are in&out |s|. + */ +#define POINT_LADDER_STEP_IMPL_A0(ptype, bits, field, suffix4b) \ +static void ptype##xz_ladder_step(ptype##xz *r, ptype##xz *s, \ + const ptype##xz *p) \ +{ \ + ptype##xz p5; \ + vec##bits A, B, C, D, XX, ZZ; \ + bool_t r_inf, s_inf; \ + /* s += r */\ + mul_##field(A, r->X, s->X); /* A = X2*X3 */\ + mul_##field(B, r->Z, s->Z); /* B = Z2*Z3 */\ + mul_##field(C, r->X, s->Z); /* C = X2*Z3 */\ + mul_##field(D, r->Z, s->X); /* D = X3*Z2 */\ +\ + sqr_##field(A, A); /* (A[-a*B])^2 */\ + add_##field(p5.X, C, D); /* C+D */\ + mul_##field(p5.X, p5.X, B); /* B*(C+D) */\ + mul_by_4b_##suffix4b(B, p5.X); /* b4*B*(C+D) */\ + sub_##field(p5.X, A, B); /* (A[-a*B])^2-b4*B*(C+D) */\ + mul_##field(p5.X, p5.X, p->Z); /* X5 = Z1*((A[-a*B])^2-b4*B*(C+D)) */\ +\ + sub_##field(p5.Z, C, D); /* C-D */\ + sqr_##field(p5.Z, p5.Z); /* (C-D)^2 */\ + mul_##field(p5.Z, p5.Z, p->X); /* Z5 = X1*(C-D)^2 */\ +\ + r_inf = vec_is_zero(r->Z, sizeof(r->Z)); \ + s_inf = vec_is_zero(s->Z, sizeof(s->Z)); \ +\ + vec_select(&p5, r, &p5, sizeof(ptype##xz), s_inf); \ + vec_select(s, s, &p5, sizeof(ptype##xz), r_inf); \ + /* r *= 2 */\ + sqr_##field(XX, r->X); /* XX = X2^2 */\ + sqr_##field(ZZ, r->Z); /* ZZ = Z2^2 */\ +\ + add_##field(r->Z, r->X, r->Z); /* X2+Z2 */\ + sqr_##field(r->Z, r->Z); /* (X2+Z2)^2 */\ + sub_##field(r->Z, r->Z, XX); /* (X2+Z2)^2-XX */\ + sub_##field(r->Z, r->Z, ZZ); /* E = (X2+Z2)^2-XX-ZZ */\ +\ + sqr_##field(A, XX); /* (XX[-a*ZZ])^2 */\ + mul_##field(B, r->Z, ZZ); /* E*ZZ */\ + mul_by_4b_##suffix4b(C, B); /* b4*E*ZZ */\ + sub_##field(r->X, A, C); /* X4 = (XX[-a*ZZ])^2-b4*E*ZZ */\ +\ + sqr_##field(ZZ, ZZ); /* ZZ^2 */\ + mul_by_4b_##suffix4b(B, ZZ); /* b4*ZZ^2 */\ + mul_##field(r->Z, r->Z, XX); /* E*(XX[+a*ZZ]) */\ + add_##field(r->Z, r->Z, r->Z); /* 2*E*(XX[+a*ZZ]) */\ + add_##field(r->Z, r->Z, B); /* Z4 = 2*E*(XX[+a*ZZ])+b4*ZZ^2 */\ +} + +/* + * Recover the |r|'s y-coordinate using Eq. (8) from Brier-Joye, + * "Weierstraß Elliptic Curves and Side-Channel Attacks", with XZ twist + * and conversion to Jacobian coordinates from /.../ecp_smpl.c, + * and with twist to recover from |s| at infinity [which occurs when + * multiplying by (order-1)]. + * + * X4 = 2*Y1*X2*Z3*Z1*Z2 + * Y4 = 2*b*Z3*(Z1*Z2)^2 + Z3*(a*Z1*Z2+X1*X2)*(X1*Z2+X2*Z1) - X3*(X1*Z2-X2*Z1)^2 + * Z4 = 2*Y1*Z3*Z2^2*Z1 + * + * Z3x2 = 2*Z3 + * Y1Z3x2 = Y1*Z3x2 + * Z1Z2 = Z1*Z2 + * X1Z2 = X1*Z2 + * X2Z1 = X2*Z1 + * X4 = Y1Z3x2*X2*Z1Z2 + * A = b*Z3x2*(Z1Z2)^2 + * B = Z3*(a*Z1Z2+X1*X2)*(X1Z2+X2Z1) + * C = X3*(X1Z2-X2Z1)^2 + * Y4 = A+B-C + * Z4 = Y1Z3x2*Z1Z2*Z2 + * + * XZ1 is |p|, XZ2 is |r|, XZ3 is |s|, 'a' is 0. + */ +#define POINT_LADDER_POST_IMPL_A0(ptype, bits, field, suffixb) \ +static void ptype##xz_ladder_post(ptype *p4, \ + const ptype##xz *r, const ptype##xz *s, \ + const ptype##xz *p, const vec##bits Y1) \ +{ \ + vec##bits Z3x2, Y1Z3x2, Z1Z2, X1Z2, X2Z1, A, B, C; \ + bool_t s_inf; \ +\ + add_##field(Z3x2, s->Z, s->Z); /* Z3x2 = 2*Z3 */\ + mul_##field(Y1Z3x2, Y1, Z3x2); /* Y1Z3x2 = Y1*Z3x2 */\ + mul_##field(Z1Z2, p->Z, r->Z); /* Z1Z2 = Z1*Z2 */\ + mul_##field(X1Z2, p->X, r->Z); /* X1Z2 = X1*Z2 */\ + mul_##field(X2Z1, r->X, p->Z); /* X2Z1 = X2*Z1 */\ +\ + mul_##field(p4->X, Y1Z3x2, r->X); /* Y1Z3x2*X2 */\ + mul_##field(p4->X, p4->X, Z1Z2); /* X4 = Y1Z3x2*X2*Z1Z2 */\ +\ + sqr_##field(A, Z1Z2); /* (Z1Z2)^2 */\ + mul_##field(B, A, Z3x2); /* Z3x2*(Z1Z2)^2 */\ + mul_by_b_##suffixb(A, B); /* A = b*Z3x2*(Z1Z2)^2 */\ +\ + mul_##field(B, p->X, r->X); /* [a*Z1Z2+]X1*X2 */\ + mul_##field(B, B, s->Z); /* Z3*([a*Z1Z2+]X1*X2) */\ + add_##field(C, X1Z2, X2Z1); /* X1Z2+X2Z1 */\ + mul_##field(B, B, C); /* B = Z3*([a*Z2Z1+]X1*X2)*(X1Z2+X2Z1) */\ +\ + sub_##field(C, X1Z2, X2Z1); /* X1Z2-X2Z1 */\ + sqr_##field(C, C); /* (X1Z2-X2Z1)^2 */\ + mul_##field(C, C, s->X); /* C = X3*(X1Z2-X2Z1)^2 */\ +\ + add_##field(A, A, B); /* A+B */\ + sub_##field(A, A, C); /* Y4 = A+B-C */\ +\ + mul_##field(p4->Z, Z1Z2, r->Z); /* Z1Z2*Z2 */\ + mul_##field(p4->Z, p4->Z, Y1Z3x2); /* Y1Z3x2*Z1Z2*Z2 */\ +\ + s_inf = vec_is_zero(s->Z, sizeof(s->Z)); \ + vec_select(p4->X, p->X, p4->X, sizeof(p4->X), s_inf); \ + vec_select(p4->Y, Y1, A, sizeof(p4->Y), s_inf); \ + vec_select(p4->Z, p->Z, p4->Z, sizeof(p4->Z), s_inf); \ + ptype##_cneg(p4, s_inf); \ + /* to Jacobian */\ + mul_##field(p4->X, p4->X, p4->Z); /* X4 = X4*Z4 */\ + sqr_##field(B, p4->Z); \ + mul_##field(p4->Y, p4->Y, B); /* Y4 = Y4*Z4^2 */\ +} + +#define POINT_IS_EQUAL_IMPL(ptype, bits, field) \ +static limb_t ptype##_is_equal(const ptype *p1, const ptype *p2) \ +{ \ + vec##bits Z1Z1, Z2Z2; \ + ptype##_affine a1, a2; \ + bool_t is_inf1 = vec_is_zero(p1->Z, sizeof(p1->Z)); \ + bool_t is_inf2 = vec_is_zero(p2->Z, sizeof(p2->Z)); \ +\ + sqr_##field(Z1Z1, p1->Z); /* Z1Z1 = Z1^2 */\ + sqr_##field(Z2Z2, p2->Z); /* Z2Z2 = Z2^2 */\ +\ + mul_##field(a1.X, p1->X, Z2Z2); /* U1 = X1*Z2Z2 */\ + mul_##field(a2.X, p2->X, Z1Z1); /* U2 = X2*Z1Z1 */\ +\ + mul_##field(a1.Y, p1->Y, p2->Z); /* Y1*Z2 */\ + mul_##field(a2.Y, p2->Y, p1->Z); /* Y2*Z1 */\ +\ + mul_##field(a1.Y, a1.Y, Z2Z2); /* S1 = Y1*Z2*Z2Z2 */\ + mul_##field(a2.Y, a2.Y, Z1Z1); /* S2 = Y2*Z1*Z1Z1 */\ +\ + return vec_is_equal(&a1, &a2, sizeof(a1)) & (is_inf1 ^ is_inf2 ^ 1); \ +} + +/* + * https://eprint.iacr.org/2015/1060, algorithm 7 with a twist to handle + * |p3| pointing at either |p1| or |p2|. This is resolved by adding |t5| + * and replacing few first references to |X3| in the formula, up to step + * 21, with it. 12M[+27A], doubling and infinity are handled by the + * formula itself. Infinity is to be encoded as [0, !0, 0]. + */ +#define POINT_PROJ_DADD_IMPL_A0(ptype, bits, field, suffixb) \ +static void ptype##proj_dadd(ptype##proj *p3, const ptype##proj *p1, \ + const ptype##proj *p2) \ +{ \ + vec##bits t0, t1, t2, t3, t4, t5; \ +\ + mul_##field(t0, p1->X, p2->X); /* 1. t0 = X1*X2 */\ + mul_##field(t1, p1->Y, p2->Y); /* 2. t1 = Y1*Y2 */\ + mul_##field(t2, p1->Z, p2->Z); /* 3. t2 = Z1*Z2 */\ + add_##field(t3, p1->X, p1->Y); /* 4. t3 = X1+Y1 */\ + add_##field(t4, p2->X, p2->Y); /* 5. t4 = X2+Y2 */\ + mul_##field(t3, t3, t4); /* 6. t3 = t3*t4 */\ + add_##field(t4, t0, t1); /* 7. t4 = t0+t1 */\ + sub_##field(t3, t3, t4); /* 8. t3 = t3-t4 */\ + add_##field(t4, p1->Y, p1->Z); /* 9. t4 = Y1+Z1 */\ + add_##field(t5, p2->Y, p2->Z); /* 10. t5 = Y2+Z2 */\ + mul_##field(t4, t4, t5); /* 11. t4 = t4*t5 */\ + add_##field(t5, t1, t2); /* 12. t5 = t1+t2 */\ + sub_##field(t4, t4, t5); /* 13. t4 = t4-t5 */\ + add_##field(t5, p1->X, p1->Z); /* 14. t5 = X1+Z1 */\ + add_##field(p3->Y, p2->X, p2->Z); /* 15. Y3 = X2+Z2 */\ + mul_##field(t5, t5, p3->Y); /* 16. t5 = t5*Y3 */\ + add_##field(p3->Y, t0, t2); /* 17. Y3 = t0+t2 */\ + sub_##field(p3->Y, t5, p3->Y); /* 18. Y3 = t5-Y3 */\ + mul_by_3_##field(t0, t0); /* 19-20. t0 = 3*t0 */\ + mul_by_3_##field(t5, t2); /* 21. t5 = 3*t2 */\ + mul_by_b_##suffixb(t2, t5); /* 21. t2 = b*t5 */\ + add_##field(p3->Z, t1, t2); /* 22. Z3 = t1+t2 */\ + sub_##field(t1, t1, t2); /* 23. t1 = t1-t2 */\ + mul_by_3_##field(t5, p3->Y); /* 24. t5 = 3*Y3 */\ + mul_by_b_##suffixb(p3->Y, t5); /* 24. Y3 = b*t5 */\ + mul_##field(p3->X, t4, p3->Y); /* 25. X3 = t4*Y3 */\ + mul_##field(t2, t3, t1); /* 26. t2 = t3*t1 */\ + sub_##field(p3->X, t2, p3->X); /* 27. X3 = t2-X3 */\ + mul_##field(p3->Y, p3->Y, t0); /* 28. Y3 = Y3*t0 */\ + mul_##field(t1, t1, p3->Z); /* 29. t1 = t1*Z3 */\ + add_##field(p3->Y, t1, p3->Y); /* 30. Y3 = t1+Y3 */\ + mul_##field(t0, t0, t3); /* 31. t0 = t0*t3 */\ + mul_##field(p3->Z, p3->Z, t4); /* 32. Z3 = Z3*t4 */\ + add_##field(p3->Z, p3->Z, t0); /* 33. Z3 = Z3+t0 */\ +} + +/* + * https://eprint.iacr.org/2015/1060, algorithm 8 with a twist to handle + * |p2| being infinity encoded as [0, 0]. 11M[+21A]. + */ +#define POINT_PROJ_DADD_AFFINE_IMPL_A0(ptype, bits, field, suffixb) \ +static void ptype##proj_dadd_affine(ptype##proj *out, const ptype##proj *p1, \ + const ptype##_affine *p2) \ +{ \ + ptype##proj p3[1]; \ + vec##bits t0, t1, t2, t3, t4; \ + limb_t p2inf = vec_is_zero(p2, sizeof(*p2)); \ +\ + mul_##field(t0, p1->X, p2->X); /* 1. t0 = X1*X2 */\ + mul_##field(t1, p1->Y, p2->Y); /* 2. t1 = Y1*Y2 */\ + add_##field(t3, p1->X, p1->Y); /* 3. t3 = X1+Y1 */\ + add_##field(t4, p2->X, p2->Y); /* 4. t4 = X2+Y2 */\ + mul_##field(t3, t3, t4); /* 5. t3 = t3*t4 */\ + add_##field(t4, t0, t1); /* 6. t4 = t0+t1 */\ + sub_##field(t3, t3, t4); /* 7. t3 = t3-t4 */\ + mul_##field(t4, p2->Y, p1->Z); /* 8. t4 = Y2*Z1 */\ + add_##field(t4, t4, p1->Y); /* 9. t4 = t4+Y1 */\ + mul_##field(p3->Y, p2->X, p1->Z); /* 10. Y3 = X2*Z1 */\ + add_##field(p3->Y, p3->Y, p1->X); /* 11. Y3 = Y3+X1 */\ + mul_by_3_##field(t0, t0); /* 12-13. t0 = 3*t0 */\ + mul_by_b_##suffixb(t2, p1->Z); /* 14. t2 = b*Z1 */\ + mul_by_3_##field(t2, t2); /* 14. t2 = 3*t2 */\ + add_##field(p3->Z, t1, t2); /* 15. Z3 = t1+t2 */\ + sub_##field(t1, t1, t2); /* 16. t1 = t1-t2 */\ + mul_by_b_##suffixb(t2, p3->Y); /* 17. t2 = b*Y3 */\ + mul_by_3_##field(p3->Y, t2); /* 17. Y3 = 3*t2 */\ + mul_##field(p3->X, t4, p3->Y); /* 18. X3 = t4*Y3 */\ + mul_##field(t2, t3, t1); /* 19. t2 = t3*t1 */\ + sub_##field(p3->X, t2, p3->X); /* 20. X3 = t2-X3 */\ + mul_##field(p3->Y, p3->Y, t0); /* 21. Y3 = Y3*t0 */\ + mul_##field(t1, t1, p3->Z); /* 22. t1 = t1*Z3 */\ + add_##field(p3->Y, t1, p3->Y); /* 23. Y3 = t1+Y3 */\ + mul_##field(t0, t0, t3); /* 24. t0 = t0*t3 */\ + mul_##field(p3->Z, p3->Z, t4); /* 25. Z3 = Z3*t4 */\ + add_##field(p3->Z, p3->Z, t0); /* 26. Z3 = Z3+t0 */\ +\ + vec_select(out, p1, p3, sizeof(*out), p2inf); \ +} + +/* + * https://eprint.iacr.org/2015/1060, algorithm 9 with a twist to handle + * |p3| pointing at |p1|. This is resolved by adding |t3| to hold X*Y + * and reordering operations to bring references to |p1| forward. + * 6M+2S[+13A]. + */ +#define POINT_PROJ_DOUBLE_IMPL_A0(ptype, bits, field, suffixb) \ +static void ptype##proj_double(ptype##proj *p3, const ptype##proj *p1) \ +{ \ + vec##bits t0, t1, t2, t3; \ +\ + sqr_##field(t0, p1->Y); /* 1. t0 = Y*Y */\ + mul_##field(t1, p1->Y, p1->Z); /* 5. t1 = Y*Z */\ + sqr_##field(t2, p1->Z); /* 6. t2 = Z*Z */\ + mul_##field(t3, p1->X, p1->Y); /* 16. t3 = X*Y */\ + lshift_##field(p3->Z, t0, 3); /* 2-4. Z3 = 8*t0 */\ + mul_by_b_##suffixb(p3->X, t2); /* 7. t2 = b*t2 */\ + mul_by_3_##field(t2, p3->X); /* 7. t2 = 3*t2 */\ + mul_##field(p3->X, t2, p3->Z); /* 8. X3 = t2*Z3 */\ + add_##field(p3->Y, t0, t2); /* 9. Y3 = t0+t2 */\ + mul_##field(p3->Z, t1, p3->Z); /* 10. Z3 = t1*Z3 */\ + mul_by_3_##field(t2, t2); /* 11-12. t2 = 3*t2 */\ + sub_##field(t0, t0, t2); /* 13. t0 = t0-t2 */\ + mul_##field(p3->Y, t0, p3->Y); /* 14. Y3 = t0*Y3 */\ + add_##field(p3->Y, p3->X, p3->Y); /* 15. Y3 = X3+Y3 */\ + mul_##field(p3->X, t0, t3); /* 17. X3 = t0*t3 */\ + add_##field(p3->X, p3->X, p3->X); /* 18. X3 = X3+X3 */\ +} + +#define POINT_PROJ_TO_JACOBIAN_IMPL(ptype, bits, field) \ +static void ptype##proj_to_Jacobian(ptype *out, const ptype##proj *in) \ +{ \ + vec##bits ZZ; \ +\ + sqr_##field(ZZ, in->Z); \ + mul_##field(out->X, in->X, in->Z); \ + mul_##field(out->Y, in->Y, ZZ); \ + vec_copy(out->Z, in->Z, sizeof(out->Z)); \ +} + +#define POINT_TO_PROJECTIVE_IMPL(ptype, bits, field, one) \ +static void ptype##_to_projective(ptype##proj *out, const ptype *in) \ +{ \ + vec##bits ZZ; \ + limb_t is_inf = vec_is_zero(in->Z, sizeof(in->Z)); \ +\ + sqr_##field(ZZ, in->Z); \ + mul_##field(out->X, in->X, in->Z); \ + vec_select(out->Y, one, in->Y, sizeof(out->Y), is_inf); \ + mul_##field(out->Z, ZZ, in->Z); \ +} + +/******************* !!!!! NOT CONSTANT TIME !!!!! *******************/ + +/* + * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s + * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 + * with twist to handle either input at infinity. Addition costs 12M+2S, + * while conditional doubling - 4M+6M+3S. + */ +#define POINTXYZZ_DADD_IMPL(ptype, bits, field) \ +static void ptype##xyzz_dadd(ptype##xyzz *p3, const ptype##xyzz *p1, \ + const ptype##xyzz *p2) \ +{ \ + vec##bits U, S, P, R; \ +\ + if (vec_is_zero(p2->ZZZ, 2*sizeof(p2->ZZZ))) { \ + vec_copy(p3, p1, sizeof(*p3)); \ + return; \ + } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { \ + vec_copy(p3, p2, sizeof(*p3)); \ + return; \ + } \ +\ + mul_##field(U, p1->X, p2->ZZ); /* U1 = X1*ZZ2 */\ + mul_##field(S, p1->Y, p2->ZZZ); /* S1 = Y1*ZZZ2 */\ + mul_##field(P, p2->X, p1->ZZ); /* U2 = X2*ZZ1 */\ + mul_##field(R, p2->Y, p1->ZZZ); /* S2 = Y2*ZZZ1 */\ + sub_##field(P, P, U); /* P = U2-U1 */\ + sub_##field(R, R, S); /* R = S2-S1 */\ +\ + if (!vec_is_zero(P, sizeof(P))) { /* X1!=X2 */\ + vec##bits PP, PPP, Q; /* add |p1| and |p2| */\ +\ + sqr_##field(PP, P); /* PP = P^2 */\ + mul_##field(PPP, PP, P); /* PPP = P*PP */\ + mul_##field(Q, U, PP); /* Q = U1*PP */\ + sqr_##field(p3->X, R); /* R^2 */\ + add_##field(P, Q, Q); \ + sub_##field(p3->X, p3->X, PPP); /* R^2-PPP */\ + sub_##field(p3->X, p3->X, P); /* X3 = R^2-PPP-2*Q */\ + sub_##field(Q, Q, p3->X); \ + mul_##field(Q, Q, R); /* R*(Q-X3) */\ + mul_##field(p3->Y, S, PPP); /* S1*PPP */\ + sub_##field(p3->Y, Q, p3->Y); /* Y3 = R*(Q-X3)-S1*PPP */\ + mul_##field(p3->ZZ, p1->ZZ, p2->ZZ); /* ZZ1*ZZ2 */\ + mul_##field(p3->ZZZ, p1->ZZZ, p2->ZZZ); /* ZZZ1*ZZZ2 */\ + mul_##field(p3->ZZ, p3->ZZ, PP); /* ZZ3 = ZZ1*ZZ2*PP */\ + mul_##field(p3->ZZZ, p3->ZZZ, PPP); /* ZZZ3 = ZZZ1*ZZZ2*PPP */\ + } else if (vec_is_zero(R, sizeof(R))) { /* X1==X2 && Y1==Y2 */\ + vec##bits V, W, M; /* double |p1| */\ +\ + add_##field(U, p1->Y, p1->Y); /* U = 2*Y1 */\ + sqr_##field(V, U); /* V = U^2 */\ + mul_##field(W, V, U); /* W = U*V */\ + mul_##field(S, p1->X, V); /* S = X1*V */\ + sqr_##field(M, p1->X); \ + mul_by_3_##field(M, M); /* M = 3*X1^2[+a*ZZ1^2] */\ + sqr_##field(p3->X, M); \ + add_##field(U, S, S); /* 2*S */\ + sub_##field(p3->X, p3->X, U); /* X3 = M^2-2*S */\ + mul_##field(p3->Y, W, p1->Y); /* W*Y1 */\ + sub_##field(S, S, p3->X); \ + mul_##field(S, S, M); /* M*(S-X3) */\ + sub_##field(p3->Y, S, p3->Y); /* Y3 = M*(S-X3)-W*Y1 */\ + mul_##field(p3->ZZ, p1->ZZ, V); /* ZZ3 = V*ZZ1 */\ + mul_##field(p3->ZZZ, p1->ZZZ, W); /* ZZ3 = W*ZZZ1 */\ + } else { /* X1==X2 && Y1==-Y2 */\ + vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ)); /* set |p3| to infinity */\ + } \ +} + +/* + * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s + * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-mdbl-2008-s-1 + * with twists to handle even subtractions and either input at infinity. + * Addition costs 8M+2S, while conditional doubling - 2M+4M+3S. + */ +#define POINTXYZZ_DADD_AFFINE_IMPL(ptype, bits, field, one) \ +static void ptype##xyzz_dadd_affine(ptype##xyzz *p3, const ptype##xyzz *p1, \ + const ptype##_affine *p2, \ + bool_t subtract) \ +{ \ + vec##bits P, R; \ +\ + if (vec_is_zero(p2, sizeof(*p2))) { \ + vec_copy(p3, p1, sizeof(*p3)); \ + return; \ + } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { \ + vec_copy(p3->X, p2->X, 2*sizeof(p3->X));\ + cneg_##field(p3->ZZZ, one, subtract); \ + vec_copy(p3->ZZ, one, sizeof(p3->ZZ)); \ + return; \ + } \ +\ + mul_##field(P, p2->X, p1->ZZ); /* U2 = X2*ZZ1 */\ + mul_##field(R, p2->Y, p1->ZZZ); /* S2 = Y2*ZZZ1 */\ + cneg_##field(R, R, subtract); \ + sub_##field(P, P, p1->X); /* P = U2-X1 */\ + sub_##field(R, R, p1->Y); /* R = S2-Y1 */\ +\ + if (!vec_is_zero(P, sizeof(P))) { /* X1!=X2 */\ + vec##bits PP, PPP, Q; /* add |p2| to |p1| */\ +\ + sqr_##field(PP, P); /* PP = P^2 */\ + mul_##field(PPP, PP, P); /* PPP = P*PP */\ + mul_##field(Q, p1->X, PP); /* Q = X1*PP */\ + sqr_##field(p3->X, R); /* R^2 */\ + add_##field(P, Q, Q); \ + sub_##field(p3->X, p3->X, PPP); /* R^2-PPP */\ + sub_##field(p3->X, p3->X, P); /* X3 = R^2-PPP-2*Q */\ + sub_##field(Q, Q, p3->X); \ + mul_##field(Q, Q, R); /* R*(Q-X3) */\ + mul_##field(p3->Y, p1->Y, PPP); /* Y1*PPP */\ + sub_##field(p3->Y, Q, p3->Y); /* Y3 = R*(Q-X3)-Y1*PPP */\ + mul_##field(p3->ZZ, p1->ZZ, PP); /* ZZ3 = ZZ1*PP */\ + mul_##field(p3->ZZZ, p1->ZZZ, PPP); /* ZZZ3 = ZZZ1*PPP */\ + } else if (vec_is_zero(R, sizeof(R))) { /* X1==X2 && Y1==Y2 */\ + vec##bits U, S, M; /* double |p2| */\ +\ + add_##field(U, p2->Y, p2->Y); /* U = 2*Y1 */\ + sqr_##field(p3->ZZ, U); /* [ZZ3 =] V = U^2 */\ + mul_##field(p3->ZZZ, p3->ZZ, U); /* [ZZZ3 =] W = U*V */\ + mul_##field(S, p2->X, p3->ZZ); /* S = X1*V */\ + sqr_##field(M, p2->X); \ + mul_by_3_##field(M, M); /* M = 3*X1^2[+a] */\ + sqr_##field(p3->X, M); \ + add_##field(U, S, S); /* 2*S */\ + sub_##field(p3->X, p3->X, U); /* X3 = M^2-2*S */\ + mul_##field(p3->Y, p3->ZZZ, p2->Y); /* W*Y1 */\ + sub_##field(S, S, p3->X); \ + mul_##field(S, S, M); /* M*(S-X3) */\ + sub_##field(p3->Y, S, p3->Y); /* Y3 = M*(S-X3)-W*Y1 */\ + cneg_##field(p3->ZZZ, p3->ZZZ, subtract); \ + } else { /* X1==X2 && Y1==-Y2 */\ + vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ)); /* set |p3| to infinity */\ + } \ +} + +#define POINTXYZZ_TO_JACOBIAN_IMPL(ptype, bits, field) \ +static void ptype##xyzz_to_Jacobian(ptype *out, const ptype##xyzz *in) \ +{ \ + mul_##field(out->X, in->X, in->ZZ); \ + mul_##field(out->Y, in->Y, in->ZZZ); \ + vec_copy(out->Z, in->ZZ, sizeof(out->Z)); \ +} + +#define POINT_TO_XYZZ_IMPL(ptype, bits, field) \ +static void ptype##_to_xyzz(ptype##xyzz *out, const ptype *in) \ +{ \ + vec_copy(out->X, in->X, 2*sizeof(out->X)); \ + sqr_##field(out->ZZ, in->Z); \ + mul_##field(out->ZZZ, out->ZZ, in->Z); \ +} + +#endif diff --git a/crypto/blst_src/errors.h b/crypto/blst_src/errors.h new file mode 100644 index 00000000000..425daeb486f --- /dev/null +++ b/crypto/blst_src/errors.h @@ -0,0 +1,19 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_381_ASM_ERRORS_H__ +#define __BLS12_381_ASM_ERRORS_H__ + +typedef enum { + BLST_SUCCESS = 0, + BLST_BAD_ENCODING, + BLST_POINT_NOT_ON_CURVE, + BLST_POINT_NOT_IN_GROUP, + BLST_AGGR_TYPE_MISMATCH, + BLST_VERIFY_FAIL, + BLST_PK_IS_INFINITY, +} BLST_ERROR; + +#endif diff --git a/crypto/blst_src/exp.c b/crypto/blst_src/exp.c new file mode 100644 index 00000000000..55c5c5a7875 --- /dev/null +++ b/crypto/blst_src/exp.c @@ -0,0 +1,55 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "vect.h" + +/* + * |out| = |inp|^|pow|, small footprint, public exponent + */ +static void exp_mont_384(vec384 out, const vec384 inp, const byte *pow, + size_t pow_bits, const vec384 p, limb_t n0) +{ +#if 1 + vec384 ret; + + vec_copy(ret, inp, sizeof(ret)); /* ret = inp^1 */ + --pow_bits; /* most significant bit is set, skip over */ + while (pow_bits--) { + sqr_mont_384(ret, ret, p, n0); + if (is_bit_set(pow, pow_bits)) + mul_mont_384(ret, ret, inp, p, n0); + } + vec_copy(out, ret, sizeof(ret)); /* out = ret */ +#else + unsigned int i; + vec384 sqr; + + vec_copy(sqr, inp, sizeof(sqr)); + for (i = 0; !is_bit_set(pow, i++);) + sqr_mont_384(sqr, sqr, sqr, p, n0); + vec_copy(out, sqr, sizeof(sqr)); + for (; i < pow_bits; i++) { + sqr_mont_384(sqr, sqr, sqr, p, n0); + if (is_bit_set(pow, i)) + mul_mont_384(out, out, sqr, p, n0); + } +#endif +} + +static void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow, + size_t pow_bits, const vec384 p, limb_t n0) +{ + vec384x ret; + + vec_copy(ret, inp, sizeof(ret)); /* |ret| = |inp|^1 */ + --pow_bits; /* most significant bit is accounted for, skip over */ + while (pow_bits--) { + sqr_mont_384x(ret, ret, p, n0); + if (is_bit_set(pow, pow_bits)) + mul_mont_384x(ret, ret, inp, p, n0); + } + vec_copy(out, ret, sizeof(ret)); /* |out| = |ret| */ +} diff --git a/crypto/blst_src/exports.c b/crypto/blst_src/exports.c new file mode 100644 index 00000000000..1ca4d4757fa --- /dev/null +++ b/crypto/blst_src/exports.c @@ -0,0 +1,583 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * Why this file? Overall goal is to ensure that all internal calls + * remain internal after linking application. This is to both + * + * a) minimize possibility of external name conflicts (since all + * non-blst-prefixed and [assembly subroutines] remain static); + * b) preclude possibility of unintentional internal reference + * overload in shared library context (one can achieve same + * effect with -Bsymbolic, but we don't want to rely on end-user + * to remember to use it); + */ + +#include "fields.h" +#include "bytes.h" + +/* + * BLS12-381-specific Fr shortcuts to assembly. + */ +void blst_fr_add(vec256 ret, const vec256 a, const vec256 b) +{ add_mod_256(ret, a, b, BLS12_381_r); } + +void blst_fr_sub(vec256 ret, const vec256 a, const vec256 b) +{ sub_mod_256(ret, a, b, BLS12_381_r); } + +void blst_fr_mul_by_3(vec256 ret, const vec256 a) +{ mul_by_3_mod_256(ret, a, BLS12_381_r); } + +void blst_fr_lshift(vec256 ret, const vec256 a, size_t count) +{ lshift_mod_256(ret, a, count, BLS12_381_r); } + +void blst_fr_rshift(vec256 ret, const vec256 a, size_t count) +{ rshift_mod_256(ret, a, count, BLS12_381_r); } + +void blst_fr_mul(vec256 ret, const vec256 a, const vec256 b) +{ mul_mont_sparse_256(ret, a, b, BLS12_381_r, r0); } + +void blst_fr_ct_bfly(vec256 x0, vec256 x1, const vec256 twiddle) +{ + vec256 x2; + + mul_mont_sparse_256(x2, x1, twiddle, BLS12_381_r, r0); + sub_mod_256(x1, x0, x2, BLS12_381_r); + add_mod_256(x0, x0, x2, BLS12_381_r); +} + +void blst_fr_gs_bfly(vec256 x0, vec256 x1, const vec256 twiddle) +{ + vec256 x2; + + sub_mod_256(x2, x0, x1, BLS12_381_r); + add_mod_256(x0, x0, x1, BLS12_381_r); + mul_mont_sparse_256(x1, x2, twiddle, BLS12_381_r, r0); +} + +void blst_fr_sqr(vec256 ret, const vec256 a) +{ sqr_mont_sparse_256(ret, a, BLS12_381_r, r0); } + +void blst_fr_cneg(vec256 ret, const vec256 a, int flag) +{ cneg_mod_256(ret, a, is_zero(flag) ^ 1, BLS12_381_r); } + +void blst_fr_to(vec256 ret, const vec256 a) +{ mul_mont_sparse_256(ret, a, BLS12_381_rRR, BLS12_381_r, r0); } + +void blst_fr_from(vec256 ret, const vec256 a) +{ from_mont_256(ret, a, BLS12_381_r, r0); } + +void blst_fr_from_scalar(vec256 ret, const pow256 a) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + + if ((uptr_t)ret == (uptr_t)a && is_endian.little) { + mul_mont_sparse_256(ret, (const limb_t *)a, BLS12_381_rRR, + BLS12_381_r, r0); + } else { + vec256 out; + limbs_from_le_bytes(out, a, 32); + mul_mont_sparse_256(ret, out, BLS12_381_rRR, BLS12_381_r, r0); + vec_zero(out, sizeof(out)); + } +} + +void blst_scalar_from_fr(pow256 ret, const vec256 a) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + + if ((uptr_t)ret == (uptr_t)a && is_endian.little) { + from_mont_256((limb_t *)ret, a, BLS12_381_r, r0); + } else { + vec256 out; + from_mont_256(out, a, BLS12_381_r, r0); + le_bytes_from_limbs(ret, out, 32); + vec_zero(out, sizeof(out)); + } +} + +int blst_scalar_fr_check(const pow256 a) +{ return (int)(check_mod_256(a, BLS12_381_r) | + bytes_are_zero(a, sizeof(pow256))); +} + +int blst_sk_check(const pow256 a) +{ return (int)check_mod_256(a, BLS12_381_r); } + +int blst_sk_add_n_check(pow256 ret, const pow256 a, const pow256 b) +{ return (int)add_n_check_mod_256(ret, a, b, BLS12_381_r); } + +int blst_sk_sub_n_check(pow256 ret, const pow256 a, const pow256 b) +{ return (int)sub_n_check_mod_256(ret, a, b, BLS12_381_r); } + +int blst_sk_mul_n_check(pow256 ret, const pow256 a, const pow256 b) +{ + vec256 t[2]; + const union { + long one; + char little; + } is_endian = { 1 }; + bool_t is_zero; + + if (((size_t)a|(size_t)b)%sizeof(limb_t) != 0 || !is_endian.little) { + limbs_from_le_bytes(t[0], a, sizeof(pow256)); + limbs_from_le_bytes(t[1], b, sizeof(pow256)); + a = (const byte *)t[0]; + b = (const byte *)t[1]; + } + mul_mont_sparse_256(t[0], BLS12_381_rRR, (const limb_t *)a, BLS12_381_r, r0); + mul_mont_sparse_256(t[0], t[0], (const limb_t *)b, BLS12_381_r, r0); + le_bytes_from_limbs(ret, t[0], sizeof(pow256)); + is_zero = vec_is_zero(t[0], sizeof(vec256)); + vec_zero(t, sizeof(t)); + + return (int)(is_zero^1); +} + +void blst_sk_inverse(pow256 ret, const pow256 a) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + + if (((size_t)a|(size_t)ret)%sizeof(limb_t) == 0 && is_endian.little) { + limb_t *out = (limb_t *)ret; + mul_mont_sparse_256(out, (const limb_t *)a, BLS12_381_rRR, + BLS12_381_r, r0); + reciprocal_fr(out, out); + from_mont_256(out, out, BLS12_381_r, r0); + } else { + vec256 out; + limbs_from_le_bytes(out, a, 32); + mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0); + reciprocal_fr(out, out); + from_mont_256(out, out, BLS12_381_r, r0); + le_bytes_from_limbs(ret, out, 32); + vec_zero(out, sizeof(out)); + } +} + +/* + * BLS12-381-specific Fp shortcuts to assembly. + */ +void blst_fp_add(vec384 ret, const vec384 a, const vec384 b) +{ add_fp(ret, a, b); } + +void blst_fp_sub(vec384 ret, const vec384 a, const vec384 b) +{ sub_fp(ret, a, b); } + +void blst_fp_mul_by_3(vec384 ret, const vec384 a) +{ mul_by_3_fp(ret, a); } + +void blst_fp_mul_by_8(vec384 ret, const vec384 a) +{ mul_by_8_fp(ret, a); } + +void blst_fp_lshift(vec384 ret, const vec384 a, size_t count) +{ lshift_fp(ret, a, count); } + +void blst_fp_mul(vec384 ret, const vec384 a, const vec384 b) +{ mul_fp(ret, a, b); } + +void blst_fp_sqr(vec384 ret, const vec384 a) +{ sqr_fp(ret, a); } + +void blst_fp_cneg(vec384 ret, const vec384 a, int flag) +{ cneg_fp(ret, a, is_zero(flag) ^ 1); } + +void blst_fp_to(vec384 ret, const vec384 a) +{ mul_fp(ret, a, BLS12_381_RR); } + +void blst_fp_from(vec384 ret, const vec384 a) +{ from_fp(ret, a); } + +/* + * Fp serialization/deserialization. + */ +void blst_fp_from_uint32(vec384 ret, const unsigned int a[12]) +{ + if (sizeof(limb_t) == 8) { + int i; + for (i = 0; i < 6; i++) + ret[i] = a[2*i] | ((limb_t)a[2*i+1] << (32 & (8*sizeof(limb_t)-1))); + a = (const unsigned int *)ret; + } + mul_fp(ret, (const limb_t *)a, BLS12_381_RR); +} + +void blst_uint32_from_fp(unsigned int ret[12], const vec384 a) +{ + if (sizeof(limb_t) == 4) { + from_fp((limb_t *)ret, a); + } else { + vec384 out; + int i; + + from_fp(out, a); + for (i = 0; i < 6; i++) { + limb_t limb = out[i]; + ret[2*i] = (unsigned int)limb; + ret[2*i+1] = (unsigned int)(limb >> (32 & (8*sizeof(limb_t)-1))); + } + } +} + +void blst_fp_from_uint64(vec384 ret, const unsigned long long a[6]) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + + if (sizeof(limb_t) == 4 && !is_endian.little) { + int i; + for (i = 0; i < 6; i++) { + unsigned long long limb = a[i]; + ret[2*i] = (limb_t)limb; + ret[2*i+1] = (limb_t)(limb >> 32); + } + a = (const unsigned long long *)ret; + } + mul_fp(ret, (const limb_t *)a, BLS12_381_RR); +} + +void blst_uint64_from_fp(unsigned long long ret[6], const vec384 a) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + + if (sizeof(limb_t) == 8 || is_endian.little) { + from_fp((limb_t *)ret, a); + } else { + vec384 out; + int i; + + from_fp(out, a); + for (i = 0; i < 6; i++) + ret[i] = out[2*i] | ((unsigned long long)out[2*i+1] << 32); + } +} + +void blst_fp_from_bendian(vec384 ret, const unsigned char a[48]) +{ + vec384 out; + + limbs_from_be_bytes(out, a, sizeof(vec384)); + mul_fp(ret, out, BLS12_381_RR); +} + +void blst_bendian_from_fp(unsigned char ret[48], const vec384 a) +{ + vec384 out; + + from_fp(out, a); + be_bytes_from_limbs(ret, out, sizeof(vec384)); +} + +void blst_fp_from_lendian(vec384 ret, const unsigned char a[48]) +{ + vec384 out; + + limbs_from_le_bytes(out, a, sizeof(vec384)); + mul_fp(ret, out, BLS12_381_RR); +} + +void blst_lendian_from_fp(unsigned char ret[48], const vec384 a) +{ + vec384 out; + + from_fp(out, a); + le_bytes_from_limbs(ret, out, sizeof(vec384)); +} + +/* + * BLS12-381-specific Fp2 shortcuts to assembly. + */ +void blst_fp2_add(vec384x ret, const vec384x a, const vec384x b) +{ add_fp2(ret, a, b); } + +void blst_fp2_sub(vec384x ret, const vec384x a, const vec384x b) +{ sub_fp2(ret, a, b); } + +void blst_fp2_mul_by_3(vec384x ret, const vec384x a) +{ mul_by_3_fp2(ret, a); } + +void blst_fp2_mul_by_8(vec384x ret, const vec384x a) +{ mul_by_8_fp2(ret, a); } + +void blst_fp2_lshift(vec384x ret, const vec384x a, size_t count) +{ lshift_fp2(ret, a, count); } + +void blst_fp2_mul(vec384x ret, const vec384x a, const vec384x b) +{ mul_fp2(ret, a, b); } + +void blst_fp2_sqr(vec384x ret, const vec384x a) +{ sqr_fp2(ret, a); } + +void blst_fp2_cneg(vec384x ret, const vec384x a, int flag) +{ cneg_fp2(ret, a, is_zero(flag) ^ 1); } + +/* + * Scalar serialization/deserialization. + */ +void blst_scalar_from_uint32(pow256 ret, const unsigned int a[8]) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + size_t i; + + if ((uptr_t)ret==(uptr_t)a && is_endian.little) + return; + + for(i = 0; i < 8; i++) { + unsigned int w = a[i]; + *ret++ = (byte)w; + *ret++ = (byte)(w >> 8); + *ret++ = (byte)(w >> 16); + *ret++ = (byte)(w >> 24); + } +} + +void blst_uint32_from_scalar(unsigned int ret[8], const pow256 a) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + size_t i; + + if ((uptr_t)ret==(uptr_t)a && is_endian.little) + return; + + for(i = 0; i < 8; i++) { + unsigned int w = (unsigned int)(*a++); + w |= (unsigned int)(*a++) << 8; + w |= (unsigned int)(*a++) << 16; + w |= (unsigned int)(*a++) << 24; + ret[i] = w; + } +} + +void blst_scalar_from_uint64(pow256 ret, const unsigned long long a[4]) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + size_t i; + + if ((uptr_t)ret==(uptr_t)a && is_endian.little) + return; + + for(i = 0; i < 4; i++) { + unsigned long long w = a[i]; + *ret++ = (byte)w; + *ret++ = (byte)(w >> 8); + *ret++ = (byte)(w >> 16); + *ret++ = (byte)(w >> 24); + *ret++ = (byte)(w >> 32); + *ret++ = (byte)(w >> 40); + *ret++ = (byte)(w >> 48); + *ret++ = (byte)(w >> 56); + } +} + +void blst_uint64_from_scalar(unsigned long long ret[4], const pow256 a) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + size_t i; + + if ((uptr_t)ret==(uptr_t)a && is_endian.little) + return; + + for(i = 0; i < 4; i++) { + unsigned long long w = (unsigned long long)(*a++); + w |= (unsigned long long)(*a++) << 8; + w |= (unsigned long long)(*a++) << 16; + w |= (unsigned long long)(*a++) << 24; + w |= (unsigned long long)(*a++) << 32; + w |= (unsigned long long)(*a++) << 40; + w |= (unsigned long long)(*a++) << 48; + w |= (unsigned long long)(*a++) << 56; + ret[i] = w; + } +} + +void blst_scalar_from_bendian(pow256 ret, const unsigned char a[32]) +{ + vec256 out; + limbs_from_be_bytes(out, a, sizeof(out)); + le_bytes_from_limbs(ret, out, sizeof(out)); + vec_zero(out, sizeof(out)); +} + +void blst_bendian_from_scalar(unsigned char ret[32], const pow256 a) +{ + vec256 out; + limbs_from_le_bytes(out, a, sizeof(out)); + be_bytes_from_limbs(ret, out, sizeof(out)); + vec_zero(out, sizeof(out)); +} + +void blst_scalar_from_lendian(pow256 ret, const unsigned char a[32]) +{ + size_t i; + + if ((uptr_t)ret==(uptr_t)a) + return; + + for (i = 0; i < 32; i++) + ret[i] = a[i]; +} + +void blst_lendian_from_scalar(unsigned char ret[32], const pow256 a) +{ + size_t i; + + if ((uptr_t)ret==(uptr_t)a) + return; + + for (i = 0; i < 32; i++) + ret[i] = a[i]; +} + +void blst_fr_from_uint64(vec256 ret, const unsigned long long a[4]) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + + if (sizeof(limb_t) == 4 && !is_endian.little) { + int i; + for (i = 0; i < 4; i++) { + unsigned long long limb = a[i]; + ret[2*i] = (limb_t)limb; + ret[2*i+1] = (limb_t)(limb >> 32); + } + a = (const unsigned long long *)ret; + } + mul_mont_sparse_256(ret, (const limb_t *)a, BLS12_381_rRR, BLS12_381_r, r0); +} + +void blst_uint64_from_fr(unsigned long long ret[4], const vec256 a) +{ + const union { + long one; + char little; + } is_endian = { 1 }; + + if (sizeof(limb_t) == 8 || is_endian.little) { + from_mont_256((limb_t *)ret, a, BLS12_381_r, r0); + } else { + vec256 out; + int i; + + from_mont_256(out, a, BLS12_381_r, r0); + for (i = 0; i < 4; i++) + ret[i] = out[2*i] | ((unsigned long long)out[2*i+1] << 32); + vec_zero(out, sizeof(out)); + } +} + +int blst_scalar_from_le_bytes(pow256 out, const unsigned char *bytes, size_t n) +{ + size_t rem = (n - 1) % 32 + 1; + struct { vec256 out, digit; } t; + limb_t ret; + + vec_zero(t.out, sizeof(t.out)); + + n -= rem; + limbs_from_le_bytes(t.out, bytes += n, rem); + mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0); + + while (n) { + limbs_from_le_bytes(t.digit, bytes -= 32, 32); + add_mod_256(t.out, t.out, t.digit, BLS12_381_r); + mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0); + n -= 32; + } + + from_mont_256(t.out, t.out, BLS12_381_r, r0); + + ret = vec_is_zero(t.out, sizeof(t.out)); + le_bytes_from_limbs(out, t.out, 32); + vec_zero(&t, sizeof(t)); + + return (int)(ret^1); +} + +int blst_scalar_from_be_bytes(pow256 out, const unsigned char *bytes, size_t n) +{ + size_t rem = (n - 1) % 32 + 1; + struct { vec256 out, digit; } t; + limb_t ret; + + vec_zero(t.out, sizeof(t.out)); + + limbs_from_be_bytes(t.out, bytes, rem); + mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0); + + while (n -= rem) { + limbs_from_be_bytes(t.digit, bytes += rem, 32); + add_mod_256(t.out, t.out, t.digit, BLS12_381_r); + mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0); + rem = 32; + } + + from_mont_256(t.out, t.out, BLS12_381_r, r0); + + ret = vec_is_zero(t.out, sizeof(t.out)); + le_bytes_from_limbs(out, t.out, 32); + vec_zero(&t, sizeof(t)); + + return (int)(ret^1); +} + +/* + * Single-short SHA-256 hash function. + */ +#include "sha256.h" + +void blst_sha256(unsigned char md[32], const void *msg, size_t len) +{ + SHA256_CTX ctx; + + sha256_init(&ctx); + sha256_update(&ctx, msg, len); + sha256_final(md, &ctx); +} + +/* + * Test facilitator. + */ +void blst_scalar_from_hexascii(pow256 ret, const char *hex) +{ bytes_from_hexascii(ret, sizeof(pow256), hex); } + +void blst_fr_from_hexascii(vec256 ret, const char *hex) +{ + limbs_from_hexascii(ret, sizeof(vec256), hex); + mul_mont_sparse_256(ret, ret, BLS12_381_rRR, BLS12_381_r, r0); +} + +void blst_fp_from_hexascii(vec384 ret, const char *hex) +{ + limbs_from_hexascii(ret, sizeof(vec384), hex); + mul_fp(ret, ret, BLS12_381_RR); +} diff --git a/crypto/blst_src/fields.h b/crypto/blst_src/fields.h new file mode 100644 index 00000000000..4b2323d2cce --- /dev/null +++ b/crypto/blst_src/fields.h @@ -0,0 +1,116 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_381_ASM_FIELDS_H__ +#define __BLS12_381_ASM_FIELDS_H__ + +#include "vect.h" +#include "consts.h" + +/* + * BLS12-381-specific Fp shortcuts to assembly. + */ +static inline void add_fp(vec384 ret, const vec384 a, const vec384 b) +{ add_mod_384(ret, a, b, BLS12_381_P); } + +static inline void sub_fp(vec384 ret, const vec384 a, const vec384 b) +{ sub_mod_384(ret, a, b, BLS12_381_P); } + +static inline void mul_by_3_fp(vec384 ret, const vec384 a) +{ mul_by_3_mod_384(ret, a, BLS12_381_P); } + +static inline void mul_by_8_fp(vec384 ret, const vec384 a) +{ mul_by_8_mod_384(ret, a, BLS12_381_P); } + +static inline void lshift_fp(vec384 ret, const vec384 a, size_t count) +{ lshift_mod_384(ret, a, count, BLS12_381_P); } + +static inline void rshift_fp(vec384 ret, const vec384 a, size_t count) +{ rshift_mod_384(ret, a, count, BLS12_381_P); } + +static inline void div_by_2_fp(vec384 ret, const vec384 a) +{ div_by_2_mod_384(ret, a, BLS12_381_P); } + +static inline void mul_fp(vec384 ret, const vec384 a, const vec384 b) +{ mul_mont_384(ret, a, b, BLS12_381_P, p0); } + +static inline void sqr_fp(vec384 ret, const vec384 a) +{ sqr_mont_384(ret, a, BLS12_381_P, p0); } + +static inline void cneg_fp(vec384 ret, const vec384 a, bool_t flag) +{ cneg_mod_384(ret, a, flag, BLS12_381_P); } + +static inline void from_fp(vec384 ret, const vec384 a) +{ from_mont_384(ret, a, BLS12_381_P, p0); } + +static inline void redc_fp(vec384 ret, const vec768 a) +{ redc_mont_384(ret, a, BLS12_381_P, p0); } + +/* + * BLS12-381-specific Fp2 shortcuts to assembly. + */ +static inline void add_fp2(vec384x ret, const vec384x a, const vec384x b) +{ add_mod_384x(ret, a, b, BLS12_381_P); } + +static inline void sub_fp2(vec384x ret, const vec384x a, const vec384x b) +{ sub_mod_384x(ret, a, b, BLS12_381_P); } + +static inline void mul_by_3_fp2(vec384x ret, const vec384x a) +{ mul_by_3_mod_384x(ret, a, BLS12_381_P); } + +static inline void mul_by_8_fp2(vec384x ret, const vec384x a) +{ mul_by_8_mod_384x(ret, a, BLS12_381_P); } + +static inline void lshift_fp2(vec384x ret, const vec384x a, size_t count) +{ + lshift_mod_384(ret[0], a[0], count, BLS12_381_P); + lshift_mod_384(ret[1], a[1], count, BLS12_381_P); +} + +static inline void mul_fp2(vec384x ret, const vec384x a, const vec384x b) +{ mul_mont_384x(ret, a, b, BLS12_381_P, p0); } + +static inline void sqr_fp2(vec384x ret, const vec384x a) +{ sqr_mont_384x(ret, a, BLS12_381_P, p0); } + +static inline void cneg_fp2(vec384x ret, const vec384x a, bool_t flag) +{ + cneg_mod_384(ret[0], a[0], flag, BLS12_381_P); + cneg_mod_384(ret[1], a[1], flag, BLS12_381_P); +} + +#define vec_load_global vec_copy + +static void reciprocal_fp(vec384 out, const vec384 inp); +static void flt_reciprocal_fp(vec384 out, const vec384 inp); +static bool_t recip_sqrt_fp(vec384 out, const vec384 inp); +static bool_t sqrt_fp(vec384 out, const vec384 inp); + +static void reciprocal_fp2(vec384x out, const vec384x inp); +static void flt_reciprocal_fp2(vec384x out, const vec384x inp); +static bool_t recip_sqrt_fp2(vec384x out, const vec384x inp, + const vec384x recip_ZZZ, const vec384x magic_ZZZ); +static bool_t sqrt_fp2(vec384x out, const vec384x inp); +static bool_t sqrt_align_fp2(vec384x out, const vec384x ret, + const vec384x sqrt, const vec384x inp); + +typedef vec384x vec384fp2; +typedef vec384fp2 vec384fp6[3]; +typedef vec384fp6 vec384fp12[2]; + +static void sqr_fp12(vec384fp12 ret, const vec384fp12 a); +static void cyclotomic_sqr_fp12(vec384fp12 ret, const vec384fp12 a); +static void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b); +static void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a, + const vec384fp6 xy00z0); +static void conjugate_fp12(vec384fp12 a); +static void inverse_fp12(vec384fp12 ret, const vec384fp12 a); +/* caveat lector! |n| has to be non-zero and not more than 3! */ +static void frobenius_map_fp12(vec384fp12 ret, const vec384fp12 a, size_t n); + +#define neg_fp(r,a) cneg_fp((r),(a),1) +#define neg_fp2(r,a) cneg_fp2((r),(a),1) + +#endif /* __BLS12_381_ASM_FIELDS_H__ */ diff --git a/crypto/blst_src/fp12_tower.c b/crypto/blst_src/fp12_tower.c new file mode 100644 index 00000000000..d6c0b124eb6 --- /dev/null +++ b/crypto/blst_src/fp12_tower.c @@ -0,0 +1,789 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fields.h" + +/* + * Fp2 = Fp[u] / (u^2 + 1) + * Fp6 = Fp2[v] / (v^3 - u - 1) + * Fp12 = Fp6[w] / (w^2 - v) + */ + +static inline void mul_by_u_plus_1_fp2(vec384x ret, const vec384x a) +{ mul_by_1_plus_i_mod_384x(ret, a, BLS12_381_P); } + +#if 1 && !defined(__BLST_NO_ASM__) +#define __FP2x2__ +/* + * Fp2x2 is a "widened" version of Fp2, which allows to consolidate + * reductions from several multiplications. In other words instead of + * "mul_redc-mul_redc-add" we get "mul-mul-add-redc," where latter + * addition is double-width... To be more specific this gives ~7-10% + * faster pairing depending on platform... + */ +typedef vec768 vec768x[2]; + +static inline void add_fp2x2(vec768x ret, const vec768x a, const vec768x b) +{ + add_mod_384x384(ret[0], a[0], b[0], BLS12_381_P); + add_mod_384x384(ret[1], a[1], b[1], BLS12_381_P); +} + +static inline void sub_fp2x2(vec768x ret, const vec768x a, const vec768x b) +{ + sub_mod_384x384(ret[0], a[0], b[0], BLS12_381_P); + sub_mod_384x384(ret[1], a[1], b[1], BLS12_381_P); +} + +static inline void mul_by_u_plus_1_fp2x2(vec768x ret, const vec768x a) +{ + /* caveat lector! |ret| may not be same as |a| */ + sub_mod_384x384(ret[0], a[0], a[1], BLS12_381_P); + add_mod_384x384(ret[1], a[0], a[1], BLS12_381_P); +} + +static inline void redc_fp2x2(vec384x ret, const vec768x a) +{ + redc_mont_384(ret[0], a[0], BLS12_381_P, p0); + redc_mont_384(ret[1], a[1], BLS12_381_P, p0); +} + +static void mul_fp2x2(vec768x ret, const vec384x a, const vec384x b) +{ +#if 1 + mul_382x(ret, a, b, BLS12_381_P); /* +~6% in Miller loop */ +#else + union { vec384 x[2]; vec768 x2; } t; + + add_mod_384(t.x[0], a[0], a[1], BLS12_381_P); + add_mod_384(t.x[1], b[0], b[1], BLS12_381_P); + mul_384(ret[1], t.x[0], t.x[1]); + + mul_384(ret[0], a[0], b[0]); + mul_384(t.x2, a[1], b[1]); + + sub_mod_384x384(ret[1], ret[1], ret[0], BLS12_381_P); + sub_mod_384x384(ret[1], ret[1], t.x2, BLS12_381_P); + + sub_mod_384x384(ret[0], ret[0], t.x2, BLS12_381_P); +#endif +} + +static void sqr_fp2x2(vec768x ret, const vec384x a) +{ +#if 1 + sqr_382x(ret, a, BLS12_381_P); /* +~5% in final exponentiation */ +#else + vec384 t0, t1; + + add_mod_384(t0, a[0], a[1], BLS12_381_P); + sub_mod_384(t1, a[0], a[1], BLS12_381_P); + + mul_384(ret[1], a[0], a[1]); + add_mod_384x384(ret[1], ret[1], ret[1], BLS12_381_P); + + mul_384(ret[0], t0, t1); +#endif +} +#endif /* __FP2x2__ */ + +/* + * Fp6 extension + */ +#if defined(__FP2x2__) /* ~10-13% improvement for mul_fp12 and sqr_fp12 */ +typedef vec768x vec768fp6[3]; + +static inline void sub_fp6x2(vec768fp6 ret, const vec768fp6 a, + const vec768fp6 b) +{ + sub_fp2x2(ret[0], a[0], b[0]); + sub_fp2x2(ret[1], a[1], b[1]); + sub_fp2x2(ret[2], a[2], b[2]); +} + +static void mul_fp6x2(vec768fp6 ret, const vec384fp6 a, const vec384fp6 b) +{ + vec768x t0, t1, t2; + vec384x aa, bb; + + mul_fp2x2(t0, a[0], b[0]); + mul_fp2x2(t1, a[1], b[1]); + mul_fp2x2(t2, a[2], b[2]); + + /* ret[0] = ((a1 + a2)*(b1 + b2) - a1*b1 - a2*b2)*(u+1) + a0*b0 + = (a1*b2 + a2*b1)*(u+1) + a0*b0 */ + add_fp2(aa, a[1], a[2]); + add_fp2(bb, b[1], b[2]); + mul_fp2x2(ret[0], aa, bb); + sub_fp2x2(ret[0], ret[0], t1); + sub_fp2x2(ret[0], ret[0], t2); + mul_by_u_plus_1_fp2x2(ret[1], ret[0]); /* borrow ret[1] for a moment */ + add_fp2x2(ret[0], ret[1], t0); + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*b2*(u+1) + = a0*b1 + a1*b0 + a2*b2*(u+1) */ + add_fp2(aa, a[0], a[1]); + add_fp2(bb, b[0], b[1]); + mul_fp2x2(ret[1], aa, bb); + sub_fp2x2(ret[1], ret[1], t0); + sub_fp2x2(ret[1], ret[1], t1); + mul_by_u_plus_1_fp2x2(ret[2], t2); /* borrow ret[2] for a moment */ + add_fp2x2(ret[1], ret[1], ret[2]); + + /* ret[2] = (a0 + a2)*(b0 + b2) - a0*b0 - a2*b2 + a1*b1 + = a0*b2 + a2*b0 + a1*b1 */ + add_fp2(aa, a[0], a[2]); + add_fp2(bb, b[0], b[2]); + mul_fp2x2(ret[2], aa, bb); + sub_fp2x2(ret[2], ret[2], t0); + sub_fp2x2(ret[2], ret[2], t2); + add_fp2x2(ret[2], ret[2], t1); +} + +static inline void redc_fp6x2(vec384fp6 ret, const vec768fp6 a) +{ + redc_fp2x2(ret[0], a[0]); + redc_fp2x2(ret[1], a[1]); + redc_fp2x2(ret[2], a[2]); +} + +static void mul_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b) +{ + vec768fp6 r; + + mul_fp6x2(r, a, b); + redc_fp6x2(ret, r); /* narrow to normal width */ +} + +static void sqr_fp6(vec384fp6 ret, const vec384fp6 a) +{ + vec768x s0, m01, m12, s2, rx; + + sqr_fp2x2(s0, a[0]); + + mul_fp2x2(m01, a[0], a[1]); + add_fp2x2(m01, m01, m01); + + mul_fp2x2(m12, a[1], a[2]); + add_fp2x2(m12, m12, m12); + + sqr_fp2x2(s2, a[2]); + + /* ret[2] = (a0 + a1 + a2)^2 - a0^2 - a2^2 - 2*(a0*a1) - 2*(a1*a2) + = a1^2 + 2*(a0*a2) */ + add_fp2(ret[2], a[2], a[1]); + add_fp2(ret[2], ret[2], a[0]); + sqr_fp2x2(rx, ret[2]); + sub_fp2x2(rx, rx, s0); + sub_fp2x2(rx, rx, s2); + sub_fp2x2(rx, rx, m01); + sub_fp2x2(rx, rx, m12); + redc_fp2x2(ret[2], rx); + + /* ret[0] = a0^2 + 2*(a1*a2)*(u+1) */ + mul_by_u_plus_1_fp2x2(rx, m12); + add_fp2x2(rx, rx, s0); + redc_fp2x2(ret[0], rx); + + /* ret[1] = a2^2*(u+1) + 2*(a0*a1) */ + mul_by_u_plus_1_fp2x2(rx, s2); + add_fp2x2(rx, rx, m01); + redc_fp2x2(ret[1], rx); +} +#else +static void mul_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b) +{ + vec384x t0, t1, t2, t3, t4, t5; + + mul_fp2(t0, a[0], b[0]); + mul_fp2(t1, a[1], b[1]); + mul_fp2(t2, a[2], b[2]); + + /* ret[0] = ((a1 + a2)*(b1 + b2) - a1*b1 - a2*b2)*(u+1) + a0*b0 + = (a1*b2 + a2*b1)*(u+1) + a0*b0 */ + add_fp2(t4, a[1], a[2]); + add_fp2(t5, b[1], b[2]); + mul_fp2(t3, t4, t5); + sub_fp2(t3, t3, t1); + sub_fp2(t3, t3, t2); + mul_by_u_plus_1_fp2(t3, t3); + /* add_fp2(ret[0], t3, t0); considering possible aliasing... */ + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*b2*(u+1) + = a0*b1 + a1*b0 + a2*b2*(u+1) */ + add_fp2(t4, a[0], a[1]); + add_fp2(t5, b[0], b[1]); + mul_fp2(ret[1], t4, t5); + sub_fp2(ret[1], ret[1], t0); + sub_fp2(ret[1], ret[1], t1); + mul_by_u_plus_1_fp2(t4, t2); + add_fp2(ret[1], ret[1], t4); + + /* ret[2] = (a0 + a2)*(b0 + b2) - a0*b0 - a2*b2 + a1*b1 + = a0*b2 + a2*b0 + a1*b1 */ + add_fp2(t4, a[0], a[2]); + add_fp2(t5, b[0], b[2]); + mul_fp2(ret[2], t4, t5); + sub_fp2(ret[2], ret[2], t0); + sub_fp2(ret[2], ret[2], t2); + add_fp2(ret[2], ret[2], t1); + + add_fp2(ret[0], t3, t0); /* ... moved from above */ +} + +static void sqr_fp6(vec384fp6 ret, const vec384fp6 a) +{ + vec384x s0, m01, m12, s2; + + sqr_fp2(s0, a[0]); + + mul_fp2(m01, a[0], a[1]); + add_fp2(m01, m01, m01); + + mul_fp2(m12, a[1], a[2]); + add_fp2(m12, m12, m12); + + sqr_fp2(s2, a[2]); + + /* ret[2] = (a0 + a1 + a2)^2 - a0^2 - a2^2 - 2*(a0*a1) - 2*(a1*a2) + = a1^2 + 2*(a0*a2) */ + add_fp2(ret[2], a[2], a[1]); + add_fp2(ret[2], ret[2], a[0]); + sqr_fp2(ret[2], ret[2]); + sub_fp2(ret[2], ret[2], s0); + sub_fp2(ret[2], ret[2], s2); + sub_fp2(ret[2], ret[2], m01); + sub_fp2(ret[2], ret[2], m12); + + /* ret[0] = a0^2 + 2*(a1*a2)*(u+1) */ + mul_by_u_plus_1_fp2(ret[0], m12); + add_fp2(ret[0], ret[0], s0); + + /* ret[1] = a2^2*(u+1) + 2*(a0*a1) */ + mul_by_u_plus_1_fp2(ret[1], s2); + add_fp2(ret[1], ret[1], m01); +} +#endif + +static void add_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b) +{ + add_fp2(ret[0], a[0], b[0]); + add_fp2(ret[1], a[1], b[1]); + add_fp2(ret[2], a[2], b[2]); +} + +static void sub_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b) +{ + sub_fp2(ret[0], a[0], b[0]); + sub_fp2(ret[1], a[1], b[1]); + sub_fp2(ret[2], a[2], b[2]); +} + +static void neg_fp6(vec384fp6 ret, const vec384fp6 a) +{ + neg_fp2(ret[0], a[0]); + neg_fp2(ret[1], a[1]); + neg_fp2(ret[2], a[2]); +} + +#if 0 +#define mul_by_v_fp6 mul_by_v_fp6 +static void mul_by_v_fp6(vec384fp6 ret, const vec384fp6 a) +{ + vec384x t; + + mul_by_u_plus_1_fp2(t, a[2]); + vec_copy(ret[2], a[1], sizeof(a[1])); + vec_copy(ret[1], a[0], sizeof(a[0])); + vec_copy(ret[0], t, sizeof(t)); +} +#endif + +/* + * Fp12 extension + */ +#if defined(__FP2x2__) +static void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b) +{ + vec768fp6 t0, t1, rx; + vec384fp6 t2; + + mul_fp6x2(t0, a[0], b[0]); + mul_fp6x2(t1, a[1], b[1]); + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + = a0*b1 + a1*b0 */ + add_fp6(t2, a[0], a[1]); + add_fp6(ret[1], b[0], b[1]); + mul_fp6x2(rx, ret[1], t2); + sub_fp6x2(rx, rx, t0); + sub_fp6x2(rx, rx, t1); + redc_fp6x2(ret[1], rx); + + /* ret[0] = a0*b0 + a1*b1*v */ + mul_by_u_plus_1_fp2x2(rx[0], t1[2]); + add_fp2x2(rx[0], t0[0], rx[0]); + add_fp2x2(rx[1], t0[1], t1[0]); + add_fp2x2(rx[2], t0[2], t1[1]); + redc_fp6x2(ret[0], rx); +} + +static inline void mul_by_0y0_fp6x2(vec768fp6 ret, const vec384fp6 a, + const vec384fp2 b) +{ + mul_fp2x2(ret[1], a[2], b); /* borrow ret[1] for a moment */ + mul_by_u_plus_1_fp2x2(ret[0], ret[1]); + mul_fp2x2(ret[1], a[0], b); + mul_fp2x2(ret[2], a[1], b); +} + +static void mul_by_xy0_fp6x2(vec768fp6 ret, const vec384fp6 a, + const vec384fp6 b) +{ + vec768x t0, t1; + vec384x aa, bb; + + mul_fp2x2(t0, a[0], b[0]); + mul_fp2x2(t1, a[1], b[1]); + + /* ret[0] = ((a1 + a2)*(b1 + 0) - a1*b1 - a2*0)*(u+1) + a0*b0 + = (a1*0 + a2*b1)*(u+1) + a0*b0 */ + mul_fp2x2(ret[1], a[2], b[1]); /* borrow ret[1] for a moment */ + mul_by_u_plus_1_fp2x2(ret[0], ret[1]); + add_fp2x2(ret[0], ret[0], t0); + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*0*(u+1) + = a0*b1 + a1*b0 + a2*0*(u+1) */ + add_fp2(aa, a[0], a[1]); + add_fp2(bb, b[0], b[1]); + mul_fp2x2(ret[1], aa, bb); + sub_fp2x2(ret[1], ret[1], t0); + sub_fp2x2(ret[1], ret[1], t1); + + /* ret[2] = (a0 + a2)*(b0 + 0) - a0*b0 - a2*0 + a1*b1 + = a0*0 + a2*b0 + a1*b1 */ + mul_fp2x2(ret[2], a[2], b[0]); + add_fp2x2(ret[2], ret[2], t1); +} + +static void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a, + const vec384fp6 xy00z0) +{ + vec768fp6 t0, t1, rr; + vec384fp6 t2; + + mul_by_xy0_fp6x2(t0, a[0], xy00z0); + mul_by_0y0_fp6x2(t1, a[1], xy00z0[2]); + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + = a0*b1 + a1*b0 */ + vec_copy(t2[0], xy00z0[0], sizeof(t2[0])); + add_fp2(t2[1], xy00z0[1], xy00z0[2]); + add_fp6(ret[1], a[0], a[1]); + mul_by_xy0_fp6x2(rr, ret[1], t2); + sub_fp6x2(rr, rr, t0); + sub_fp6x2(rr, rr, t1); + redc_fp6x2(ret[1], rr); + + /* ret[0] = a0*b0 + a1*b1*v */ + mul_by_u_plus_1_fp2x2(rr[0], t1[2]); + add_fp2x2(rr[0], t0[0], rr[0]); + add_fp2x2(rr[1], t0[1], t1[0]); + add_fp2x2(rr[2], t0[2], t1[1]); + redc_fp6x2(ret[0], rr); +} +#else +static void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b) +{ + vec384fp6 t0, t1, t2; + + mul_fp6(t0, a[0], b[0]); + mul_fp6(t1, a[1], b[1]); + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + = a0*b1 + a1*b0 */ + add_fp6(t2, a[0], a[1]); + add_fp6(ret[1], b[0], b[1]); + mul_fp6(ret[1], ret[1], t2); + sub_fp6(ret[1], ret[1], t0); + sub_fp6(ret[1], ret[1], t1); + + /* ret[0] = a0*b0 + a1*b1*v */ +#ifdef mul_by_v_fp6 + mul_by_v_fp6(t1, t1); + add_fp6(ret[0], t0, t1); +#else + mul_by_u_plus_1_fp2(t1[2], t1[2]); + add_fp2(ret[0][0], t0[0], t1[2]); + add_fp2(ret[0][1], t0[1], t1[0]); + add_fp2(ret[0][2], t0[2], t1[1]); +#endif +} + +static inline void mul_by_0y0_fp6(vec384fp6 ret, const vec384fp6 a, + const vec384fp2 b) +{ + vec384x t; + + mul_fp2(t, a[2], b); + mul_fp2(ret[2], a[1], b); + mul_fp2(ret[1], a[0], b); + mul_by_u_plus_1_fp2(ret[0], t); +} + +static void mul_by_xy0_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b) +{ + vec384x t0, t1, /*t2,*/ t3, t4, t5; + + mul_fp2(t0, a[0], b[0]); + mul_fp2(t1, a[1], b[1]); + + /* ret[0] = ((a1 + a2)*(b1 + 0) - a1*b1 - a2*0)*(u+1) + a0*b0 + = (a1*0 + a2*b1)*(u+1) + a0*b0 */ + mul_fp2(t3, a[2], b[1]); + mul_by_u_plus_1_fp2(t3, t3); + /* add_fp2(ret[0], t3, t0); considering possible aliasing... */ + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*0*(u+1) + = a0*b1 + a1*b0 + a2*0*(u+1) */ + add_fp2(t4, a[0], a[1]); + add_fp2(t5, b[0], b[1]); + mul_fp2(ret[1], t4, t5); + sub_fp2(ret[1], ret[1], t0); + sub_fp2(ret[1], ret[1], t1); + + /* ret[2] = (a0 + a2)*(b0 + 0) - a0*b0 - a2*0 + a1*b1 + = a0*0 + a2*b0 + a1*b1 */ + mul_fp2(ret[2], a[2], b[0]); + add_fp2(ret[2], ret[2], t1); + + add_fp2(ret[0], t3, t0); /* ... moved from above */ +} + +static void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a, + const vec384fp6 xy00z0) +{ + vec384fp6 t0, t1, t2; + + mul_by_xy0_fp6(t0, a[0], xy00z0); + mul_by_0y0_fp6(t1, a[1], xy00z0[2]); + + /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + = a0*b1 + a1*b0 */ + vec_copy(t2[0], xy00z0[0], sizeof(t2[0])); + add_fp2(t2[1], xy00z0[1], xy00z0[2]); + add_fp6(ret[1], a[0], a[1]); + mul_by_xy0_fp6(ret[1], ret[1], t2); + sub_fp6(ret[1], ret[1], t0); + sub_fp6(ret[1], ret[1], t1); + + /* ret[0] = a0*b0 + a1*b1*v */ +#ifdef mul_by_v_fp6 + mul_by_v_fp6(t1, t1); + add_fp6(ret[0], t0, t1); +#else + mul_by_u_plus_1_fp2(t1[2], t1[2]); + add_fp2(ret[0][0], t0[0], t1[2]); + add_fp2(ret[0][1], t0[1], t1[0]); + add_fp2(ret[0][2], t0[2], t1[1]); +#endif +} +#endif + +static void sqr_fp12(vec384fp12 ret, const vec384fp12 a) +{ + vec384fp6 t0, t1; + + add_fp6(t0, a[0], a[1]); +#ifdef mul_by_v_fp6 + mul_by_v_fp6(t1, a[1]); + add_fp6(t1, a[0], t1); +#else + mul_by_u_plus_1_fp2(t1[2], a[1][2]); + add_fp2(t1[0], a[0][0], t1[2]); + add_fp2(t1[1], a[0][1], a[1][0]); + add_fp2(t1[2], a[0][2], a[1][1]); +#endif + mul_fp6(t0, t0, t1); + mul_fp6(t1, a[0], a[1]); + + /* ret[1] = 2*(a0*a1) */ + add_fp6(ret[1], t1, t1); + + /* ret[0] = (a0 + a1)*(a0 + a1*v) - a0*a1 - a0*a1*v + = a0^2 + a1^2*v */ + sub_fp6(ret[0], t0, t1); +#ifdef mul_by_v_fp6 + mul_by_v_fp6(t1, t1); + sub_fp6(ret[0], ret[0], t1); +#else + mul_by_u_plus_1_fp2(t1[2], t1[2]); + sub_fp2(ret[0][0], ret[0][0], t1[2]); + sub_fp2(ret[0][1], ret[0][1], t1[0]); + sub_fp2(ret[0][2], ret[0][2], t1[1]); +#endif +} + +static void conjugate_fp12(vec384fp12 a) +{ neg_fp6(a[1], a[1]); } + +static void inverse_fp6(vec384fp6 ret, const vec384fp6 a) +{ + vec384x c0, c1, c2, t0, t1; + + /* c0 = a0^2 - (a1*a2)*(u+1) */ + sqr_fp2(c0, a[0]); + mul_fp2(t0, a[1], a[2]); + mul_by_u_plus_1_fp2(t0, t0); + sub_fp2(c0, c0, t0); + + /* c1 = a2^2*(u+1) - (a0*a1) */ + sqr_fp2(c1, a[2]); + mul_by_u_plus_1_fp2(c1, c1); + mul_fp2(t0, a[0], a[1]); + sub_fp2(c1, c1, t0); + + /* c2 = a1^2 - a0*a2 */ + sqr_fp2(c2, a[1]); + mul_fp2(t0, a[0], a[2]); + sub_fp2(c2, c2, t0); + + /* (a2*c1 + a1*c2)*(u+1) + a0*c0 */ + mul_fp2(t0, c1, a[2]); + mul_fp2(t1, c2, a[1]); + add_fp2(t0, t0, t1); + mul_by_u_plus_1_fp2(t0, t0); + mul_fp2(t1, c0, a[0]); + add_fp2(t0, t0, t1); + + reciprocal_fp2(t1, t0); + + mul_fp2(ret[0], c0, t1); + mul_fp2(ret[1], c1, t1); + mul_fp2(ret[2], c2, t1); +} + +static void inverse_fp12(vec384fp12 ret, const vec384fp12 a) +{ + vec384fp6 t0, t1; + + sqr_fp6(t0, a[0]); + sqr_fp6(t1, a[1]); +#ifdef mul_by_v_fp6 + mul_by_v_fp6(t1, t1); + sub_fp6(t0, t0, t1); +#else + mul_by_u_plus_1_fp2(t1[2], t1[2]); + sub_fp2(t0[0], t0[0], t1[2]); + sub_fp2(t0[1], t0[1], t1[0]); + sub_fp2(t0[2], t0[2], t1[1]); +#endif + + inverse_fp6(t1, t0); + + mul_fp6(ret[0], a[0], t1); + mul_fp6(ret[1], a[1], t1); + neg_fp6(ret[1], ret[1]); +} + +typedef vec384x vec384fp4[2]; + +#if defined(__FP2x2__) +static void sqr_fp4(vec384fp4 ret, const vec384x a0, const vec384x a1) +{ + vec768x t0, t1, t2; + + sqr_fp2x2(t0, a0); + sqr_fp2x2(t1, a1); + add_fp2(ret[1], a0, a1); + + mul_by_u_plus_1_fp2x2(t2, t1); + add_fp2x2(t2, t2, t0); + redc_fp2x2(ret[0], t2); + + sqr_fp2x2(t2, ret[1]); + sub_fp2x2(t2, t2, t0); + sub_fp2x2(t2, t2, t1); + redc_fp2x2(ret[1], t2); +} +#else +static void sqr_fp4(vec384fp4 ret, const vec384x a0, const vec384x a1) +{ + vec384x t0, t1; + + sqr_fp2(t0, a0); + sqr_fp2(t1, a1); + add_fp2(ret[1], a0, a1); + + mul_by_u_plus_1_fp2(ret[0], t1); + add_fp2(ret[0], ret[0], t0); + + sqr_fp2(ret[1], ret[1]); + sub_fp2(ret[1], ret[1], t0); + sub_fp2(ret[1], ret[1], t1); +} +#endif + +static void cyclotomic_sqr_fp12(vec384fp12 ret, const vec384fp12 a) +{ + vec384fp4 t0, t1, t2; + + sqr_fp4(t0, a[0][0], a[1][1]); + sqr_fp4(t1, a[1][0], a[0][2]); + sqr_fp4(t2, a[0][1], a[1][2]); + + sub_fp2(ret[0][0], t0[0], a[0][0]); + add_fp2(ret[0][0], ret[0][0], ret[0][0]); + add_fp2(ret[0][0], ret[0][0], t0[0]); + + sub_fp2(ret[0][1], t1[0], a[0][1]); + add_fp2(ret[0][1], ret[0][1], ret[0][1]); + add_fp2(ret[0][1], ret[0][1], t1[0]); + + sub_fp2(ret[0][2], t2[0], a[0][2]); + add_fp2(ret[0][2], ret[0][2], ret[0][2]); + add_fp2(ret[0][2], ret[0][2], t2[0]); + + mul_by_u_plus_1_fp2(t2[1], t2[1]); + add_fp2(ret[1][0], t2[1], a[1][0]); + add_fp2(ret[1][0], ret[1][0], ret[1][0]); + add_fp2(ret[1][0], ret[1][0], t2[1]); + + add_fp2(ret[1][1], t0[1], a[1][1]); + add_fp2(ret[1][1], ret[1][1], ret[1][1]); + add_fp2(ret[1][1], ret[1][1], t0[1]); + + add_fp2(ret[1][2], t1[1], a[1][2]); + add_fp2(ret[1][2], ret[1][2], ret[1][2]); + add_fp2(ret[1][2], ret[1][2], t1[1]); +} + +/* + * caveat lector! |n| has to be non-zero and not more than 3! + */ +static inline void frobenius_map_fp2(vec384x ret, const vec384x a, size_t n) +{ + vec_copy(ret[0], a[0], sizeof(ret[0])); + cneg_fp(ret[1], a[1], n & 1); +} + +static void frobenius_map_fp6(vec384fp6 ret, const vec384fp6 a, size_t n) +{ + static const vec384x coeffs1[] = { /* (u + 1)^((P^n - 1) / 3) */ + { { 0 }, + { TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2), + TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e), + TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741) } }, + { { TO_LIMB_T(0x30f1361b798a64e8), TO_LIMB_T(0xf3b8ddab7ece5a2a), + TO_LIMB_T(0x16a8ca3ac61577f7), TO_LIMB_T(0xc26a2ff874fd029b), + TO_LIMB_T(0x3636b76660701c6e), TO_LIMB_T(0x051ba4ab241b6160) } }, + { { 0 }, { ONE_MONT_P } } + }; + static const vec384 coeffs2[] = { /* (u + 1)^((2P^n - 2) / 3) */ + { TO_LIMB_T(0x890dc9e4867545c3), TO_LIMB_T(0x2af322533285a5d5), + TO_LIMB_T(0x50880866309b7e2c), TO_LIMB_T(0xa20d1b8c7e881024), + TO_LIMB_T(0x14e4f04fe2db9068), TO_LIMB_T(0x14e56d3f1564853a) }, + { TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2), + TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e), + TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741) }, + { TO_LIMB_T(0x43f5fffffffcaaae), TO_LIMB_T(0x32b7fff2ed47fffd), + TO_LIMB_T(0x07e83a49a2e99d69), TO_LIMB_T(0xeca8f3318332bb7a), + TO_LIMB_T(0xef148d1ea0f4c069), TO_LIMB_T(0x040ab3263eff0206) } + }; + + frobenius_map_fp2(ret[0], a[0], n); + frobenius_map_fp2(ret[1], a[1], n); + frobenius_map_fp2(ret[2], a[2], n); + --n; /* implied ONE_MONT_P at index 0 */ + mul_fp2(ret[1], ret[1], coeffs1[n]); + mul_fp(ret[2][0], ret[2][0], coeffs2[n]); + mul_fp(ret[2][1], ret[2][1], coeffs2[n]); +} + +static void frobenius_map_fp12(vec384fp12 ret, const vec384fp12 a, size_t n) +{ + static const vec384x coeffs[] = { /* (u + 1)^((P^n - 1) / 6) */ + { { TO_LIMB_T(0x07089552b319d465), TO_LIMB_T(0xc6695f92b50a8313), + TO_LIMB_T(0x97e83cccd117228f), TO_LIMB_T(0xa35baecab2dc29ee), + TO_LIMB_T(0x1ce393ea5daace4d), TO_LIMB_T(0x08f2220fb0fb66eb) }, + { TO_LIMB_T(0xb2f66aad4ce5d646), TO_LIMB_T(0x5842a06bfc497cec), + TO_LIMB_T(0xcf4895d42599d394), TO_LIMB_T(0xc11b9cba40a8e8d0), + TO_LIMB_T(0x2e3813cbe5a0de89), TO_LIMB_T(0x110eefda88847faf) } }, + { { TO_LIMB_T(0xecfb361b798dba3a), TO_LIMB_T(0xc100ddb891865a2c), + TO_LIMB_T(0x0ec08ff1232bda8e), TO_LIMB_T(0xd5c13cc6f1ca4721), + TO_LIMB_T(0x47222a47bf7b5c04), TO_LIMB_T(0x0110f184e51c5f59) } }, + { { TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183), + TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18), + TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) }, + { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), + TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), + TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } }, + }; + + frobenius_map_fp6(ret[0], a[0], n); + frobenius_map_fp6(ret[1], a[1], n); + --n; /* implied ONE_MONT_P at index 0 */ + mul_fp2(ret[1][0], ret[1][0], coeffs[n]); + mul_fp2(ret[1][1], ret[1][1], coeffs[n]); + mul_fp2(ret[1][2], ret[1][2], coeffs[n]); +} + + +/* + * BLS12-381-specific Fp12 shortcuts. + */ +void blst_fp12_sqr(vec384fp12 ret, const vec384fp12 a) +{ sqr_fp12(ret, a); } + +void blst_fp12_cyclotomic_sqr(vec384fp12 ret, const vec384fp12 a) +{ cyclotomic_sqr_fp12(ret, a); } + +void blst_fp12_mul(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b) +{ mul_fp12(ret, a, b); } + +void blst_fp12_mul_by_xy00z0(vec384fp12 ret, const vec384fp12 a, + const vec384fp6 xy00z0) +{ mul_by_xy00z0_fp12(ret, a, xy00z0); } + +void blst_fp12_conjugate(vec384fp12 a) +{ conjugate_fp12(a); } + +void blst_fp12_inverse(vec384fp12 ret, const vec384fp12 a) +{ inverse_fp12(ret, a); } + +/* caveat lector! |n| has to be non-zero and not more than 3! */ +void blst_fp12_frobenius_map(vec384fp12 ret, const vec384fp12 a, size_t n) +{ frobenius_map_fp12(ret, a, n); } + +int blst_fp12_is_equal(const vec384fp12 a, const vec384fp12 b) +{ return (int)vec_is_equal(a, b, sizeof(vec384fp12)); } + +int blst_fp12_is_one(const vec384fp12 a) +{ + return (int)(vec_is_equal(a[0][0], BLS12_381_Rx.p2, sizeof(a[0][0])) & + vec_is_zero(a[0][1], sizeof(vec384fp12) - sizeof(a[0][0]))); +} + +const vec384fp12 *blst_fp12_one(void) +{ return (const vec384fp12 *)BLS12_381_Rx.p12; } + +void blst_bendian_from_fp12(unsigned char ret[48*12], const vec384fp12 a) +{ + size_t i, j; + vec384 out; + + for (i = 0; i < 3; i++) { + for (j = 0; j < 2; j++) { + from_fp(out, a[j][i][0]); + be_bytes_from_limbs(ret, out, sizeof(vec384)); ret += 48; + from_fp(out, a[j][i][1]); + be_bytes_from_limbs(ret, out, sizeof(vec384)); ret += 48; + } + } +} + +size_t blst_fp12_sizeof(void) +{ return sizeof(vec384fp12); } diff --git a/crypto/blst_src/hash_to_field.c b/crypto/blst_src/hash_to_field.c new file mode 100644 index 00000000000..6816ea8b922 --- /dev/null +++ b/crypto/blst_src/hash_to_field.c @@ -0,0 +1,177 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "consts.h" +#include "sha256.h" + +static const vec384 BLS12_381_RRRR = { /* RR^2 */ + TO_LIMB_T(0xed48ac6bd94ca1e0), TO_LIMB_T(0x315f831e03a7adf8), + TO_LIMB_T(0x9a53352a615e29dd), TO_LIMB_T(0x34c04e5e921e1761), + TO_LIMB_T(0x2512d43565724728), TO_LIMB_T(0x0aa6346091755d4d) +}; + +#ifdef expand_message_xmd +void expand_message_xmd(unsigned char *bytes, size_t len_in_bytes, + const unsigned char *aug, size_t aug_len, + const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len); +#else +static void sha256_init_Zpad(SHA256_CTX *ctx) +{ + ctx->h[0] = 0xda5698beU; + ctx->h[1] = 0x17b9b469U; + ctx->h[2] = 0x62335799U; + ctx->h[3] = 0x779fbecaU; + ctx->h[4] = 0x8ce5d491U; + ctx->h[5] = 0xc0d26243U; + ctx->h[6] = 0xbafef9eaU; + ctx->h[7] = 0x1837a9d8U; + ctx->N = 64; + vec_zero(ctx->buf, sizeof(ctx->buf)); + ctx->off = 0; +} + +static void vec_xor(void *restrict ret, const void *restrict a, + const void *restrict b, size_t num) +{ + limb_t *rp = (limb_t *)ret; + const limb_t *ap = (const limb_t *)a; + const limb_t *bp = (const limb_t *)b; + size_t i; + + num /= sizeof(limb_t); + + for (i = 0; i < num; i++) + rp[i] = ap[i] ^ bp[i]; +} + +static void expand_message_xmd(unsigned char *bytes, size_t len_in_bytes, + const unsigned char *aug, size_t aug_len, + const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len) +{ + union { limb_t align; unsigned char c[32]; } b_0; + union { limb_t align; unsigned char c[33+256+31]; } b_i; + unsigned char *p; + size_t i, b_i_bits, b_i_blocks; + SHA256_CTX ctx; + + /* + * compose template for 'strxor(b_0, b_(i-1)) || I2OSP(i, 1) || DST_prime' + */ + if (DST_len > 255) { + sha256_init(&ctx); + sha256_update(&ctx, "H2C-OVERSIZE-DST-", 17); + sha256_update(&ctx, DST, DST_len); + sha256_final(b_0.c, &ctx); + DST = b_0.c, DST_len = 32; + } + b_i_blocks = ((33 + DST_len + 1 + 9) + 63) & -64; + vec_zero(b_i.c + b_i_blocks - 64, 64); + + p = b_i.c + 33; + for (i = 0; i < DST_len; i++) + p[i] = DST[i]; + p[i++] = (unsigned char)DST_len; + p[i++] = 0x80; + p[i+6] = p[i+5] = p[i+4] = p[i+3] = p[i+2] = p[i+1] = p[i+0] = 0; + b_i_bits = (33 + DST_len + 1) * 8; + p = b_i.c + b_i_blocks; + p[-2] = (unsigned char)(b_i_bits >> 8); + p[-1] = (unsigned char)(b_i_bits); + + sha256_init_Zpad(&ctx); /* Z_pad | */ + sha256_update(&ctx, aug, aug_len); /* | aug | */ + sha256_update(&ctx, msg, msg_len); /* | msg | */ + /* | I2OSP(len_in_bytes, 2) || I2OSP(0, 1) || DST_prime */ + b_i.c[30] = (unsigned char)(len_in_bytes >> 8); + b_i.c[31] = (unsigned char)(len_in_bytes); + b_i.c[32] = 0; + sha256_update(&ctx, b_i.c + 30, 3 + DST_len + 1); + sha256_final(b_0.c, &ctx); + + sha256_init_h(ctx.h); + vec_copy(b_i.c, b_0.c, 32); + ++b_i.c[32]; + sha256_block_data_order(ctx.h, b_i.c, b_i_blocks / 64); + sha256_emit(bytes, ctx.h); + + len_in_bytes += 31; /* ell = ceil(len_in_bytes / b_in_bytes), with */ + len_in_bytes /= 32; /* caller being responsible for accordingly large + * buffer. hash_to_field passes one with length + * divisible by 64, remember? which works... */ + while (--len_in_bytes) { + sha256_init_h(ctx.h); + vec_xor(b_i.c, b_0.c, bytes, 32); + bytes += 32; + ++b_i.c[32]; + sha256_block_data_order(ctx.h, b_i.c, b_i_blocks / 64); + sha256_emit(bytes, ctx.h); + } +} +#endif + +/* + * |nelems| is 'count * m' from spec + */ +static void hash_to_field(vec384 elems[], size_t nelems, + const unsigned char *aug, size_t aug_len, + const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len) +{ + size_t L = sizeof(vec384) + 128/8; /* ceil((ceil(log2(p)) + k) / 8) */ + size_t len_in_bytes = L * nelems; /* divisible by 64, hurray! */ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 \ + || defined(__STDC_NO_VLA__) + limb_t *pseudo_random = alloca(len_in_bytes); +#else + limb_t pseudo_random[len_in_bytes/sizeof(limb_t)]; +#endif + unsigned char *bytes; + vec768 elem; + + aug_len = aug!=NULL ? aug_len : 0; + DST_len = DST!=NULL ? DST_len : 0; + + expand_message_xmd((unsigned char *)pseudo_random, len_in_bytes, + aug, aug_len, msg, msg_len, DST, DST_len); + + vec_zero(elem, sizeof(elem)); + bytes = (unsigned char *)pseudo_random; + while (nelems--) { + limbs_from_be_bytes(elem, bytes, L); + bytes += L; + /* + * L-bytes block % P, output is in Montgomery domain... + */ + redc_mont_384(elems[0], elem, BLS12_381_P, p0); + mul_mont_384(elems[0], elems[0], BLS12_381_RRRR, BLS12_381_P, p0); + elems++; + } +} + +void blst_expand_message_xmd(unsigned char *bytes, size_t len_in_bytes, + const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len) +{ + size_t buf_len = (len_in_bytes+31) & ((size_t)0-32); + unsigned char *buf_ptr = bytes; + + if (buf_len > 255*32) + return; + + if (buf_len != len_in_bytes) + buf_ptr = alloca(buf_len); + + expand_message_xmd(buf_ptr, len_in_bytes, NULL, 0, msg, msg_len, + DST, DST_len); + if (buf_ptr != bytes) { + unsigned char *ptr = buf_ptr; + while (len_in_bytes--) + *bytes++ = *ptr++; + vec_zero(buf_ptr, buf_len); + } +} diff --git a/crypto/blst_src/keygen.c b/crypto/blst_src/keygen.c new file mode 100644 index 00000000000..9b62f16b534 --- /dev/null +++ b/crypto/blst_src/keygen.c @@ -0,0 +1,319 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "consts.h" +#include "bytes.h" +#include "sha256.h" + +typedef struct { + SHA256_CTX ctx; + unsigned int h_ipad[8]; + unsigned int h_opad[8]; + union { limb_t l[64/sizeof(limb_t)]; unsigned char c[64]; } tail; +} HMAC_SHA256_CTX; + +static void HMAC_init(HMAC_SHA256_CTX *ctx, const void *K, size_t K_len) +{ + size_t i; + + if (K == NULL) { /* reuse h_ipad and h_opad */ + sha256_hcopy(ctx->ctx.h, ctx->h_ipad); + ctx->ctx.N = 64; + vec_zero(ctx->ctx.buf, sizeof(ctx->ctx.buf)); + ctx->ctx.off = 0; + + return; + } + + vec_zero(ctx->tail.c, sizeof(ctx->tail)); + if (K_len > 64) { + sha256_init(&ctx->ctx); + sha256_update(&ctx->ctx, K, K_len); + sha256_final(ctx->tail.c, &ctx->ctx); + } else { + sha256_bcopy(ctx->tail.c, K, K_len); + } + + for (i = 0; i < 64/sizeof(limb_t); i++) + ctx->tail.l[i] ^= (limb_t)0x3636363636363636; + + sha256_init(&ctx->ctx); + sha256_update(&ctx->ctx, ctx->tail.c, 64); + sha256_hcopy(ctx->h_ipad, ctx->ctx.h); + + for (i = 0; i < 64/sizeof(limb_t); i++) + ctx->tail.l[i] ^= (limb_t)(0x3636363636363636 ^ 0x5c5c5c5c5c5c5c5c); + + sha256_init_h(ctx->h_opad); + sha256_block_data_order(ctx->h_opad, ctx->tail.c, 1); + + vec_zero(ctx->tail.c, sizeof(ctx->tail)); + ctx->tail.c[32] = 0x80; + ctx->tail.c[62] = 3; /* (64+32)*8 in big endian */ + ctx->tail.c[63] = 0; +} + +static void HMAC_update(HMAC_SHA256_CTX *ctx, const unsigned char *inp, + size_t len) +{ sha256_update(&ctx->ctx, inp, len); } + +static void HMAC_final(unsigned char md[32], HMAC_SHA256_CTX *ctx) +{ + sha256_final(ctx->tail.c, &ctx->ctx); + sha256_hcopy(ctx->ctx.h, ctx->h_opad); + sha256_block_data_order(ctx->ctx.h, ctx->tail.c, 1); + sha256_emit(md, ctx->ctx.h); +} + +static void HKDF_Extract(unsigned char PRK[32], + const void *salt, size_t salt_len, + const void *IKM, size_t IKM_len, +#ifndef __BLST_HKDF_TESTMODE__ + int IKM_fixup, +#endif + HMAC_SHA256_CTX *ctx) +{ + unsigned char zero[1] = { 0 }; + + HMAC_init(ctx, salt != NULL ? salt : zero, salt_len); + HMAC_update(ctx, IKM, IKM_len); +#ifndef __BLST_HKDF_TESTMODE__ + if (IKM_fixup) { + /* Section 2.3 KeyGen in BLS-signature draft */ + HMAC_update(ctx, zero, 1); + } +#endif + HMAC_final(PRK, ctx); +} + +static void HKDF_Expand(unsigned char *OKM, size_t L, + const unsigned char PRK[32], + const void *info, size_t info_len, +#ifndef __BLST_HKDF_TESTMODE__ + int info_fixup, +#endif + HMAC_SHA256_CTX *ctx) +{ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 \ + || defined(__STDC_NO_VLA__) + unsigned char *info_prime = alloca(info_len + 2 + 1); +#else + unsigned char info_prime[info_len + 2 + 1]; +#endif + + HMAC_init(ctx, PRK, 32); + + if (info_len != 0) + sha256_bcopy(info_prime, info, info_len); +#ifndef __BLST_HKDF_TESTMODE__ + if (info_fixup) { + /* Section 2.3 KeyGen in BLS-signature draft */ + info_prime[info_len + 0] = (unsigned char)(L >> 8); + info_prime[info_len + 1] = (unsigned char)(L); + info_len += 2; + } +#endif + info_prime[info_len] = 1; /* counter */ + HMAC_update(ctx, info_prime, info_len + 1); + HMAC_final(ctx->tail.c, ctx); + while (L > 32) { + sha256_hcopy((unsigned int *)OKM, (const unsigned int *)ctx->tail.c); + OKM += 32; L -= 32; + ++info_prime[info_len]; /* counter */ + HMAC_init(ctx, NULL, 0); + HMAC_update(ctx, ctx->tail.c, 32); + HMAC_update(ctx, info_prime, info_len + 1); + HMAC_final(ctx->tail.c, ctx); + } + sha256_bcopy(OKM, ctx->tail.c, L); +} + +#ifndef __BLST_HKDF_TESTMODE__ +static void keygen(pow256 SK, const void *IKM, size_t IKM_len, + const void *salt, size_t salt_len, + const void *info, size_t info_len, + int version) +{ + struct { + HMAC_SHA256_CTX ctx; + unsigned char PRK[32], OKM[48]; + vec512 key; + } scratch; + unsigned char salt_prime[32] = "BLS-SIG-KEYGEN-SALT-"; + + if (IKM_len < 32 || (version > 4 && salt == NULL)) { + vec_zero(SK, sizeof(pow256)); + return; + } + + /* + * Vet |info| since some callers were caught to be sloppy, e.g. + * SWIG-4.0-generated Python wrapper... + */ + info_len = info==NULL ? 0 : info_len; + + if (salt == NULL) { + salt = salt_prime; + salt_len = 20; + } + + if (version == 4) { + /* salt = H(salt) */ + sha256_init(&scratch.ctx.ctx); + sha256_update(&scratch.ctx.ctx, salt, salt_len); + sha256_final(salt_prime, &scratch.ctx.ctx); + salt = salt_prime; + salt_len = sizeof(salt_prime); + } + + while (1) { + /* PRK = HKDF-Extract(salt, IKM || I2OSP(0, 1)) */ + HKDF_Extract(scratch.PRK, salt, salt_len, + IKM, IKM_len, 1, &scratch.ctx); + + /* OKM = HKDF-Expand(PRK, key_info || I2OSP(L, 2), L) */ + HKDF_Expand(scratch.OKM, sizeof(scratch.OKM), scratch.PRK, + info, info_len, 1, &scratch.ctx); + + /* SK = OS2IP(OKM) mod r */ + vec_zero(scratch.key, sizeof(scratch.key)); + limbs_from_be_bytes(scratch.key, scratch.OKM, sizeof(scratch.OKM)); + redc_mont_256(scratch.key, scratch.key, BLS12_381_r, r0); + /* + * Given that mul_mont_sparse_256 has special boundary conditions + * it's appropriate to mention that redc_mont_256 output is fully + * reduced at this point. Because we started with 384-bit input, + * one with most significant half smaller than the modulus. + */ + mul_mont_sparse_256(scratch.key, scratch.key, BLS12_381_rRR, + BLS12_381_r, r0); + + if (version < 4 || !vec_is_zero(scratch.key, sizeof(vec256))) + break; + + /* salt = H(salt) */ + sha256_init(&scratch.ctx.ctx); + sha256_update(&scratch.ctx.ctx, salt, salt_len); + sha256_final(salt_prime, &scratch.ctx.ctx); + salt = salt_prime; + salt_len = sizeof(salt_prime); + } + + le_bytes_from_limbs(SK, scratch.key, sizeof(pow256)); + + /* + * scrub the stack just in case next callee inadvertently flashes + * a fragment across application boundary... + */ + vec_zero(&scratch, sizeof(scratch)); +} + +void blst_keygen(pow256 SK, const void *IKM, size_t IKM_len, + const void *info, size_t info_len) +{ keygen(SK, IKM, IKM_len, NULL, 0, info, info_len, 4); } + +void blst_keygen_v3(pow256 SK, const void *IKM, size_t IKM_len, + const void *info, size_t info_len) +{ keygen(SK, IKM, IKM_len, NULL, 0, info, info_len, 3); } + +void blst_keygen_v4_5(pow256 SK, const void *IKM, size_t IKM_len, + const void *salt, size_t salt_len, + const void *info, size_t info_len) +{ keygen(SK, IKM, IKM_len, salt, salt_len, info, info_len, 4); } + +void blst_keygen_v5(pow256 SK, const void *IKM, size_t IKM_len, + const void *salt, size_t salt_len, + const void *info, size_t info_len) +{ keygen(SK, IKM, IKM_len, salt, salt_len, info, info_len, 5); } + +/* + * https://eips.ethereum.org/EIPS/eip-2333 + */ +void blst_derive_master_eip2333(pow256 SK, const void *seed, size_t seed_len) +{ keygen(SK, seed, seed_len, NULL, 0, NULL, 0, 4); } + +static void parent_SK_to_lamport_PK(pow256 PK, const pow256 parent_SK, + unsigned int index) +{ + size_t i; + struct { + HMAC_SHA256_CTX ctx; + SHA256_CTX ret; + unsigned char PRK[32], IKM[32]; + unsigned char lamport[255][32]; + } scratch; + + /* salt = I2OSP(index, 4) */ + unsigned char salt[4] = { (unsigned char)(index>>24), + (unsigned char)(index>>16), + (unsigned char)(index>>8), + (unsigned char)(index) }; + + /* IKM = I2OSP(parent_SK, 32) */ + for (i = 0; i < 32; i++) + scratch.IKM[i] = parent_SK[31-i]; + + /* lamport_0 = IKM_to_lamport_SK(IKM, salt) */ + HKDF_Extract(scratch.PRK, salt, sizeof(salt), scratch.IKM, 32, 0, + &scratch.ctx); + HKDF_Expand(scratch.lamport[0], sizeof(scratch.lamport), + scratch.PRK, NULL, 0, 0, &scratch.ctx); + + vec_zero(scratch.ctx.ctx.buf, sizeof(scratch.ctx.ctx.buf)); + scratch.ctx.ctx.buf[32] = 0x80; + scratch.ctx.ctx.buf[62] = 1; /* 32*8 in big endian */ + scratch.ctx.ctx.buf[63] = 0; + for (i = 0; i < 255; i++) { + /* lamport_PK = lamport_PK | SHA256(lamport_0[i]) */ + sha256_init_h(scratch.ctx.ctx.h); + sha256_bcopy(scratch.ctx.ctx.buf, scratch.lamport[i], 32); + sha256_block_data_order(scratch.ctx.ctx.h, scratch.ctx.ctx.buf, 1); + sha256_emit(scratch.lamport[i], scratch.ctx.ctx.h); + } + + /* compressed_lamport_PK = SHA256(lamport_PK) */ + sha256_init(&scratch.ret); + sha256_update(&scratch.ret, scratch.lamport, sizeof(scratch.lamport)); + + /* not_IKM = flip_bits(IKM) */ + for (i = 0; i< 32; i++) + scratch.IKM[i] = ~scratch.IKM[i]; + + /* lamport_1 = IKM_to_lamport_SK(not_IKM, salt) */ + HKDF_Extract(scratch.PRK, salt, sizeof(salt), scratch.IKM, 32, 0, + &scratch.ctx); + HKDF_Expand(scratch.lamport[0], sizeof(scratch.lamport), + scratch.PRK, NULL, 0, 0, &scratch.ctx); + + vec_zero(scratch.ctx.ctx.buf, sizeof(scratch.ctx.ctx.buf)); + scratch.ctx.ctx.buf[32] = 0x80; + scratch.ctx.ctx.buf[62] = 1; + for (i = 0; i < 255; i++) { + /* lamport_PK = lamport_PK | SHA256(lamport_1[i]) */ + sha256_init_h(scratch.ctx.ctx.h); + sha256_bcopy(scratch.ctx.ctx.buf, scratch.lamport[i], 32); + sha256_block_data_order(scratch.ctx.ctx.h, scratch.ctx.ctx.buf, 1); + sha256_emit(scratch.lamport[i], scratch.ctx.ctx.h); + } + + /* compressed_lamport_PK = SHA256(lamport_PK) */ + sha256_update(&scratch.ret, scratch.lamport, sizeof(scratch.lamport)); + sha256_final(PK, &scratch.ret); + + /* + * scrub the stack just in case next callee inadvertently flashes + * a fragment across application boundary... + */ + vec_zero(&scratch, sizeof(scratch)); +} + +void blst_derive_child_eip2333(pow256 SK, const pow256 parent_SK, + unsigned int child_index) +{ + parent_SK_to_lamport_PK(SK, parent_SK, child_index); + keygen(SK, SK, sizeof(pow256), NULL, 0, NULL, 0, 4); +} +#endif diff --git a/crypto/blst_src/map_to_g1.c b/crypto/blst_src/map_to_g1.c new file mode 100644 index 00000000000..6613d68bb29 --- /dev/null +++ b/crypto/blst_src/map_to_g1.c @@ -0,0 +1,559 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "point.h" +#include "fields.h" + +/* + * y^2 = x^3 + A'*x + B', isogenous one + */ +static const vec384 Aprime_E1 = { + /* (0x00144698a3b8e9433d693a02c96d4982b0ea985383ee66a8 + d8e8981aefd881ac98936f8da0e0f97f5cf428082d584c1d << 384) % P */ + TO_LIMB_T(0x2f65aa0e9af5aa51), TO_LIMB_T(0x86464c2d1e8416c3), + TO_LIMB_T(0xb85ce591b7bd31e2), TO_LIMB_T(0x27e11c91b5f24e7c), + TO_LIMB_T(0x28376eda6bfc1835), TO_LIMB_T(0x155455c3e5071d85) +}; +static const vec384 Bprime_E1 = { + /* (0x12e2908d11688030018b12e8753eee3b2016c1f0f24f4070 + a0b9c14fcef35ef55a23215a316ceaa5d1cc48e98e172be0 << 384) % P */ + TO_LIMB_T(0xfb996971fe22a1e0), TO_LIMB_T(0x9aa93eb35b742d6f), + TO_LIMB_T(0x8c476013de99c5c4), TO_LIMB_T(0x873e27c3a221e571), + TO_LIMB_T(0xca72b5e45a52d888), TO_LIMB_T(0x06824061418a386b) +}; + +static void map_fp_times_Zz(vec384 map[], const vec384 isogeny_map[], + const vec384 Zz_powers[], size_t n) +{ + while (n--) + mul_fp(map[n], isogeny_map[n], Zz_powers[n]); +} + +static void map_fp(vec384 acc, const vec384 x, const vec384 map[], size_t n) +{ + while (n--) { + mul_fp(acc, acc, x); + add_fp(acc, acc, map[n]); + } +} + +static void isogeny_map_to_E1(POINTonE1 *out, const POINTonE1 *p) +{ + /* + * x = x_num / x_den, where + * x_num = k_(1,11) * x'^11 + k_(1,10) * x'^10 + k_(1,9) * x'^9 + + * ... + k_(1,0) + * ... + */ + static const vec384 isogeny_map_x_num[] = { /* (k_(1,*)<<384) % P */ + { TO_LIMB_T(0x4d18b6f3af00131c), TO_LIMB_T(0x19fa219793fee28c), + TO_LIMB_T(0x3f2885f1467f19ae), TO_LIMB_T(0x23dcea34f2ffb304), + TO_LIMB_T(0xd15b58d2ffc00054), TO_LIMB_T(0x0913be200a20bef4) }, + { TO_LIMB_T(0x898985385cdbbd8b), TO_LIMB_T(0x3c79e43cc7d966aa), + TO_LIMB_T(0x1597e193f4cd233a), TO_LIMB_T(0x8637ef1e4d6623ad), + TO_LIMB_T(0x11b22deed20d827b), TO_LIMB_T(0x07097bc5998784ad) }, + { TO_LIMB_T(0xa542583a480b664b), TO_LIMB_T(0xfc7169c026e568c6), + TO_LIMB_T(0x5ba2ef314ed8b5a6), TO_LIMB_T(0x5b5491c05102f0e7), + TO_LIMB_T(0xdf6e99707d2a0079), TO_LIMB_T(0x0784151ed7605524) }, + { TO_LIMB_T(0x494e212870f72741), TO_LIMB_T(0xab9be52fbda43021), + TO_LIMB_T(0x26f5577994e34c3d), TO_LIMB_T(0x049dfee82aefbd60), + TO_LIMB_T(0x65dadd7828505289), TO_LIMB_T(0x0e93d431ea011aeb) }, + { TO_LIMB_T(0x90ee774bd6a74d45), TO_LIMB_T(0x7ada1c8a41bfb185), + TO_LIMB_T(0x0f1a8953b325f464), TO_LIMB_T(0x104c24211be4805c), + TO_LIMB_T(0x169139d319ea7a8f), TO_LIMB_T(0x09f20ead8e532bf6) }, + { TO_LIMB_T(0x6ddd93e2f43626b7), TO_LIMB_T(0xa5482c9aa1ccd7bd), + TO_LIMB_T(0x143245631883f4bd), TO_LIMB_T(0x2e0a94ccf77ec0db), + TO_LIMB_T(0xb0282d480e56489f), TO_LIMB_T(0x18f4bfcbb4368929) }, + { TO_LIMB_T(0x23c5f0c953402dfd), TO_LIMB_T(0x7a43ff6958ce4fe9), + TO_LIMB_T(0x2c390d3d2da5df63), TO_LIMB_T(0xd0df5c98e1f9d70f), + TO_LIMB_T(0xffd89869a572b297), TO_LIMB_T(0x1277ffc72f25e8fe) }, + { TO_LIMB_T(0x79f4f0490f06a8a6), TO_LIMB_T(0x85f894a88030fd81), + TO_LIMB_T(0x12da3054b18b6410), TO_LIMB_T(0xe2a57f6505880d65), + TO_LIMB_T(0xbba074f260e400f1), TO_LIMB_T(0x08b76279f621d028) }, + { TO_LIMB_T(0xe67245ba78d5b00b), TO_LIMB_T(0x8456ba9a1f186475), + TO_LIMB_T(0x7888bff6e6b33bb4), TO_LIMB_T(0xe21585b9a30f86cb), + TO_LIMB_T(0x05a69cdcef55feee), TO_LIMB_T(0x09e699dd9adfa5ac) }, + { TO_LIMB_T(0x0de5c357bff57107), TO_LIMB_T(0x0a0db4ae6b1a10b2), + TO_LIMB_T(0xe256bb67b3b3cd8d), TO_LIMB_T(0x8ad456574e9db24f), + TO_LIMB_T(0x0443915f50fd4179), TO_LIMB_T(0x098c4bf7de8b6375) }, + { TO_LIMB_T(0xe6b0617e7dd929c7), TO_LIMB_T(0xfe6e37d442537375), + TO_LIMB_T(0x1dafdeda137a489e), TO_LIMB_T(0xe4efd1ad3f767ceb), + TO_LIMB_T(0x4a51d8667f0fe1cf), TO_LIMB_T(0x054fdf4bbf1d821c) }, + { TO_LIMB_T(0x72db2a50658d767b), TO_LIMB_T(0x8abf91faa257b3d5), + TO_LIMB_T(0xe969d6833764ab47), TO_LIMB_T(0x464170142a1009eb), + TO_LIMB_T(0xb14f01aadb30be2f), TO_LIMB_T(0x18ae6a856f40715d) } + }; + /* ... + * x_den = x'^10 + k_(2,9) * x'^9 + k_(2,8) * x'^8 + ... + k_(2,0) + */ + static const vec384 isogeny_map_x_den[] = { /* (k_(2,*)<<384) % P */ + { TO_LIMB_T(0xb962a077fdb0f945), TO_LIMB_T(0xa6a9740fefda13a0), + TO_LIMB_T(0xc14d568c3ed6c544), TO_LIMB_T(0xb43fc37b908b133e), + TO_LIMB_T(0x9c0b3ac929599016), TO_LIMB_T(0x0165aa6c93ad115f) }, + { TO_LIMB_T(0x23279a3ba506c1d9), TO_LIMB_T(0x92cfca0a9465176a), + TO_LIMB_T(0x3b294ab13755f0ff), TO_LIMB_T(0x116dda1c5070ae93), + TO_LIMB_T(0xed4530924cec2045), TO_LIMB_T(0x083383d6ed81f1ce) }, + { TO_LIMB_T(0x9885c2a6449fecfc), TO_LIMB_T(0x4a2b54ccd37733f0), + TO_LIMB_T(0x17da9ffd8738c142), TO_LIMB_T(0xa0fba72732b3fafd), + TO_LIMB_T(0xff364f36e54b6812), TO_LIMB_T(0x0f29c13c660523e2) }, + { TO_LIMB_T(0xe349cc118278f041), TO_LIMB_T(0xd487228f2f3204fb), + TO_LIMB_T(0xc9d325849ade5150), TO_LIMB_T(0x43a92bd69c15c2df), + TO_LIMB_T(0x1c2c7844bc417be4), TO_LIMB_T(0x12025184f407440c) }, + { TO_LIMB_T(0x587f65ae6acb057b), TO_LIMB_T(0x1444ef325140201f), + TO_LIMB_T(0xfbf995e71270da49), TO_LIMB_T(0xccda066072436a42), + TO_LIMB_T(0x7408904f0f186bb2), TO_LIMB_T(0x13b93c63edf6c015) }, + { TO_LIMB_T(0xfb918622cd141920), TO_LIMB_T(0x4a4c64423ecaddb4), + TO_LIMB_T(0x0beb232927f7fb26), TO_LIMB_T(0x30f94df6f83a3dc2), + TO_LIMB_T(0xaeedd424d780f388), TO_LIMB_T(0x06cc402dd594bbeb) }, + { TO_LIMB_T(0xd41f761151b23f8f), TO_LIMB_T(0x32a92465435719b3), + TO_LIMB_T(0x64f436e888c62cb9), TO_LIMB_T(0xdf70a9a1f757c6e4), + TO_LIMB_T(0x6933a38d5b594c81), TO_LIMB_T(0x0c6f7f7237b46606) }, + { TO_LIMB_T(0x693c08747876c8f7), TO_LIMB_T(0x22c9850bf9cf80f0), + TO_LIMB_T(0x8e9071dab950c124), TO_LIMB_T(0x89bc62d61c7baf23), + TO_LIMB_T(0xbc6be2d8dad57c23), TO_LIMB_T(0x17916987aa14a122) }, + { TO_LIMB_T(0x1be3ff439c1316fd), TO_LIMB_T(0x9965243a7571dfa7), + TO_LIMB_T(0xc7f7f62962f5cd81), TO_LIMB_T(0x32c6aa9af394361c), + TO_LIMB_T(0xbbc2ee18e1c227f4), TO_LIMB_T(0x0c102cbac531bb34) }, + { TO_LIMB_T(0x997614c97bacbf07), TO_LIMB_T(0x61f86372b99192c0), + TO_LIMB_T(0x5b8c95fc14353fc3), TO_LIMB_T(0xca2b066c2a87492f), + TO_LIMB_T(0x16178f5bbf698711), TO_LIMB_T(0x12a6dcd7f0f4e0e8) } + }; + /* + * y = y' * y_num / y_den, where + * y_num = k_(3,15) * x'^15 + k_(3,14) * x'^14 + k_(3,13) * x'^13 + + * ... + k_(3,0) + * ... + */ + static const vec384 isogeny_map_y_num[] = { /* (k_(3,*)<<384) % P */ + { TO_LIMB_T(0x2b567ff3e2837267), TO_LIMB_T(0x1d4d9e57b958a767), + TO_LIMB_T(0xce028fea04bd7373), TO_LIMB_T(0xcc31a30a0b6cd3df), + TO_LIMB_T(0x7d7b18a682692693), TO_LIMB_T(0x0d300744d42a0310) }, + { TO_LIMB_T(0x99c2555fa542493f), TO_LIMB_T(0xfe7f53cc4874f878), + TO_LIMB_T(0x5df0608b8f97608a), TO_LIMB_T(0x14e03832052b49c8), + TO_LIMB_T(0x706326a6957dd5a4), TO_LIMB_T(0x0a8dadd9c2414555) }, + { TO_LIMB_T(0x13d942922a5cf63a), TO_LIMB_T(0x357e33e36e261e7d), + TO_LIMB_T(0xcf05a27c8456088d), TO_LIMB_T(0x0000bd1de7ba50f0), + TO_LIMB_T(0x83d0c7532f8c1fde), TO_LIMB_T(0x13f70bf38bbf2905) }, + { TO_LIMB_T(0x5c57fd95bfafbdbb), TO_LIMB_T(0x28a359a65e541707), + TO_LIMB_T(0x3983ceb4f6360b6d), TO_LIMB_T(0xafe19ff6f97e6d53), + TO_LIMB_T(0xb3468f4550192bf7), TO_LIMB_T(0x0bb6cde49d8ba257) }, + { TO_LIMB_T(0x590b62c7ff8a513f), TO_LIMB_T(0x314b4ce372cacefd), + TO_LIMB_T(0x6bef32ce94b8a800), TO_LIMB_T(0x6ddf84a095713d5f), + TO_LIMB_T(0x64eace4cb0982191), TO_LIMB_T(0x0386213c651b888d) }, + { TO_LIMB_T(0xa5310a31111bbcdd), TO_LIMB_T(0xa14ac0f5da148982), + TO_LIMB_T(0xf9ad9cc95423d2e9), TO_LIMB_T(0xaa6ec095283ee4a7), + TO_LIMB_T(0xcf5b1f022e1c9107), TO_LIMB_T(0x01fddf5aed881793) }, + { TO_LIMB_T(0x65a572b0d7a7d950), TO_LIMB_T(0xe25c2d8183473a19), + TO_LIMB_T(0xc2fcebe7cb877dbd), TO_LIMB_T(0x05b2d36c769a89b0), + TO_LIMB_T(0xba12961be86e9efb), TO_LIMB_T(0x07eb1b29c1dfde1f) }, + { TO_LIMB_T(0x93e09572f7c4cd24), TO_LIMB_T(0x364e929076795091), + TO_LIMB_T(0x8569467e68af51b5), TO_LIMB_T(0xa47da89439f5340f), + TO_LIMB_T(0xf4fa918082e44d64), TO_LIMB_T(0x0ad52ba3e6695a79) }, + { TO_LIMB_T(0x911429844e0d5f54), TO_LIMB_T(0xd03f51a3516bb233), + TO_LIMB_T(0x3d587e5640536e66), TO_LIMB_T(0xfa86d2a3a9a73482), + TO_LIMB_T(0xa90ed5adf1ed5537), TO_LIMB_T(0x149c9c326a5e7393) }, + { TO_LIMB_T(0x462bbeb03c12921a), TO_LIMB_T(0xdc9af5fa0a274a17), + TO_LIMB_T(0x9a558ebde836ebed), TO_LIMB_T(0x649ef8f11a4fae46), + TO_LIMB_T(0x8100e1652b3cdc62), TO_LIMB_T(0x1862bd62c291dacb) }, + { TO_LIMB_T(0x05c9b8ca89f12c26), TO_LIMB_T(0x0194160fa9b9ac4f), + TO_LIMB_T(0x6a643d5a6879fa2c), TO_LIMB_T(0x14665bdd8846e19d), + TO_LIMB_T(0xbb1d0d53af3ff6bf), TO_LIMB_T(0x12c7e1c3b28962e5) }, + { TO_LIMB_T(0xb55ebf900b8a3e17), TO_LIMB_T(0xfedc77ec1a9201c4), + TO_LIMB_T(0x1f07db10ea1a4df4), TO_LIMB_T(0x0dfbd15dc41a594d), + TO_LIMB_T(0x389547f2334a5391), TO_LIMB_T(0x02419f98165871a4) }, + { TO_LIMB_T(0xb416af000745fc20), TO_LIMB_T(0x8e563e9d1ea6d0f5), + TO_LIMB_T(0x7c763e17763a0652), TO_LIMB_T(0x01458ef0159ebbef), + TO_LIMB_T(0x8346fe421f96bb13), TO_LIMB_T(0x0d2d7b829ce324d2) }, + { TO_LIMB_T(0x93096bb538d64615), TO_LIMB_T(0x6f2a2619951d823a), + TO_LIMB_T(0x8f66b3ea59514fa4), TO_LIMB_T(0xf563e63704f7092f), + TO_LIMB_T(0x724b136c4cf2d9fa), TO_LIMB_T(0x046959cfcfd0bf49) }, + { TO_LIMB_T(0xea748d4b6e405346), TO_LIMB_T(0x91e9079c2c02d58f), + TO_LIMB_T(0x41064965946d9b59), TO_LIMB_T(0xa06731f1d2bbe1ee), + TO_LIMB_T(0x07f897e267a33f1b), TO_LIMB_T(0x1017290919210e5f) }, + { TO_LIMB_T(0x872aa6c17d985097), TO_LIMB_T(0xeecc53161264562a), + TO_LIMB_T(0x07afe37afff55002), TO_LIMB_T(0x54759078e5be6838), + TO_LIMB_T(0xc4b92d15db8acca8), TO_LIMB_T(0x106d87d1b51d13b9) } + }; + /* ... + * y_den = x'^15 + k_(4,14) * x'^14 + k_(4,13) * x'^13 + ... + k_(4,0) + */ + static const vec384 isogeny_map_y_den[] = { /* (k_(4,*)<<384) % P */ + { TO_LIMB_T(0xeb6c359d47e52b1c), TO_LIMB_T(0x18ef5f8a10634d60), + TO_LIMB_T(0xddfa71a0889d5b7e), TO_LIMB_T(0x723e71dcc5fc1323), + TO_LIMB_T(0x52f45700b70d5c69), TO_LIMB_T(0x0a8b981ee47691f1) }, + { TO_LIMB_T(0x616a3c4f5535b9fb), TO_LIMB_T(0x6f5f037395dbd911), + TO_LIMB_T(0xf25f4cc5e35c65da), TO_LIMB_T(0x3e50dffea3c62658), + TO_LIMB_T(0x6a33dca523560776), TO_LIMB_T(0x0fadeff77b6bfe3e) }, + { TO_LIMB_T(0x2be9b66df470059c), TO_LIMB_T(0x24a2c159a3d36742), + TO_LIMB_T(0x115dbe7ad10c2a37), TO_LIMB_T(0xb6634a652ee5884d), + TO_LIMB_T(0x04fe8bb2b8d81af4), TO_LIMB_T(0x01c2a7a256fe9c41) }, + { TO_LIMB_T(0xf27bf8ef3b75a386), TO_LIMB_T(0x898b367476c9073f), + TO_LIMB_T(0x24482e6b8c2f4e5f), TO_LIMB_T(0xc8e0bbd6fe110806), + TO_LIMB_T(0x59b0c17f7631448a), TO_LIMB_T(0x11037cd58b3dbfbd) }, + { TO_LIMB_T(0x31c7912ea267eec6), TO_LIMB_T(0x1dbf6f1c5fcdb700), + TO_LIMB_T(0xd30d4fe3ba86fdb1), TO_LIMB_T(0x3cae528fbee9a2a4), + TO_LIMB_T(0xb1cce69b6aa9ad9a), TO_LIMB_T(0x044393bb632d94fb) }, + { TO_LIMB_T(0xc66ef6efeeb5c7e8), TO_LIMB_T(0x9824c289dd72bb55), + TO_LIMB_T(0x71b1a4d2f119981d), TO_LIMB_T(0x104fc1aafb0919cc), + TO_LIMB_T(0x0e49df01d942a628), TO_LIMB_T(0x096c3a09773272d4) }, + { TO_LIMB_T(0x9abc11eb5fadeff4), TO_LIMB_T(0x32dca50a885728f0), + TO_LIMB_T(0xfb1fa3721569734c), TO_LIMB_T(0xc4b76271ea6506b3), + TO_LIMB_T(0xd466a75599ce728e), TO_LIMB_T(0x0c81d4645f4cb6ed) }, + { TO_LIMB_T(0x4199f10e5b8be45b), TO_LIMB_T(0xda64e495b1e87930), + TO_LIMB_T(0xcb353efe9b33e4ff), TO_LIMB_T(0x9e9efb24aa6424c6), + TO_LIMB_T(0xf08d33680a237465), TO_LIMB_T(0x0d3378023e4c7406) }, + { TO_LIMB_T(0x7eb4ae92ec74d3a5), TO_LIMB_T(0xc341b4aa9fac3497), + TO_LIMB_T(0x5be603899e907687), TO_LIMB_T(0x03bfd9cca75cbdeb), + TO_LIMB_T(0x564c2935a96bfa93), TO_LIMB_T(0x0ef3c33371e2fdb5) }, + { TO_LIMB_T(0x7ee91fd449f6ac2e), TO_LIMB_T(0xe5d5bd5cb9357a30), + TO_LIMB_T(0x773a8ca5196b1380), TO_LIMB_T(0xd0fda172174ed023), + TO_LIMB_T(0x6cb95e0fa776aead), TO_LIMB_T(0x0d22d5a40cec7cff) }, + { TO_LIMB_T(0xf727e09285fd8519), TO_LIMB_T(0xdc9d55a83017897b), + TO_LIMB_T(0x7549d8bd057894ae), TO_LIMB_T(0x178419613d90d8f8), + TO_LIMB_T(0xfce95ebdeb5b490a), TO_LIMB_T(0x0467ffaef23fc49e) }, + { TO_LIMB_T(0xc1769e6a7c385f1b), TO_LIMB_T(0x79bc930deac01c03), + TO_LIMB_T(0x5461c75a23ede3b5), TO_LIMB_T(0x6e20829e5c230c45), + TO_LIMB_T(0x828e0f1e772a53cd), TO_LIMB_T(0x116aefa749127bff) }, + { TO_LIMB_T(0x101c10bf2744c10a), TO_LIMB_T(0xbbf18d053a6a3154), + TO_LIMB_T(0xa0ecf39ef026f602), TO_LIMB_T(0xfc009d4996dc5153), + TO_LIMB_T(0xb9000209d5bd08d3), TO_LIMB_T(0x189e5fe4470cd73c) }, + { TO_LIMB_T(0x7ebd546ca1575ed2), TO_LIMB_T(0xe47d5a981d081b55), + TO_LIMB_T(0x57b2b625b6d4ca21), TO_LIMB_T(0xb0a1ba04228520cc), + TO_LIMB_T(0x98738983c2107ff3), TO_LIMB_T(0x13dddbc4799d81d6) }, + { TO_LIMB_T(0x09319f2e39834935), TO_LIMB_T(0x039e952cbdb05c21), + TO_LIMB_T(0x55ba77a9a2f76493), TO_LIMB_T(0xfd04e3dfc6086467), + TO_LIMB_T(0xfb95832e7d78742e), TO_LIMB_T(0x0ef9c24eccaf5e0e) } + }; + vec384 Zz_powers[15], map[15], xn, xd, yn, yd; + + /* lay down Z^2 powers in descending order */ + sqr_fp(Zz_powers[14], p->Z); /* ZZ^1 */ +#ifdef __OPTIMIZE_SIZE__ + for (size_t i = 14; i > 0; i--) + mul_fp(Zz_powers[i-1], Zz_powers[i], Zz_powers[14]); +#else + sqr_fp(Zz_powers[13], Zz_powers[14]); /* ZZ^2 1+1 */ + mul_fp(Zz_powers[12], Zz_powers[14], Zz_powers[13]);/* ZZ^3 2+1 */ + sqr_fp(Zz_powers[11], Zz_powers[13]); /* ZZ^4 2+2 */ + mul_fp(Zz_powers[10], Zz_powers[13], Zz_powers[12]);/* ZZ^5 2+3 */ + sqr_fp(Zz_powers[9], Zz_powers[12]); /* ZZ^6 3+3 */ + mul_fp(Zz_powers[8], Zz_powers[12], Zz_powers[11]);/* ZZ^7 3+4 */ + sqr_fp(Zz_powers[7], Zz_powers[11]); /* ZZ^8 4+4 */ + mul_fp(Zz_powers[6], Zz_powers[11], Zz_powers[10]);/* ZZ^9 4+5 */ + sqr_fp(Zz_powers[5], Zz_powers[10]); /* ZZ^10 5+5 */ + mul_fp(Zz_powers[4], Zz_powers[10], Zz_powers[9]); /* ZZ^11 5+6 */ + sqr_fp(Zz_powers[3], Zz_powers[9]); /* ZZ^12 6+6 */ + mul_fp(Zz_powers[2], Zz_powers[9], Zz_powers[8]); /* ZZ^13 6+7 */ + sqr_fp(Zz_powers[1], Zz_powers[8]); /* ZZ^14 7+7 */ + mul_fp(Zz_powers[0], Zz_powers[8], Zz_powers[7]); /* ZZ^15 7+8 */ +#endif + + map_fp_times_Zz(map, isogeny_map_x_num, Zz_powers + 4, 11); + mul_fp(xn, p->X, isogeny_map_x_num[11]); + add_fp(xn, xn, map[10]); + map_fp(xn, p->X, map, 10); + + map_fp_times_Zz(map, isogeny_map_x_den, Zz_powers + 5, 10); + add_fp(xd, p->X, map[9]); + map_fp(xd, p->X, map, 9); + mul_fp(xd, xd, Zz_powers[14]); /* xd *= Z^2 */ + + map_fp_times_Zz(map, isogeny_map_y_num, Zz_powers, 15); + mul_fp(yn, p->X, isogeny_map_y_num[15]); + add_fp(yn, yn, map[14]); + map_fp(yn, p->X, map, 14); + mul_fp(yn, yn, p->Y); /* yn *= Y */ + + map_fp_times_Zz(map, isogeny_map_y_den, Zz_powers, 15); + add_fp(yd, p->X, map[14]); + map_fp(yd, p->X, map, 14); + mul_fp(Zz_powers[14], Zz_powers[14], p->Z); + mul_fp(yd, yd, Zz_powers[14]); /* yd *= Z^3 */ + + /* convert (xn, xd, yn, yd) to Jacobian coordinates */ + mul_fp(out->Z, xd, yd); /* Z = xd * yd */ + mul_fp(out->X, xn, yd); + mul_fp(out->X, out->X, out->Z); /* X = xn * xd * yd^2 */ + sqr_fp(out->Y, out->Z); + mul_fp(out->Y, out->Y, xd); + mul_fp(out->Y, out->Y, yn); /* Y = yn * xd^3 * yd^2 */ +} + +static void map_to_isogenous_E1(POINTonE1 *p, const vec384 u) +{ + static const vec384 minus_A = { /* P - A */ + TO_LIMB_T(0x8a9955f1650a005a), TO_LIMB_T(0x9865b3d192cfe93c), + TO_LIMB_T(0xaed3ed0f3ef3c441), TO_LIMB_T(0x3c962ef33d92c442), + TO_LIMB_T(0x22e438dbd74f94a2), TO_LIMB_T(0x04acbc265478c915) + }; + static const vec384 Z = { /* (11<<384) % P */ + TO_LIMB_T(0x886c00000023ffdc), TO_LIMB_T(0x0f70008d3090001d), + TO_LIMB_T(0x77672417ed5828c3), TO_LIMB_T(0x9dac23e943dc1740), + TO_LIMB_T(0x50553f1b9c131521), TO_LIMB_T(0x078c712fbe0ab6e8) + }; + static const vec384 sqrt_minus_ZZZ = { + TO_LIMB_T(0x43b571cad3215f1f), TO_LIMB_T(0xccb460ef1c702dc2), + TO_LIMB_T(0x742d884f4f97100b), TO_LIMB_T(0xdb2c3e3238a3382b), + TO_LIMB_T(0xe40f3fa13fce8f88), TO_LIMB_T(0x0073a2af9892a2ff) + }; + static const vec384 ZxA = { + TO_LIMB_T(0x7f674ea0a8915178), TO_LIMB_T(0xb0f945fc13b8fa65), + TO_LIMB_T(0x4b46759a38e87d76), TO_LIMB_T(0x2e7a929641bbb6a1), + TO_LIMB_T(0x1668ddfa462bf6b6), TO_LIMB_T(0x00960e2ed1cf294c) + }; + vec384 uu, tv2, x2n, gx1, gxd, y2; +#if 0 + vec384 xn, x1n, xd, y, y1, Zuu, tv4; +#else +# define xn p->X +# define y p->Y +# define xd p->Z +# define x1n xn +# define y1 y +# define Zuu x2n +# define tv4 y1 +#endif +#define sgn0_fp(a) (sgn0_pty_mont_384((a), BLS12_381_P, p0) & 1) + bool_t e1, e2; + + /* + * as per map_to_curve() from poc/sswu_opt.sage at + * https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve + */ + /* x numerator variants */ + sqr_fp(uu, u); /* uu = u^2 */ + mul_fp(Zuu, Z, uu); /* Zuu = Z * uu */ + sqr_fp(tv2, Zuu); /* tv2 = Zuu^2 */ + add_fp(tv2, tv2, Zuu); /* tv2 = tv2 + Zuu */ + add_fp(x1n, tv2, BLS12_381_Rx.p); /* x1n = tv2 + 1 */ + mul_fp(x1n, x1n, Bprime_E1); /* x1n = x1n * B */ + mul_fp(x2n, Zuu, x1n); /* x2n = Zuu * x1n */ + + /* x denumenator */ + mul_fp(xd, minus_A, tv2); /* xd = -A * tv2 */ + e1 = vec_is_zero(xd, sizeof(xd)); /* e1 = xd == 0 */ + vec_select(xd, ZxA, xd, sizeof(xd), e1); /* # If xd == 0, set xd = Z*A */ + + /* y numerators variants */ + sqr_fp(tv2, xd); /* tv2 = xd^2 */ + mul_fp(gxd, xd, tv2); /* gxd = xd^3 */ + mul_fp(tv2, Aprime_E1, tv2); /* tv2 = A * tv2 */ + sqr_fp(gx1, x1n); /* gx1 = x1n^2 */ + add_fp(gx1, gx1, tv2); /* gx1 = gx1 + tv2 # x1n^2 + A*xd^2 */ + mul_fp(gx1, gx1, x1n); /* gx1 = gx1 * x1n # x1n^3 + A*x1n*xd^2 */ + mul_fp(tv2, Bprime_E1, gxd); /* tv2 = B * gxd */ + add_fp(gx1, gx1, tv2); /* gx1 = gx1 + tv2 # x1^3 + A*x1*xd^2 + B*xd^3 */ + sqr_fp(tv4, gxd); /* tv4 = gxd^2 */ + mul_fp(tv2, gx1, gxd); /* tv2 = gx1 * gxd */ + mul_fp(tv4, tv4, tv2); /* tv4 = tv4 * tv2 # gx1*gxd^3 */ + e2 = recip_sqrt_fp(y1, tv4); /* y1 = tv4^c1 # (gx1*gxd^3)^((p-3)/4) */ + mul_fp(y1, y1, tv2); /* y1 = y1 * tv2 # gx1*gxd*y1 */ + mul_fp(y2, y1, sqrt_minus_ZZZ); /* y2 = y1 * c2 # y2 = y1*sqrt(-Z^3) */ + mul_fp(y2, y2, uu); /* y2 = y2 * uu */ + mul_fp(y2, y2, u); /* y2 = y2 * u */ + + /* choose numerators */ + vec_select(xn, x1n, x2n, sizeof(xn), e2); /* xn = e2 ? x1n : x2n */ + vec_select(y, y1, y2, sizeof(y), e2); /* y = e2 ? y1 : y2 */ + + e1 = sgn0_fp(u); + e2 = sgn0_fp(y); + cneg_fp(y, y, e1^e2); /* fix sign of y */ + /* return (xn, xd, y, 1) */ + + /* convert (xn, xd, y, 1) to Jacobian projective coordinates */ + mul_fp(p->X, xn, xd); /* X = xn * xd */ + mul_fp(p->Y, y, gxd); /* Y = y * xd^3 */ +#ifndef xd + vec_copy(p->Z, xd, sizeof(xd)); /* Z = xd */ +#else +# undef xn +# undef y +# undef xd +# undef x1n +# undef y1 +# undef Zuu +# undef tv4 +#endif +#undef sgn0_fp +} + +static void POINTonE1_add_n_dbl(POINTonE1 *out, const POINTonE1 *p, size_t n) +{ + POINTonE1_dadd(out, out, p, NULL); + while(n--) + POINTonE1_double(out, out); +} + +static void POINTonE1_times_minus_z(POINTonE1 *out, const POINTonE1 *in) +{ + POINTonE1_double(out, in); /* 1: 0x2 */ + POINTonE1_add_n_dbl(out, in, 2); /* 2..4: 0x3..0xc */ + POINTonE1_add_n_dbl(out, in, 3); /* 5..8: 0xd..0x68 */ + POINTonE1_add_n_dbl(out, in, 9); /* 9..18: 0x69..0xd200 */ + POINTonE1_add_n_dbl(out, in, 32); /* 19..51: ..0xd20100000000 */ + POINTonE1_add_n_dbl(out, in, 16); /* 52..68: ..0xd201000000010000 */ +} + +/* + * |u|, |v| are expected to be in Montgomery representation + */ +static void map_to_g1(POINTonE1 *out, const vec384 u, const vec384 v) +{ + POINTonE1 p; + + map_to_isogenous_E1(&p, u); + + if (v != NULL) { + map_to_isogenous_E1(out, v); /* borrow |out| */ + POINTonE1_dadd(&p, &p, out, Aprime_E1); + } + + isogeny_map_to_E1(&p, &p); /* sprinkle isogenous powder */ + + /* clear the cofactor by multiplying |p| by 1-z, 0xd201000000010001 */ + POINTonE1_times_minus_z(out, &p); + POINTonE1_dadd(out, out, &p, NULL); +} + +void blst_map_to_g1(POINTonE1 *out, const vec384 u, const vec384 v) +{ map_to_g1(out, u, v); } + +static void Encode_to_G1(POINTonE1 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ + vec384 u[1]; + + hash_to_field(u, 1, aug, aug_len, msg, msg_len, DST, DST_len); + map_to_g1(p, u[0], NULL); +} + +void blst_encode_to_g1(POINTonE1 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ Encode_to_G1(p, msg, msg_len, DST, DST_len, aug, aug_len); } + +static void Hash_to_G1(POINTonE1 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ + vec384 u[2]; + + hash_to_field(u, 2, aug, aug_len, msg, msg_len, DST, DST_len); + map_to_g1(p, u[0], u[1]); +} + +void blst_hash_to_g1(POINTonE1 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ Hash_to_G1(p, msg, msg_len, DST, DST_len, aug, aug_len); } + +static void sigma(POINTonE1 *out, const POINTonE1 *in); + +#if 0 +#ifdef __OPTIMIZE_SIZE__ +static void POINTonE1_times_zz_minus_1_div_by_3(POINTonE1 *out, + const POINTonE1 *in) +{ + static const byte zz_minus_1_div_by_3[] = { + TO_BYTES(0x0000000055555555ULL), TO_BYTES(0x396c8c005555e156) + }; + size_t n = 126-1; + const POINTonE1 *dblin = in; + + while(n--) { + POINTonE1_double(out, dblin); dblin = out; + if (is_bit_set(zz_minus_1_div_by_3, n)) + POINTonE1_dadd(out, out, in, NULL); + } +} +#else +static void POINTonE1_dbl_n_add(POINTonE1 *out, size_t n, const POINTonE1 *p) +{ + while(n--) + POINTonE1_double(out, out); + POINTonE1_dadd(out, out, p, NULL); +} + +static void POINTonE1_times_zz_minus_1_div_by_3(POINTonE1 *out, + const POINTonE1 *in) +{ + POINTonE1 t3, t5, t7, t11, t85; + + POINTonE1_double(&t7, in); /* 2P */ + POINTonE1_dadd(&t3, &t7, in, NULL); /* 3P */ + POINTonE1_dadd(&t5, &t3, &t7, NULL); /* 5P */ + POINTonE1_dadd(&t7, &t5, &t7, NULL); /* 7P */ + POINTonE1_double(&t85, &t5); /* 10P */ + POINTonE1_dadd(&t11, &t85, in, NULL); /* 11P */ + POINTonE1_dbl_n_add(&t85, 3, &t5); /* 0x55P */ + /* (-0xd201000000010000^2 - 1) / 3 */ + POINTonE1_double(out, &t7); /* 0xe */ + POINTonE1_dbl_n_add(out, 5, &t11); /* 0x1cb */ + POINTonE1_dbl_n_add(out, 3, &t3); /* 0xe5b */ + POINTonE1_dbl_n_add(out, 3, in); /* 0x72d9 */ + POINTonE1_dbl_n_add(out, 5, &t3); /* 0xe5b23 */ + POINTonE1_dbl_n_add(out, 18, &t85); /* 0x396c8c0055 */ + POINTonE1_dbl_n_add(out, 8, &t85); /* 0x396c8c005555 */ + POINTonE1_dbl_n_add(out, 3, &t7); /* 0x1cb646002aaaf */ + POINTonE1_dbl_n_add(out, 7, &t5); /* 0xe5b23001555785 */ + POINTonE1_dbl_n_add(out, 5, &t11); /* 0x1cb646002aaaf0ab */ + POINTonE1_dbl_n_add(out, 41, &t85); /* 0x396c8c005555e1560000000055 */ + POINTonE1_dbl_n_add(out, 8, &t85); /* 0x396c8c005555e156000000005555 */ + POINTonE1_dbl_n_add(out, 8, &t85); /* 0x396c8c005555e15600000000555555 */ + POINTonE1_dbl_n_add(out, 8, &t85); /* 0x396c8c005555e1560000000055555555 */ +} +#endif + +static bool_t POINTonE1_in_G1(const POINTonE1 *P) +{ + POINTonE1 t0, t1, t2; + + /* Bowe, S., "Faster subgroup checks for BLS12-381" */ + sigma(&t0, P); /* σ(P) */ + sigma(&t1, &t0); /* σ²(P) */ + + POINTonE1_double(&t0, &t0); /* 2σ(P) */ + POINTonE1_dadd(&t2, &t1, P, NULL); /* P + σ²(P) */ + POINTonE1_cneg(&t2, 1); /* - P - σ²(P) */ + POINTonE1_dadd(&t2, &t2, &t0, NULL); /* 2σ(P) - P - σ²(P) */ + POINTonE1_times_zz_minus_1_div_by_3( &t0, &t2); + POINTonE1_cneg(&t1, 1); + POINTonE1_dadd(&t0, &t0, &t1, NULL); /* [(z²-1)/3](2σ(P) - P - σ²(P)) */ + /* - σ²(P) */ + return vec_is_zero(t0.Z, sizeof(t0.Z)); +} +#else +static bool_t POINTonE1_in_G1(const POINTonE1 *P) +{ + POINTonE1 t0, t1; + + /* Scott, M., https://eprint.iacr.org/2021/1130 */ + POINTonE1_times_minus_z(&t0, P); + POINTonE1_times_minus_z(&t1, &t0); + POINTonE1_cneg(&t1, 1); /* [-z²]P */ + + sigma(&t0, P); /* σ(P) */ + sigma(&t0, &t0); /* σ²(P) */ + + return POINTonE1_is_equal(&t0, &t1); +} +#endif + +int blst_p1_in_g1(const POINTonE1 *p) +{ return (int)POINTonE1_in_G1(p); } + +int blst_p1_affine_in_g1(const POINTonE1_affine *p) +{ + POINTonE1 P; + + vec_copy(P.X, p->X, 2*sizeof(P.X)); + vec_select(P.Z, p->X, BLS12_381_Rx.p, sizeof(P.Z), + vec_is_zero(p, sizeof(*p))); + + return (int)POINTonE1_in_G1(&P); +} diff --git a/crypto/blst_src/map_to_g2.c b/crypto/blst_src/map_to_g2.c new file mode 100644 index 00000000000..90fd86e9d31 --- /dev/null +++ b/crypto/blst_src/map_to_g2.c @@ -0,0 +1,444 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "point.h" +#include "fields.h" + +/* + * y^2 = x^3 + A'*x + B', isogenous one + */ +static const vec384x Aprime_E2 = { /* 240*i */ + { 0 }, + { TO_LIMB_T(0xe53a000003135242), TO_LIMB_T(0x01080c0fdef80285), + TO_LIMB_T(0xe7889edbe340f6bd), TO_LIMB_T(0x0b51375126310601), + TO_LIMB_T(0x02d6985717c744ab), TO_LIMB_T(0x1220b4e979ea5467) } +}; +static const vec384x Bprime_E2 = { /* 1012 + 1012*i */ + { TO_LIMB_T(0x22ea00000cf89db2), TO_LIMB_T(0x6ec832df71380aa4), + TO_LIMB_T(0x6e1b94403db5a66e), TO_LIMB_T(0x75bf3c53a79473ba), + TO_LIMB_T(0x3dd3a569412c0a34), TO_LIMB_T(0x125cdb5e74dc4fd1) }, + { TO_LIMB_T(0x22ea00000cf89db2), TO_LIMB_T(0x6ec832df71380aa4), + TO_LIMB_T(0x6e1b94403db5a66e), TO_LIMB_T(0x75bf3c53a79473ba), + TO_LIMB_T(0x3dd3a569412c0a34), TO_LIMB_T(0x125cdb5e74dc4fd1) } +}; + +static void map_fp2_times_Zz(vec384x map[], const vec384x isogeny_map[], + const vec384x Zz_powers[], size_t n) +{ + while (n--) + mul_fp2(map[n], isogeny_map[n], Zz_powers[n]); +} + +static void map_fp2(vec384x acc, const vec384x x, const vec384x map[], size_t n) +{ + while (n--) { + mul_fp2(acc, acc, x); + add_fp2(acc, acc, map[n]); + } +} + +static void isogeny_map_to_E2(POINTonE2 *out, const POINTonE2 *p) +{ + /* + * x = x_num / x_den, where + * x_num = k_(1,3) * x'^3 + k_(1,2) * x'^2 + k_(1,1) * x' + k_(1,0) + * ... + */ + static const vec384x isogeny_map_x_num[] = { /* (k_(1,*)<<384) % P */ + {{ TO_LIMB_T(0x47f671c71ce05e62), TO_LIMB_T(0x06dd57071206393e), + TO_LIMB_T(0x7c80cd2af3fd71a2), TO_LIMB_T(0x048103ea9e6cd062), + TO_LIMB_T(0xc54516acc8d037f6), TO_LIMB_T(0x13808f550920ea41) }, + { TO_LIMB_T(0x47f671c71ce05e62), TO_LIMB_T(0x06dd57071206393e), + TO_LIMB_T(0x7c80cd2af3fd71a2), TO_LIMB_T(0x048103ea9e6cd062), + TO_LIMB_T(0xc54516acc8d037f6), TO_LIMB_T(0x13808f550920ea41) }}, + {{ 0 }, + { TO_LIMB_T(0x5fe55555554c71d0), TO_LIMB_T(0x873fffdd236aaaa3), + TO_LIMB_T(0x6a6b4619b26ef918), TO_LIMB_T(0x21c2888408874945), + TO_LIMB_T(0x2836cda7028cabc5), TO_LIMB_T(0x0ac73310a7fd5abd) }}, + {{ TO_LIMB_T(0x0a0c5555555971c3), TO_LIMB_T(0xdb0c00101f9eaaae), + TO_LIMB_T(0xb1fb2f941d797997), TO_LIMB_T(0xd3960742ef416e1c), + TO_LIMB_T(0xb70040e2c20556f4), TO_LIMB_T(0x149d7861e581393b) }, + { TO_LIMB_T(0xaff2aaaaaaa638e8), TO_LIMB_T(0x439fffee91b55551), + TO_LIMB_T(0xb535a30cd9377c8c), TO_LIMB_T(0x90e144420443a4a2), + TO_LIMB_T(0x941b66d3814655e2), TO_LIMB_T(0x0563998853fead5e) }}, + {{ TO_LIMB_T(0x40aac71c71c725ed), TO_LIMB_T(0x190955557a84e38e), + TO_LIMB_T(0xd817050a8f41abc3), TO_LIMB_T(0xd86485d4c87f6fb1), + TO_LIMB_T(0x696eb479f885d059), TO_LIMB_T(0x198e1a74328002d2) }, + { 0 }} + }; + /* ... + * x_den = x'^2 + k_(2,1) * x' + k_(2,0) + */ + static const vec384x isogeny_map_x_den[] = { /* (k_(2,*)<<384) % P */ + {{ 0 }, + { TO_LIMB_T(0x1f3affffff13ab97), TO_LIMB_T(0xf25bfc611da3ff3e), + TO_LIMB_T(0xca3757cb3819b208), TO_LIMB_T(0x3e6427366f8cec18), + TO_LIMB_T(0x03977bc86095b089), TO_LIMB_T(0x04f69db13f39a952) }}, + {{ TO_LIMB_T(0x447600000027552e), TO_LIMB_T(0xdcb8009a43480020), + TO_LIMB_T(0x6f7ee9ce4a6e8b59), TO_LIMB_T(0xb10330b7c0a95bc6), + TO_LIMB_T(0x6140b1fcfb1e54b7), TO_LIMB_T(0x0381be097f0bb4e1) }, + { TO_LIMB_T(0x7588ffffffd8557d), TO_LIMB_T(0x41f3ff646e0bffdf), + TO_LIMB_T(0xf7b1e8d2ac426aca), TO_LIMB_T(0xb3741acd32dbb6f8), + TO_LIMB_T(0xe9daf5b9482d581f), TO_LIMB_T(0x167f53e0ba7431b8) }} + }; + /* + * y = y' * y_num / y_den, where + * y_num = k_(3,3) * x'^3 + k_(3,2) * x'^2 + k_(3,1) * x' + k_(3,0) + * ... + */ + static const vec384x isogeny_map_y_num[] = { /* (k_(3,*)<<384) % P */ + {{ TO_LIMB_T(0x96d8f684bdfc77be), TO_LIMB_T(0xb530e4f43b66d0e2), + TO_LIMB_T(0x184a88ff379652fd), TO_LIMB_T(0x57cb23ecfae804e1), + TO_LIMB_T(0x0fd2e39eada3eba9), TO_LIMB_T(0x08c8055e31c5d5c3) }, + { TO_LIMB_T(0x96d8f684bdfc77be), TO_LIMB_T(0xb530e4f43b66d0e2), + TO_LIMB_T(0x184a88ff379652fd), TO_LIMB_T(0x57cb23ecfae804e1), + TO_LIMB_T(0x0fd2e39eada3eba9), TO_LIMB_T(0x08c8055e31c5d5c3) }}, + {{ 0 }, + { TO_LIMB_T(0xbf0a71c71c91b406), TO_LIMB_T(0x4d6d55d28b7638fd), + TO_LIMB_T(0x9d82f98e5f205aee), TO_LIMB_T(0xa27aa27b1d1a18d5), + TO_LIMB_T(0x02c3b2b2d2938e86), TO_LIMB_T(0x0c7d13420b09807f) }}, + {{ TO_LIMB_T(0xd7f9555555531c74), TO_LIMB_T(0x21cffff748daaaa8), + TO_LIMB_T(0x5a9ad1866c9bbe46), TO_LIMB_T(0x4870a2210221d251), + TO_LIMB_T(0x4a0db369c0a32af1), TO_LIMB_T(0x02b1ccc429ff56af) }, + { TO_LIMB_T(0xe205aaaaaaac8e37), TO_LIMB_T(0xfcdc000768795556), + TO_LIMB_T(0x0c96011a8a1537dd), TO_LIMB_T(0x1c06a963f163406e), + TO_LIMB_T(0x010df44c82a881e6), TO_LIMB_T(0x174f45260f808feb) }}, + {{ TO_LIMB_T(0xa470bda12f67f35c), TO_LIMB_T(0xc0fe38e23327b425), + TO_LIMB_T(0xc9d3d0f2c6f0678d), TO_LIMB_T(0x1c55c9935b5a982e), + TO_LIMB_T(0x27f6c0e2f0746764), TO_LIMB_T(0x117c5e6e28aa9054) }, + { 0 }} + }; + /* ... + * y_den = x'^3 + k_(4,2) * x'^2 + k_(4,1) * x' + k_(4,0) + */ + static const vec384x isogeny_map_y_den[] = { /* (k_(4,*)<<384) % P */ + {{ TO_LIMB_T(0x0162fffffa765adf), TO_LIMB_T(0x8f7bea480083fb75), + TO_LIMB_T(0x561b3c2259e93611), TO_LIMB_T(0x11e19fc1a9c875d5), + TO_LIMB_T(0xca713efc00367660), TO_LIMB_T(0x03c6a03d41da1151) }, + { TO_LIMB_T(0x0162fffffa765adf), TO_LIMB_T(0x8f7bea480083fb75), + TO_LIMB_T(0x561b3c2259e93611), TO_LIMB_T(0x11e19fc1a9c875d5), + TO_LIMB_T(0xca713efc00367660), TO_LIMB_T(0x03c6a03d41da1151) }}, + {{ 0 }, + { TO_LIMB_T(0x5db0fffffd3b02c5), TO_LIMB_T(0xd713f52358ebfdba), + TO_LIMB_T(0x5ea60761a84d161a), TO_LIMB_T(0xbb2c75a34ea6c44a), + TO_LIMB_T(0x0ac6735921c1119b), TO_LIMB_T(0x0ee3d913bdacfbf6) }}, + {{ TO_LIMB_T(0x66b10000003affc5), TO_LIMB_T(0xcb1400e764ec0030), + TO_LIMB_T(0xa73e5eb56fa5d106), TO_LIMB_T(0x8984c913a0fe09a9), + TO_LIMB_T(0x11e10afb78ad7f13), TO_LIMB_T(0x05429d0e3e918f52) }, + { TO_LIMB_T(0x534dffffffc4aae6), TO_LIMB_T(0x5397ff174c67ffcf), + TO_LIMB_T(0xbff273eb870b251d), TO_LIMB_T(0xdaf2827152870915), + TO_LIMB_T(0x393a9cbaca9e2dc3), TO_LIMB_T(0x14be74dbfaee5748) }} + }; + vec384x Zz_powers[3], map[3], xn, xd, yn, yd; + + /* lay down Z^2 powers in descending order */ + sqr_fp2(Zz_powers[2], p->Z); /* ZZ^1 */ + sqr_fp2(Zz_powers[1], Zz_powers[2]); /* ZZ^2 1+1 */ + mul_fp2(Zz_powers[0], Zz_powers[2], Zz_powers[1]); /* ZZ^3 2+1 */ + + map_fp2_times_Zz(map, isogeny_map_x_num, Zz_powers, 3); + mul_fp2(xn, p->X, isogeny_map_x_num[3]); + add_fp2(xn, xn, map[2]); + map_fp2(xn, p->X, map, 2); + + map_fp2_times_Zz(map, isogeny_map_x_den, Zz_powers + 1, 2); + add_fp2(xd, p->X, map[1]); + map_fp2(xd, p->X, map, 1); + mul_fp2(xd, xd, Zz_powers[2]); /* xd *= Z^2 */ + + map_fp2_times_Zz(map, isogeny_map_y_num, Zz_powers, 3); + mul_fp2(yn, p->X, isogeny_map_y_num[3]); + add_fp2(yn, yn, map[2]); + map_fp2(yn, p->X, map, 2); + mul_fp2(yn, yn, p->Y); /* yn *= Y */ + + map_fp2_times_Zz(map, isogeny_map_y_den, Zz_powers, 3); + add_fp2(yd, p->X, map[2]); + map_fp2(yd, p->X, map, 2); + mul_fp2(Zz_powers[2], Zz_powers[2], p->Z); + mul_fp2(yd, yd, Zz_powers[2]); /* yd *= Z^3 */ + + /* convert (xn, xd, yn, yd) to Jacobian coordinates */ + mul_fp2(out->Z, xd, yd); /* Z = xd * yd */ + mul_fp2(out->X, xn, yd); + mul_fp2(out->X, out->X, out->Z); /* X = xn * xd * yd^2 */ + sqr_fp2(out->Y, out->Z); + mul_fp2(out->Y, out->Y, xd); + mul_fp2(out->Y, out->Y, yn); /* Y = yn * xd^3 * yd^2 */ +} + +static void map_to_isogenous_E2(POINTonE2 *p, const vec384x u) +{ + static const vec384x minus_A = { + { 0 }, + { TO_LIMB_T(0xd4c4fffffcec5869), TO_LIMB_T(0x1da3f3eed25bfd79), + TO_LIMB_T(0x7fa833c5136fff67), TO_LIMB_T(0x59261433cd540cbd), + TO_LIMB_T(0x48450f5f2b84682c), TO_LIMB_T(0x07e05d00bf959233) } + }; + static const vec384x Z = { /* -2 - i */ + { TO_LIMB_T(0x87ebfffffff9555c), TO_LIMB_T(0x656fffe5da8ffffa), + TO_LIMB_T(0x0fd0749345d33ad2), TO_LIMB_T(0xd951e663066576f4), + TO_LIMB_T(0xde291a3d41e980d3), TO_LIMB_T(0x0815664c7dfe040d) }, + { TO_LIMB_T(0x43f5fffffffcaaae), TO_LIMB_T(0x32b7fff2ed47fffd), + TO_LIMB_T(0x07e83a49a2e99d69), TO_LIMB_T(0xeca8f3318332bb7a), + TO_LIMB_T(0xef148d1ea0f4c069), TO_LIMB_T(0x040ab3263eff0206) } + }; + static const vec384x recip_ZZZ = { /* 1/(Z^3) */ + { TO_LIMB_T(0x65018f5c28f598eb), TO_LIMB_T(0xe6020417f022d916), + TO_LIMB_T(0xd6327313288369c7), TO_LIMB_T(0x622ded8eb447156f), + TO_LIMB_T(0xe52a2aee72c2a01f), TO_LIMB_T(0x089812fb8481ffe4) }, + { TO_LIMB_T(0x2574eb851eb8619f), TO_LIMB_T(0xdba2e97912925604), + TO_LIMB_T(0x67e495a909e7a18e), TO_LIMB_T(0xdf2da23b8145b8f7), + TO_LIMB_T(0xcf5d3728310ebf6d), TO_LIMB_T(0x11be446236f4c116) } + }; + static const vec384x magic_ZZZ = { /* 1/Z^3 = a + b*i */ + /* a^2 + b^2 */ + { TO_LIMB_T(0xaa7eb851eb8508e0), TO_LIMB_T(0x1c54fdf360989374), + TO_LIMB_T(0xc87f2fc6e716c62e), TO_LIMB_T(0x0124aefb1f9efea7), + TO_LIMB_T(0xb2f8be63e844865c), TO_LIMB_T(0x08b47f775a7ef35a) }, + /* (a^2 + b^2)^((P-3)/4) */ + { TO_LIMB_T(0xe4132bbd838cf70a), TO_LIMB_T(0x01d769ac83772c19), + TO_LIMB_T(0xa83dd6e974c22e45), TO_LIMB_T(0xbc8ec3e777b08dff), + TO_LIMB_T(0xc035c2042ecf5da3), TO_LIMB_T(0x073929e97f0850bf) } + }; + static const vec384x ZxA = { /* 240 - 480*i */ + { TO_LIMB_T(0xe53a000003135242), TO_LIMB_T(0x01080c0fdef80285), + TO_LIMB_T(0xe7889edbe340f6bd), TO_LIMB_T(0x0b51375126310601), + TO_LIMB_T(0x02d6985717c744ab), TO_LIMB_T(0x1220b4e979ea5467) }, + { TO_LIMB_T(0xa989fffff9d8b0d2), TO_LIMB_T(0x3b47e7dda4b7faf3), + TO_LIMB_T(0xff50678a26dffece), TO_LIMB_T(0xb24c28679aa8197a), + TO_LIMB_T(0x908a1ebe5708d058), TO_LIMB_T(0x0fc0ba017f2b2466) } + }; + vec384x uu, tv2, tv4, x2n, gx1, gxd, y2; +#if 0 + vec384x xn, x1n, xd, y, y1, Zuu; +#else +# define xn p->X +# define y p->Y +# define xd p->Z +# define x1n xn +# define y1 y +# define Zuu x2n +#endif +#define sgn0_fp2(a) (sgn0_pty_mont_384x((a), BLS12_381_P, p0) & 1) + bool_t e1, e2; + + /* + * as per map_to_curve() from poc/sswu_opt.sage at + * https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve + * with 9mod16 twists... + */ + /* x numerator variants */ + sqr_fp2(uu, u); /* uu = u^2 */ + mul_fp2(Zuu, Z, uu); /* Zuu = Z * uu */ + sqr_fp2(tv2, Zuu); /* tv2 = Zuu^2 */ + add_fp2(tv2, tv2, Zuu); /* tv2 = tv2 + Zuu */ + add_fp2(x1n, tv2, BLS12_381_Rx.p2); /* x1n = tv2 + 1 */ + mul_fp2(x1n, x1n, Bprime_E2); /* x1n = x1n * B */ + mul_fp2(x2n, Zuu, x1n); /* x2n = Zuu * x1n */ + + /* x denumenator */ + mul_fp2(xd, minus_A, tv2); /* xd = -A * tv2 */ + e1 = vec_is_zero(xd, sizeof(xd)); /* e1 = xd == 0 */ + vec_select(xd, ZxA, xd, sizeof(xd), e1); /* # If xd == 0, set xd = Z*A */ + + /* y numerators variants */ + sqr_fp2(tv2, xd); /* tv2 = xd^2 */ + mul_fp2(gxd, xd, tv2); /* gxd = xd^3 */ + mul_fp2(tv2, Aprime_E2, tv2); /* tv2 = A * tv2 */ + sqr_fp2(gx1, x1n); /* gx1 = x1n^2 */ + add_fp2(gx1, gx1, tv2); /* gx1 = gx1 + tv2 # x1n^2 + A*xd^2 */ + mul_fp2(gx1, gx1, x1n); /* gx1 = gx1 * x1n # x1n^3 + A*x1n*xd^2 */ + mul_fp2(tv2, Bprime_E2, gxd); /* tv2 = B * gxd */ + add_fp2(gx1, gx1, tv2); /* gx1 = gx1 + tv2 # x1^3 + A*x1*xd^2 + B*xd^3 */ + sqr_fp2(tv4, gxd); /* tv4 = gxd^2 */ + mul_fp2(tv2, gx1, gxd); /* tv2 = gx1 * gxd */ + mul_fp2(tv4, tv4, tv2); /* tv4 = tv4 * tv2 # gx1*gxd^3 */ + e2 = recip_sqrt_fp2(y1, tv4, /* y1 = tv4^c1 # (gx1*gxd^3)^((p^2-9)/16) */ + recip_ZZZ, magic_ZZZ); + mul_fp2(y1, y1, tv2); /* y1 = y1 * tv2 # gx1*gxd*y1 */ + mul_fp2(y2, y1, uu); /* y2 = y1 * uu */ + mul_fp2(y2, y2, u); /* y2 = y2 * u */ + + /* choose numerators */ + vec_select(xn, x1n, x2n, sizeof(xn), e2); /* xn = e2 ? x1n : x2n */ + vec_select(y, y1, y2, sizeof(y), e2); /* y = e2 ? y1 : y2 */ + + e1 = sgn0_fp2(u); + e2 = sgn0_fp2(y); + cneg_fp2(y, y, e1^e2); /* fix sign of y */ + /* return (xn, xd, y, 1) */ + + /* convert (xn, xd, y, 1) to Jacobian projective coordinates */ + mul_fp2(p->X, xn, xd); /* X = xn * xd */ + mul_fp2(p->Y, y, gxd); /* Y = y * xd^3 */ +#ifndef xd + vec_copy(p->Z, xd, sizeof(xd)); /* Z = xd */ +#else +# undef xn +# undef y +# undef xd +# undef x1n +# undef y1 +# undef Zuu +# undef tv4 +#endif +#undef sgn0_fp2 +} + +#if 0 +static const byte h_eff[] = { + TO_BYTES(0xe8020005aaa95551), TO_BYTES(0x59894c0adebbf6b4), + TO_BYTES(0xe954cbc06689f6a3), TO_BYTES(0x2ec0ec69d7477c1a), + TO_BYTES(0x6d82bf015d1212b0), TO_BYTES(0x329c2f178731db95), + TO_BYTES(0x9986ff031508ffe1), TO_BYTES(0x88e2a8e9145ad768), + TO_BYTES(0x584c6a0ea91b3528), TO_BYTES(0x0bc69f08f2ee75b3) +}; + +static void clear_cofactor(POINTonE2 *out, const POINTonE2 *p) +{ POINTonE2_mult_w5(out, p, h_eff, 636); } +#else +/* + * As per suggestions in "7. Clearing the cofactor" at + * https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06 + */ +static void POINTonE2_add_n_dbl(POINTonE2 *out, const POINTonE2 *p, size_t n) +{ + POINTonE2_dadd(out, out, p, NULL); + while(n--) + POINTonE2_double(out, out); +} + +static void POINTonE2_times_minus_z(POINTonE2 *out, const POINTonE2 *in) +{ + POINTonE2_double(out, in); /* 1: 0x2 */ + POINTonE2_add_n_dbl(out, in, 2); /* 2..4: 0x3..0xc */ + POINTonE2_add_n_dbl(out, in, 3); /* 5..8: 0xd..0x68 */ + POINTonE2_add_n_dbl(out, in, 9); /* 9..18: 0x69..0xd200 */ + POINTonE2_add_n_dbl(out, in, 32); /* 19..51: ..0xd20100000000 */ + POINTonE2_add_n_dbl(out, in, 16); /* 52..68: ..0xd201000000010000 */ +} + +static void psi(POINTonE2 *out, const POINTonE2 *in); + +static void clear_cofactor(POINTonE2 *out, const POINTonE2 *p) +{ + POINTonE2 t0, t1; + + /* A.Budroni, F.Pintore, "Efficient hash maps to G2 on BLS curves" */ + POINTonE2_double(out, p); /* out = 2P */ + psi(out, out); /* out = Ψ(2P) */ + psi(out, out); /* out = Ψ²(2P) */ + + vec_copy(&t0, p, sizeof(t0)); + POINTonE2_cneg(&t0, 1); /* t0 = -P */ + psi(&t1, &t0); /* t1 = -Ψ(P) */ + POINTonE2_dadd(out, out, &t0, NULL);/* out = Ψ²(2P) - P */ + POINTonE2_dadd(out, out, &t1, NULL);/* out = Ψ²(2P) - P - Ψ(P) */ + + POINTonE2_times_minus_z(&t0, p); /* t0 = [-z]P */ + POINTonE2_dadd(&t0, &t0, p, NULL); /* t0 = [-z + 1]P */ + POINTonE2_dadd(&t0, &t0, &t1, NULL);/* t0 = [-z + 1]P - Ψ(P) */ + POINTonE2_times_minus_z(&t1, &t0); /* t1 = [z² - z]P + [z]Ψ(P) */ + POINTonE2_dadd(out, out, &t1, NULL);/* out = [z² - z - 1]P */ + /* + [z - 1]Ψ(P) */ + /* + Ψ²(2P) */ +} +#endif + +/* + * |u|, |v| are expected to be in Montgomery representation + */ +static void map_to_g2(POINTonE2 *out, const vec384x u, const vec384x v) +{ + POINTonE2 p; + + map_to_isogenous_E2(&p, u); + + if (v != NULL) { + map_to_isogenous_E2(out, v); /* borrow |out| */ + POINTonE2_dadd(&p, &p, out, Aprime_E2); + } + + isogeny_map_to_E2(&p, &p); /* sprinkle isogenous powder */ + clear_cofactor(out, &p); +} + +void blst_map_to_g2(POINTonE2 *out, const vec384x u, const vec384x v) +{ map_to_g2(out, u, v); } + +static void Encode_to_G2(POINTonE2 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ + vec384x u[1]; + + hash_to_field(u[0], 2, aug, aug_len, msg, msg_len, DST, DST_len); + map_to_g2(p, u[0], NULL); +} + +void blst_encode_to_g2(POINTonE2 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ Encode_to_G2(p, msg, msg_len, DST, DST_len, aug, aug_len); } + +static void Hash_to_G2(POINTonE2 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ + vec384x u[2]; + + hash_to_field(u[0], 4, aug, aug_len, msg, msg_len, DST, DST_len); + map_to_g2(p, u[0], u[1]); +} + +void blst_hash_to_g2(POINTonE2 *p, const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len, + const unsigned char *aug, size_t aug_len) +{ Hash_to_G2(p, msg, msg_len, DST, DST_len, aug, aug_len); } + +static bool_t POINTonE2_in_G2(const POINTonE2 *P) +{ +#if 0 + POINTonE2 t0, t1, t2; + + /* Bowe, S., "Faster subgroup checks for BLS12-381" */ + psi(&t0, P); /* Ψ(P) */ + psi(&t0, &t0); /* Ψ²(P) */ + psi(&t1, &t0); /* Ψ³(P) */ + + POINTonE2_times_minus_z(&t2, &t1); + POINTonE2_dadd(&t0, &t0, &t2, NULL); + POINTonE2_cneg(&t0, 1); + POINTonE2_dadd(&t0, &t0, P, NULL); /* [z]Ψ³(P) - Ψ²(P) + P */ + + return vec_is_zero(t0.Z, sizeof(t0.Z)); +#else + POINTonE2 t0, t1; + + /* Scott, M., https://eprint.iacr.org/2021/1130 */ + psi(&t0, P); /* Ψ(P) */ + + POINTonE2_times_minus_z(&t1, P); + POINTonE2_cneg(&t1, 1); /* [z]P */ + + return POINTonE2_is_equal(&t0, &t1); +#endif +} + +int blst_p2_in_g2(const POINTonE2 *p) +{ return (int)POINTonE2_in_G2(p); } + +int blst_p2_affine_in_g2(const POINTonE2_affine *p) +{ + POINTonE2 P; + + vec_copy(P.X, p->X, 2*sizeof(P.X)); + vec_select(P.Z, p->X, BLS12_381_Rx.p, sizeof(P.Z), + vec_is_zero(p, sizeof(*p))); + + return (int)POINTonE2_in_G2(&P); +} diff --git a/crypto/blst_src/multi_scalar.c b/crypto/blst_src/multi_scalar.c new file mode 100644 index 00000000000..55ab8227718 --- /dev/null +++ b/crypto/blst_src/multi_scalar.c @@ -0,0 +1,427 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fields.h" +#include "point.h" + +/* + * Infinite point among inputs would be devastating. Shall we change it? + */ +#define POINTS_TO_AFFINE_IMPL(prefix, ptype, bits, field) \ +static void ptype##s_to_affine(ptype##_affine dst[], \ + const ptype *const points[], size_t npoints) \ +{ \ + size_t i; \ + vec##bits *acc, ZZ, ZZZ; \ + const ptype *point = NULL; \ + const size_t stride = sizeof(ptype)==sizeof(POINTonE1) ? 1536 : 768; \ +\ + while (npoints) { \ + const ptype *p, *const *walkback; \ + size_t delta = strideZ, sizeof(vec##bits)); \ + for (i = 1; i < delta; i++, acc++) \ + point = *points ? *points++ : point+1, \ + mul_##field(acc[0], acc[-1], point->Z); \ +\ + --acc; reciprocal_##field(acc[0], acc[0]); \ +\ + walkback = points-1, p = point, --delta, dst += delta; \ + for (i = 0; i < delta; i++, acc--, dst--) { \ + mul_##field(acc[-1], acc[-1], acc[0]); /* 1/Z */\ + sqr_##field(ZZ, acc[-1]); /* 1/Z^2 */\ + mul_##field(ZZZ, ZZ, acc[-1]); /* 1/Z^3 */\ + mul_##field(acc[-1], p->Z, acc[0]); \ + mul_##field(dst->X, p->X, ZZ); /* X = X'/Z^2 */\ + mul_##field(dst->Y, p->Y, ZZZ); /* Y = Y'/Z^3 */\ + p = (p == *walkback) ? *--walkback : p-1; \ + } \ + sqr_##field(ZZ, acc[0]); /* 1/Z^2 */\ + mul_##field(ZZZ, ZZ, acc[0]); /* 1/Z^3 */\ + mul_##field(dst->X, p->X, ZZ); /* X = X'/Z^2 */\ + mul_##field(dst->Y, p->Y, ZZZ); /* Y = Y'/Z^3 */\ + ++delta, dst += delta, npoints -= delta; \ + } \ +} \ +\ +void prefix##s_to_affine(ptype##_affine dst[], const ptype *const points[], \ + size_t npoints) \ +{ ptype##s_to_affine(dst, points, npoints); } + +POINTS_TO_AFFINE_IMPL(blst_p1, POINTonE1, 384, fp) +POINTS_TO_AFFINE_IMPL(blst_p2, POINTonE2, 384x, fp2) + +/* + * This is two-step multi-scalar multiplication procedure. First, given + * a set of points you pre-compute a table for chosen windowing factor + * [expressed in bits with value between 2 and 14], and then you pass + * this table to the actual multiplication procedure along with scalars. + * Idea is that the pre-computed table will be reused multiple times. In + * which case multiplication runs faster than below Pippenger algorithm + * implementation for up to ~16K points for wbits=8, naturally at the + * expense of multi-megabyte table. One can trade even more memory for + * performance, but each wbits increment doubles the memory requirement, + * so at some point it gets prohibively large... For reference, without + * reusing the table it's faster than Pippenger algorithm for up ~32 + * points [with wbits=5]... + */ + +#define SCRATCH_SZ(ptype) (sizeof(ptype)==sizeof(POINTonE1) ? 8192 : 4096) + +#define PRECOMPUTE_WBITS_IMPL(prefix, ptype, bits, field, one) \ +static void ptype##_precompute_row_wbits(ptype row[], size_t wbits, \ + const ptype##_affine *point) \ +{ \ + size_t i, j, n = (size_t)1 << (wbits-1); \ + /* row[-1] is implicit infinity */\ + vec_copy(&row[0], point, sizeof(*point)); /* row[0]=p*1 */\ + vec_copy(&row[0].Z, one, sizeof(row[0].Z)); \ + ptype##_double(&row[1], &row[0]); /* row[1]=p*(1+1) */\ + for (i = 2, j = 1; i < n; i += 2, j++) \ + ptype##_add_affine(&row[i], &row[i-1], point), /* row[2]=p*(2+1) */\ + ptype##_double(&row[i+1], &row[j]); /* row[3]=p*(2+2) */\ +} /* row[4] ... */\ +\ +static void ptype##s_to_affine_row_wbits(ptype##_affine dst[], ptype src[], \ + size_t wbits, size_t npoints) \ +{ \ + size_t total = npoints << (wbits-1); \ + size_t nwin = (size_t)1 << (wbits-1); \ + size_t i, j; \ + vec##bits *acc, ZZ, ZZZ; \ +\ + src += total; \ + acc = (vec##bits *)src; \ + vec_copy(acc++, one, sizeof(vec##bits)); \ + for (i = 0; i < npoints; i++) \ + for (j = nwin; --src, --j; acc++) \ + mul_##field(acc[0], acc[-1], src->Z); \ +\ + --acc; reciprocal_##field(acc[0], acc[0]); \ +\ + for (i = 0; i < npoints; i++) { \ + vec_copy(dst++, src++, sizeof(ptype##_affine)); \ + for (j = 1; j < nwin; j++, acc--, src++, dst++) { \ + mul_##field(acc[-1], acc[-1], acc[0]); /* 1/Z */\ + sqr_##field(ZZ, acc[-1]); /* 1/Z^2 */\ + mul_##field(ZZZ, ZZ, acc[-1]); /* 1/Z^3 */\ + mul_##field(acc[-1], src->Z, acc[0]); \ + mul_##field(dst->X, src->X, ZZ); /* X = X'/Z^2 */\ + mul_##field(dst->Y, src->Y, ZZZ); /* Y = Y'/Z^3 */\ + } \ + } \ +} \ +\ +/* flat |points[n]| can be placed at the end of |table[n<<(wbits-1)]| */\ +static void ptype##s_precompute_wbits(ptype##_affine table[], size_t wbits, \ + const ptype##_affine *const points[], \ + size_t npoints) \ +{ \ + size_t total = npoints << (wbits-1); \ + size_t nwin = (size_t)1 << (wbits-1); \ + size_t nmin = wbits>9 ? (size_t)1: (size_t)1 << (9-wbits); \ + size_t i, top = 0; \ + ptype *rows, *row; \ + const ptype##_affine *point = NULL; \ + size_t stride = ((512*1024)/sizeof(ptype##_affine)) >> wbits; \ + if (stride == 0) stride = 1; \ +\ + while (npoints >= nmin) { \ + size_t limit = total - npoints; \ +\ + if (top + (stride << wbits) > limit) { \ + stride = (limit - top) >> wbits; \ + if (stride == 0) break; \ + } \ + rows = row = (ptype *)(&table[top]); \ + for (i = 0; i < stride; i++, row += nwin) \ + point = *points ? *points++ : point+1, \ + ptype##_precompute_row_wbits(row, wbits, point); \ + ptype##s_to_affine_row_wbits(&table[top], rows, wbits, stride); \ + top += stride << (wbits-1); \ + npoints -= stride; \ + } \ + rows = row = alloca(2*sizeof(ptype##_affine) * npoints * nwin); \ + for (i = 0; i < npoints; i++, row += nwin) \ + point = *points ? *points++ : point+1, \ + ptype##_precompute_row_wbits(row, wbits, point); \ + ptype##s_to_affine_row_wbits(&table[top], rows, wbits, npoints); \ +} \ +\ +size_t prefix##s_mult_wbits_precompute_sizeof(size_t wbits, size_t npoints) \ +{ return (sizeof(ptype##_affine)*npoints) << (wbits-1); } \ +void prefix##s_mult_wbits_precompute(ptype##_affine table[], size_t wbits, \ + const ptype##_affine *const points[], \ + size_t npoints) \ +{ ptype##s_precompute_wbits(table, wbits, points, npoints); } + +#define POINTS_MULT_WBITS_IMPL(prefix, ptype, bits, field, one) \ +static void ptype##_gather_booth_wbits(ptype *p, const ptype##_affine row[], \ + size_t wbits, limb_t booth_idx) \ +{ \ + bool_t booth_sign = (booth_idx >> wbits) & 1; \ + bool_t idx_is_zero; \ + static const ptype##_affine infinity = { 0 }; \ +\ + booth_idx &= ((limb_t)1 << wbits) - 1; \ + idx_is_zero = is_zero(booth_idx); \ + booth_idx -= 1 ^ idx_is_zero; \ + vec_select(p, &infinity, &row[booth_idx], sizeof(row[0]), idx_is_zero); \ + ptype##_cneg(p, booth_sign); \ +} \ +\ +static void ptype##s_mult_wbits(ptype *ret, const ptype##_affine table[], \ + size_t wbits, size_t npoints, \ + const byte *const scalars[], size_t nbits, \ + ptype scratch[]) \ +{ \ + limb_t wmask, wval; \ + size_t i, j, z, nbytes, window, nwin = (size_t)1 << (wbits-1); \ + const byte *scalar, *const *scalar_s = scalars; \ + const ptype##_affine *row = table; \ +\ + size_t scratch_sz = SCRATCH_SZ(ptype); \ + if (scratch == NULL) { \ + scratch_sz /= 4; /* limit to 288K */ \ + scratch_sz = scratch_sz < npoints ? scratch_sz : npoints; \ + scratch = alloca(sizeof(ptype) * scratch_sz); \ + } \ +\ + nbytes = (nbits + 7)/8; /* convert |nbits| to bytes */ \ + scalar = *scalar_s++; \ +\ + /* top excess bits modulo target window size */ \ + window = nbits % wbits; /* yes, it may be zero */ \ + wmask = ((limb_t)1 << (window + 1)) - 1; \ +\ + nbits -= window; \ + z = is_zero(nbits); \ + wval = (get_wval_limb(scalar, nbits - (z^1), wbits + (z^1)) << z) & wmask; \ + wval = booth_encode(wval, wbits); \ + ptype##_gather_booth_wbits(&scratch[0], row, wbits, wval); \ + row += nwin; \ +\ + i = 1; vec_zero(ret, sizeof(*ret)); \ + while (nbits > 0) { \ + for (j = i; i < npoints; i++, j++, row += nwin) { \ + if (j == scratch_sz) \ + ptype##s_accumulate(ret, scratch, j), j = 0; \ + scalar = *scalar_s ? *scalar_s++ : scalar+nbytes; \ + wval = get_wval_limb(scalar, nbits - 1, window + 1) & wmask; \ + wval = booth_encode(wval, wbits); \ + ptype##_gather_booth_wbits(&scratch[j], row, wbits, wval); \ + } \ + ptype##s_accumulate(ret, scratch, j); \ +\ + for (j = 0; j < wbits; j++) \ + ptype##_double(ret, ret); \ +\ + window = wbits; \ + wmask = ((limb_t)1 << (window + 1)) - 1; \ + nbits -= window; \ + i = 0; row = table; scalar_s = scalars; \ + } \ +\ + for (j = i; i < npoints; i++, j++, row += nwin) { \ + if (j == scratch_sz) \ + ptype##s_accumulate(ret, scratch, j), j = 0; \ + scalar = *scalar_s ? *scalar_s++ : scalar+nbytes; \ + wval = (get_wval_limb(scalar, 0, wbits) << 1) & wmask; \ + wval = booth_encode(wval, wbits); \ + ptype##_gather_booth_wbits(&scratch[j], row, wbits, wval); \ + } \ + ptype##s_accumulate(ret, scratch, j); \ +} \ +\ +size_t prefix##s_mult_wbits_scratch_sizeof(size_t npoints) \ +{ \ + const size_t scratch_sz = SCRATCH_SZ(ptype); \ + return sizeof(ptype) * (npoints < scratch_sz ? npoints : scratch_sz); \ +} \ +void prefix##s_mult_wbits(ptype *ret, const ptype##_affine table[], \ + size_t wbits, size_t npoints, \ + const byte *const scalars[], size_t nbits, \ + ptype scratch[]) \ +{ ptype##s_mult_wbits(ret, table, wbits, npoints, scalars, nbits, scratch); } + +PRECOMPUTE_WBITS_IMPL(blst_p1, POINTonE1, 384, fp, BLS12_381_Rx.p) +POINTS_MULT_WBITS_IMPL(blst_p1, POINTonE1, 384, fp, BLS12_381_Rx.p) + +PRECOMPUTE_WBITS_IMPL(blst_p2, POINTonE2, 384x, fp2, BLS12_381_Rx.p2) +POINTS_MULT_WBITS_IMPL(blst_p2, POINTonE2, 384x, fp2, BLS12_381_Rx.p2) + +/* + * Pippenger algorithm implementation, fastest option for larger amount + * of points... + */ + +static size_t pippenger_window_size(size_t npoints) +{ + size_t wbits; + + for (wbits=0; npoints>>=1; wbits++) ; + + return wbits>12 ? wbits-3 : (wbits>4 ? wbits-2 : (wbits ? 2 : 1)); +} + +#define DECLARE_PRIVATE_POINTXYZZ(ptype, bits) \ +typedef struct { vec##bits X,Y,ZZZ,ZZ; } ptype##xyzz; + +#define POINTS_MULT_PIPPENGER_IMPL(prefix, ptype) \ +static void ptype##_integrate_buckets(ptype *out, ptype##xyzz buckets[], \ + size_t wbits) \ +{ \ + ptype##xyzz ret[1], acc[1]; \ + size_t n = (size_t)1 << wbits; \ +\ + /* Calculate sum of x[i-1]*i for i=1 through 1<<|wbits|. */\ + vec_copy(acc, &buckets[--n], sizeof(acc)); \ + vec_copy(ret, &buckets[n], sizeof(ret)); \ + vec_zero(&buckets[n], sizeof(buckets[n])); \ + while (n--) { \ + ptype##xyzz_dadd(acc, acc, &buckets[n]); \ + ptype##xyzz_dadd(ret, ret, acc); \ + vec_zero(&buckets[n], sizeof(buckets[n])); \ + } \ + ptype##xyzz_to_Jacobian(out, ret); \ +} \ +\ +static void ptype##_bucket(ptype##xyzz buckets[], limb_t booth_idx, \ + size_t wbits, const ptype##_affine *p) \ +{ \ + bool_t booth_sign = (booth_idx >> wbits) & 1; \ +\ + booth_idx &= (1< nbits) wbits = nbits - bit0, cbits = wbits + 1; \ + else wbits = cbits = window; \ + ptype##s_tile_pippenger(ret, points, npoints, scalars, nbits, scratch, \ + bit0, wbits, cbits); \ +} \ +void prefix##s_mult_pippenger(ptype *ret, \ + const ptype##_affine *const points[], \ + size_t npoints, \ + const byte *const scalars[], size_t nbits, \ + ptype##xyzz scratch[]) \ +{ \ + if (npoints == 1) { \ + prefix##_from_affine(ret, points[0]); \ + prefix##_mult(ret, ret, scalars[0], nbits); \ + return; \ + } \ + if ((npoints * sizeof(ptype##_affine) * 8 * 3) <= SCRATCH_LIMIT) { \ + ptype##_affine *table = alloca(npoints * sizeof(ptype##_affine) * 8); \ + ptype##s_precompute_wbits(table, 4, points, npoints); \ + ptype##s_mult_wbits(ret, table, 4, npoints, scalars, nbits, NULL); \ + return; \ + } \ + ptype##s_mult_pippenger(ret, points, npoints, scalars, nbits, scratch, 0); \ +} + +DECLARE_PRIVATE_POINTXYZZ(POINTonE1, 384) +POINTXYZZ_TO_JACOBIAN_IMPL(POINTonE1, 384, fp) +POINTXYZZ_DADD_IMPL(POINTonE1, 384, fp) +POINTXYZZ_DADD_AFFINE_IMPL(POINTonE1, 384, fp, BLS12_381_Rx.p) +POINTS_MULT_PIPPENGER_IMPL(blst_p1, POINTonE1) + +DECLARE_PRIVATE_POINTXYZZ(POINTonE2, 384x) +POINTXYZZ_TO_JACOBIAN_IMPL(POINTonE2, 384x, fp2) +POINTXYZZ_DADD_IMPL(POINTonE2, 384x, fp2) +POINTXYZZ_DADD_AFFINE_IMPL(POINTonE2, 384x, fp2, BLS12_381_Rx.p2) +POINTS_MULT_PIPPENGER_IMPL(blst_p2, POINTonE2) diff --git a/crypto/blst_src/no_asm.h b/crypto/blst_src/no_asm.h new file mode 100644 index 00000000000..be7bf47e197 --- /dev/null +++ b/crypto/blst_src/no_asm.h @@ -0,0 +1,1345 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#if LIMB_T_BITS==32 +typedef unsigned long long llimb_t; +#endif + +#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 || defined(__STDC_NO_VLA__) +# error "unsupported compiler" +#endif + +#if defined(__clang__) +# pragma GCC diagnostic ignored "-Wstatic-in-inline" +#endif + +#if !defined(__clang__) && !defined(__builtin_assume) +# if defined(__GNUC__) && __GNUC__>=5 +# define __builtin_assume(condition) if (!(condition)) __builtin_unreachable() +# elif defined(_MSC_VER) +# define __builtin_assume(condition) __assume(condition) +# else +# define __builtin_assume(condition) (void)(condition) +# endif +#endif + +static void mul_mont_n(limb_t ret[], const limb_t a[], const limb_t b[], + const limb_t p[], limb_t n0, size_t n) +{ + __builtin_assume(n != 0 && n%2 == 0); + llimb_t limbx; + limb_t mask, borrow, mx, hi, tmp[n+1], carry; + size_t i, j; + + for (mx=b[0], hi=0, i=0; i> LIMB_T_BITS); + } + mx = n0*tmp[0]; + tmp[i] = hi; + + for (carry=0, j=0; ; ) { + limbx = (mx * (llimb_t)p[0]) + tmp[0]; + hi = (limb_t)(limbx >> LIMB_T_BITS); + for (i=1; i> LIMB_T_BITS); + } + limbx = tmp[i] + (hi + (llimb_t)carry); + tmp[i-1] = (limb_t)limbx; + carry = (limb_t)(limbx >> LIMB_T_BITS); + + if (++j==n) + break; + + for (mx=b[j], hi=0, i=0; i> LIMB_T_BITS); + } + mx = n0*tmp[0]; + limbx = hi + (llimb_t)carry; + tmp[i] = (limb_t)limbx; + carry = (limb_t)(limbx >> LIMB_T_BITS); + } + + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + mask = carry - borrow; + launder(mask); + + for(i=0; i> LIMB_T_BITS); + } + + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + mask = carry - borrow; + launder(mask); + + for(i=0; i> LIMB_T_BITS) & 1; + } + + mask = 0 - borrow; + launder(mask); + + for (carry=0, i=0; i> LIMB_T_BITS); + } +} + +#define SUB_MOD_IMPL(bits) \ +inline void sub_mod_##bits(vec##bits ret, const vec##bits a, \ + const vec##bits b, const vec##bits p) \ +{ sub_mod_n(ret, a, b, p, NLIMBS(bits)); } + +SUB_MOD_IMPL(256) +SUB_MOD_IMPL(384) + +static void mul_by_3_mod_n(limb_t ret[], const limb_t a[], const limb_t p[], + size_t n) +{ + __builtin_assume(n != 0); + llimb_t limbx; + limb_t mask, carry, borrow, tmp[n], two_a[n]; + size_t i; + + for (carry=0, i=0; i>(LIMB_T_BITS-1); + } + + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + mask = carry - borrow; + launder(mask); + + for(i=0; i> LIMB_T_BITS); + } + + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + mask = carry - borrow; + launder(mask); + + for(i=0; i>(LIMB_T_BITS-1); + } + + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + mask = carry - borrow; + launder(mask); + + for(i=0; i> LIMB_T_BITS) & 1; + } + + flag &= vec_is_zero(a, sizeof(tmp)) ^ 1; + mask = (limb_t)0 - flag; + + for(i=0; i> LIMB_T_BITS) & 1; + } + + return borrow & (is_zero(acc) ^ 1); +} + +#define CHECK_MOD_IMPL(bits) \ +inline limb_t check_mod_##bits(const pow##bits a, const vec##bits p) \ +{ return check_mod_n(a, p, NLIMBS(bits)); } + +CHECK_MOD_IMPL(256) + +static limb_t add_n_check_mod_n(byte ret[], const byte a[], const byte b[], + const limb_t p[], size_t n) +{ + __builtin_assume(n != 0); + limb_t ret_[n], a_[n], b_[n], zero; + + limbs_from_le_bytes(a_, a, sizeof(a_)); + limbs_from_le_bytes(b_, b, sizeof(b_)); + + add_mod_n(ret_, a_, b_, p, n); + zero = vec_is_zero(ret_, sizeof(ret_)); + + le_bytes_from_limbs(ret, ret_, sizeof(ret_)); + + return zero^1; +} + +#define ADD_N_CHECK_MOD_IMPL(bits) \ +inline limb_t add_n_check_mod_##bits(pow##bits ret, const pow##bits a, \ + const pow##bits b, const vec##bits p) \ +{ return add_n_check_mod_n(ret, a, b, p, NLIMBS(bits)); } + +ADD_N_CHECK_MOD_IMPL(256) + +static limb_t sub_n_check_mod_n(byte ret[], const byte a[], const byte b[], + const limb_t p[], size_t n) +{ + __builtin_assume(n != 0); + limb_t ret_[n], a_[n], b_[n], zero; + + limbs_from_le_bytes(a_, a, sizeof(a_)); + limbs_from_le_bytes(b_, b, sizeof(b_)); + + sub_mod_n(ret_, a_, b_, p, n); + zero = vec_is_zero(ret_, sizeof(ret_)); + + le_bytes_from_limbs(ret, ret_, sizeof(ret_)); + + return zero^1; +} + +#define SUB_N_CHECK_MOD_IMPL(bits) \ +inline limb_t sub_n_check_mod_##bits(pow##bits ret, const pow##bits a, \ + const pow##bits b, const vec##bits p) \ +{ return sub_n_check_mod_n(ret, a, b, p, NLIMBS(bits)); } + +SUB_N_CHECK_MOD_IMPL(256) + +static void from_mont_n(limb_t ret[], const limb_t a[], + const limb_t p[], limb_t n0, size_t n) +{ + __builtin_assume(n != 0 && n%2 == 0); + llimb_t limbx; + limb_t mask, borrow, mx, hi, tmp[n]; + size_t i, j; + + for (j=0; j> LIMB_T_BITS); + for (i=1; i> LIMB_T_BITS); + } + tmp[i-1] = hi; + a = tmp; + } + + /* this is needed only if input can be non-fully-reduced */ + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + mask = 0 - borrow; + launder(mask); + + for(i=0; i> LIMB_T_BITS); + for (i=1; i> LIMB_T_BITS); + } + tmp[i-1] = hi; + b = tmp; + } + + for (carry=0, i=0; i> LIMB_T_BITS); + } + + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + mask = carry - borrow; + launder(mask); + + for(i=0; i> LIMB_T_BITS); + } + + for (next=ret[0], i=0; i> 1; + next = ret[i+1]; + ret[i] = limb | next << (LIMB_T_BITS-1); + } + ret[i] = next >> 1 | carry << (LIMB_T_BITS-1); + + a = ret; + } +} + +#define RSHIFT_MOD_IMPL(bits) \ +inline void rshift_mod_##bits(vec##bits ret, const vec##bits a, size_t count, \ + const vec##bits p) \ +{ rshift_mod_n(ret, a, count, p, NLIMBS(bits)); } + +RSHIFT_MOD_IMPL(256) +RSHIFT_MOD_IMPL(384) + +#define DIV_BY_2_MOD_IMPL(bits) \ +inline void div_by_2_mod_##bits(vec##bits ret, const vec##bits a, \ + const vec##bits p) \ +{ rshift_mod_n(ret, a, 1, p, NLIMBS(bits)); } + +DIV_BY_2_MOD_IMPL(384) + +static limb_t sgn0_pty_mod_n(const limb_t a[], const limb_t p[], size_t n) +{ + __builtin_assume(n != 0); + llimb_t limbx; + limb_t carry, borrow, ret, tmp[n]; + size_t i; + + ret = a[0] & 1; /* parity */ + + for (carry=0, i=0; i>(LIMB_T_BITS-1); + } + + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + + ret |= ((carry - borrow) & 2) ^ 2; + + return ret; +} + +inline limb_t sgn0_pty_mod_384(const vec384 a, const vec384 p) +{ return sgn0_pty_mod_n(a, p, NLIMBS(384)); } + +inline limb_t sgn0_pty_mont_384(const vec384 a, const vec384 p, limb_t n0) +{ + vec384 tmp; + + from_mont_n(tmp, a, p, n0, NLIMBS(384)); + + return sgn0_pty_mod_n(tmp, p, NLIMBS(384)); +} + +inline limb_t sgn0_pty_mod_384x(const vec384x a, const vec384 p) +{ + limb_t re, im, sign, prty; + + re = sgn0_pty_mod_n(a[0], p, NLIMBS(384)); + im = sgn0_pty_mod_n(a[1], p, NLIMBS(384)); + + /* a->im!=0 ? sgn0(a->im) : sgn0(a->re) */ + sign = (limb_t)0 - vec_is_zero(a[1], sizeof(vec384)); + sign = (re & sign) | (im & ~sign); + + /* a->re==0 ? prty(a->im) : prty(a->re) */ + prty = (limb_t)0 - vec_is_zero(a[0], sizeof(vec384)); + prty = (im & prty) | (re & ~prty); + + return (sign & 2) | (prty & 1); +} + +inline limb_t sgn0_pty_mont_384x(const vec384x a, const vec384 p, limb_t n0) +{ + vec384x tmp; + + from_mont_n(tmp[0], a[0], p, n0, NLIMBS(384)); + from_mont_n(tmp[1], a[1], p, n0, NLIMBS(384)); + + return sgn0_pty_mod_384x(tmp, p); +} + +void mul_mont_384x(vec384x ret, const vec384x a, const vec384x b, + const vec384 p, limb_t n0) +{ + vec384 aa, bb, cc; + + add_mod_n(aa, a[0], a[1], p, NLIMBS(384)); + add_mod_n(bb, b[0], b[1], p, NLIMBS(384)); + mul_mont_n(bb, bb, aa, p, n0, NLIMBS(384)); + mul_mont_n(aa, a[0], b[0], p, n0, NLIMBS(384)); + mul_mont_n(cc, a[1], b[1], p, n0, NLIMBS(384)); + sub_mod_n(ret[0], aa, cc, p, NLIMBS(384)); + sub_mod_n(ret[1], bb, aa, p, NLIMBS(384)); + sub_mod_n(ret[1], ret[1], cc, p, NLIMBS(384)); +} + +/* + * mul_mont_n without final conditional subtraction, which implies + * that modulus is one bit short, which in turn means that there are + * no carries to handle between iterations... + */ +static void mul_mont_nonred_n(limb_t ret[], const limb_t a[], const limb_t b[], + const limb_t p[], limb_t n0, size_t n) +{ + __builtin_assume(n != 0 && n%2 == 0); + llimb_t limbx; + limb_t mx, hi, tmp[n+1]; + size_t i, j; + + for (mx=b[0], hi=0, i=0; i> LIMB_T_BITS); + } + mx = n0*tmp[0]; + tmp[i] = hi; + + for (j=0; ; ) { + limbx = (mx * (llimb_t)p[0]) + tmp[0]; + hi = (limb_t)(limbx >> LIMB_T_BITS); + for (i=1; i> LIMB_T_BITS); + } + tmp[i-1] = tmp[i] + hi; + + if (++j==n) + break; + + for (mx=b[j], hi=0, i=0; i> LIMB_T_BITS); + } + mx = n0*tmp[0]; + tmp[i] = hi; + } + + vec_copy(ret, tmp, sizeof(tmp)-sizeof(limb_t)); +} + +void sqr_n_mul_mont_383(vec384 ret, const vec384 a, size_t count, + const vec384 p, limb_t n0, const vec384 b) +{ + __builtin_assume(count != 0); + while(count--) { + mul_mont_nonred_n(ret, a, a, p, n0, NLIMBS(384)); + a = ret; + } + mul_mont_n(ret, ret, b, p, n0, NLIMBS(384)); +} + +void sqr_mont_382x(vec384x ret, const vec384x a, + const vec384 p, limb_t n0) +{ + llimb_t limbx; + limb_t mask, carry, borrow; + size_t i; + vec384 t0, t1; + + /* "add_mod_n(t0, a[0], a[1], p, NLIMBS(384));" */ + for (carry=0, i=0; i> LIMB_T_BITS); + } + + /* "sub_mod_n(t1, a[0], a[1], p, NLIMBS(384));" */ + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + mask = 0 - borrow; + launder(mask); + + /* "mul_mont_n(ret[1], a[0], a[1], p, n0, NLIMBS(384));" */ + mul_mont_nonred_n(ret[1], a[0], a[1], p, n0, NLIMBS(384)); + + /* "add_mod_n(ret[1], ret[1], ret[1], p, NLIMBS(384));" */ + for (carry=0, i=0; i>(LIMB_T_BITS-1); + } + + /* "mul_mont_n(ret[0], t0, t1, p, n0, NLIMBS(384));" */ + mul_mont_nonred_n(ret[0], t0, t1, p, n0, NLIMBS(384)); + + /* account for t1's sign... */ + for (borrow=0, i=0; i> LIMB_T_BITS) & 1; + } + mask = 0 - borrow; + launder(mask); + for (carry=0, i=0; i> LIMB_T_BITS); + } +} + +#if defined(__GNUC__) || defined(__clang__) +# define MSB(x) ({ limb_t ret = (x) >> (LIMB_T_BITS-1); launder(ret); ret; }) +#else +# define MSB(x) ((x) >> (LIMB_T_BITS-1)) +#endif + +static size_t num_bits(limb_t l) +{ + limb_t x, mask; + size_t bits = is_zero(l) ^ 1; + + if (sizeof(limb_t) == 8) { + x = l >> (32 & (8*sizeof(limb_t)-1)); + mask = 0 - MSB(0 - x); + bits += 32 & mask; + l ^= (x ^ l) & mask; + } + + x = l >> 16; + mask = 0 - MSB(0 - x); + bits += 16 & mask; + l ^= (x ^ l) & mask; + + x = l >> 8; + mask = 0 - MSB(0 - x); + bits += 8 & mask; + l ^= (x ^ l) & mask; + + x = l >> 4; + mask = 0 - MSB(0 - x); + bits += 4 & mask; + l ^= (x ^ l) & mask; + + x = l >> 2; + mask = 0 - MSB(0 - x); + bits += 2 & mask; + l ^= (x ^ l) & mask; + + bits += l >> 1; + + return bits; +} + +#if defined(__clang_major__) && __clang_major__>7 +__attribute__((optnone)) +#endif +static limb_t lshift_2(limb_t hi, limb_t lo, size_t l) +{ + size_t r = LIMB_T_BITS - l; + limb_t mask = 0 - (is_zero(l)^1); + return (hi << (l&(LIMB_T_BITS-1))) | ((lo & mask) >> (r&(LIMB_T_BITS-1))); +} + +/* + * https://eprint.iacr.org/2020/972 with 'k' being LIMB_T_BITS-1. + */ +static void ab_approximation_n(limb_t a_[2], const limb_t a[], + limb_t b_[2], const limb_t b[], size_t n) +{ + __builtin_assume(n != 0 && n%2 == 0); + limb_t a_hi, a_lo, b_hi, b_lo, mask; + size_t i; + + i = n-1; + a_hi = a[i], a_lo = a[i-1]; + b_hi = b[i], b_lo = b[i-1]; + for (i--; --i;) { + mask = 0 - is_zero(a_hi | b_hi); + a_hi = ((a_lo ^ a_hi) & mask) ^ a_hi; + b_hi = ((b_lo ^ b_hi) & mask) ^ b_hi; + a_lo = ((a[i] ^ a_lo) & mask) ^ a_lo; + b_lo = ((b[i] ^ b_lo) & mask) ^ b_lo; + } + i = LIMB_T_BITS - num_bits(a_hi | b_hi); + /* |i| can be LIMB_T_BITS if all a[2..]|b[2..] were zeros */ + + a_[0] = a[0], a_[1] = lshift_2(a_hi, a_lo, i); + b_[0] = b[0], b_[1] = lshift_2(b_hi, b_lo, i); +} + +typedef struct { limb_t f0, g0, f1, g1; } factors; + +static void inner_loop_n(factors *fg, const limb_t a_[2], const limb_t b_[2], + size_t n) +{ + __builtin_assume(n != 0); + llimb_t limbx; + limb_t f0 = 1, g0 = 0, f1 = 0, g1 = 1; + limb_t a_lo, a_hi, b_lo, b_hi, t_lo, t_hi, odd, borrow, xorm; + + a_lo = a_[0], a_hi = a_[1]; + b_lo = b_[0], b_hi = b_[1]; + + while(n--) { + odd = 0 - (a_lo&1); + + /* a_ -= b_ if a_ is odd */ + t_lo = a_lo, t_hi = a_hi; + limbx = a_lo - (llimb_t)(b_lo & odd); + a_lo = (limb_t)limbx; + borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1; + limbx = a_hi - ((llimb_t)(b_hi & odd) + borrow); + a_hi = (limb_t)limbx; + borrow = (limb_t)(limbx >> LIMB_T_BITS); + + /* negate a_-b_ if it borrowed */ + a_lo ^= borrow; + a_hi ^= borrow; + limbx = a_lo + (llimb_t)(borrow & 1); + a_lo = (limb_t)limbx; + a_hi += (limb_t)(limbx >> LIMB_T_BITS) & 1; + + /* b_=a_ if a_-b_ borrowed */ + b_lo = ((t_lo ^ b_lo) & borrow) ^ b_lo; + b_hi = ((t_hi ^ b_hi) & borrow) ^ b_hi; + + /* exchange f0 and f1 if a_-b_ borrowed */ + xorm = (f0 ^ f1) & borrow; + f0 ^= xorm; + f1 ^= xorm; + + /* exchange g0 and g1 if a_-b_ borrowed */ + xorm = (g0 ^ g1) & borrow; + g0 ^= xorm; + g1 ^= xorm; + + /* subtract if a_ was odd */ + f0 -= f1 & odd; + g0 -= g1 & odd; + + f1 <<= 1; + g1 <<= 1; + a_lo >>= 1; a_lo |= a_hi << (LIMB_T_BITS-1); + a_hi >>= 1; + } + + fg->f0 = f0, fg->g0 = g0, fg->f1 = f1, fg->g1= g1; +} + +static limb_t cneg_n(limb_t ret[], const limb_t a[], limb_t neg, size_t n) +{ + __builtin_assume(n != 0); + llimb_t limbx = 0; + limb_t carry; + size_t i; + + for (carry=neg&1, i=0; i> LIMB_T_BITS); + } + + return 0 - MSB((limb_t)limbx); +} + +static limb_t add_n(limb_t ret[], const limb_t a[], limb_t b[], size_t n) +{ + __builtin_assume(n != 0); + llimb_t limbx; + limb_t carry; + size_t i; + + for (carry=0, i=0; i> LIMB_T_BITS); + } + + return carry; +} + +static limb_t umul_n(limb_t ret[], const limb_t a[], limb_t b, size_t n) +{ + __builtin_assume(n != 0); + llimb_t limbx; + limb_t hi; + size_t i; + + for (hi=0, i=0; i> LIMB_T_BITS); + } + + return hi; +} + +static limb_t smul_n_shift_n(limb_t ret[], const limb_t a[], limb_t *f_, + const limb_t b[], limb_t *g_, + size_t n) +{ + __builtin_assume(n != 0); + limb_t a_[n+1], b_[n+1], f, g, neg, carry, hi; + size_t i; + + /* |a|*|f_| */ + f = *f_; + neg = 0 - MSB(f); + f = (f ^ neg) - neg; /* ensure |f| is positive */ + (void)cneg_n(a_, a, neg, n); + hi = umul_n(a_, a_, f, n); + a_[n] = hi - (f & neg); + + /* |b|*|g_| */ + g = *g_; + neg = 0 - MSB(g); + g = (g ^ neg) - neg; /* ensure |g| is positive */ + (void)cneg_n(b_, b, neg, n); + hi = umul_n(b_, b_, g, n); + b_[n] = hi - (g & neg); + + /* |a|*|f_| + |b|*|g_| */ + (void)add_n(a_, a_, b_, n+1); + + /* (|a|*|f_| + |b|*|g_|) >> k */ + for (carry=a_[0], i=0; i> (LIMB_T_BITS-2); + carry = a_[i+1]; + ret[i] = hi | (carry << 2); + } + + /* ensure result is non-negative, fix up |f_| and |g_| accordingly */ + neg = 0 - MSB(carry); + *f_ = (*f_ ^ neg) - neg; + *g_ = (*g_ ^ neg) - neg; + (void)cneg_n(ret, ret, neg, n); + + return neg; +} + +static limb_t smul_2n(limb_t ret[], const limb_t u[], limb_t f, + const limb_t v[], limb_t g, size_t n) +{ + __builtin_assume(n != 0); + limb_t u_[n], v_[n], neg, hi; + + /* |u|*|f_| */ + neg = 0 - MSB(f); + f = (f ^ neg) - neg; /* ensure |f| is positive */ + neg = cneg_n(u_, u, neg, n); + hi = umul_n(u_, u_, f, n) - (f&neg); + + /* |v|*|g_| */ + neg = 0 - MSB(g); + g = (g ^ neg) - neg; /* ensure |g| is positive */ + neg = cneg_n(v_, v, neg, n); + hi += umul_n(v_, v_, g, n) - (g&neg); + + /* |u|*|f_| + |v|*|g_| */ + hi += add_n(ret, u_, v_, n); + + return hi; +} + +static void ct_inverse_mod_n(limb_t ret[], const limb_t inp[], + const limb_t mod[], const limb_t modx[], size_t n) +{ + __builtin_assume(n != 0 && n%2 == 0); + llimb_t limbx; + limb_t a[n], b[n], u[2*n], v[2*n], t[2*n]; + limb_t a_[2], b_[2], sign, carry, top; + factors fg; + size_t i; + + vec_copy(a, inp, sizeof(a)); + vec_copy(b, mod, sizeof(b)); + vec_zero(u, sizeof(u)); u[0] = 1; + vec_zero(v, sizeof(v)); + + for (i=0; i<(2*n*LIMB_T_BITS)/(LIMB_T_BITS-2); i++) { + ab_approximation_n(a_, a, b_, b, n); + inner_loop_n(&fg, a_, b_, LIMB_T_BITS-2); + (void)smul_n_shift_n(t, a, &fg.f0, b, &fg.g0, n); + (void)smul_n_shift_n(b, a, &fg.f1, b, &fg.g1, n); + vec_copy(a, t, sizeof(a)); + smul_2n(t, u, fg.f0, v, fg.g0, 2*n); + smul_2n(v, u, fg.f1, v, fg.g1, 2*n); + vec_copy(u, t, sizeof(u)); + } + + inner_loop_n(&fg, a, b, (2*n*LIMB_T_BITS)%(LIMB_T_BITS-2)); + top = smul_2n(ret, u, fg.f1, v, fg.g1, 2*n); + + sign = 0 - MSB(top); /* top is 1, 0 or -1 */ + for (carry=0, i=0; i> LIMB_T_BITS); + } + top += carry; + sign = 0 - top; /* top is 1, 0 or -1 */ + top |= sign; + for (i=0; i> LIMB_T_BITS) & 1; + limbx = a_hi - ((llimb_t)(b_hi & odd) + borrow); + a_hi = (limb_t)limbx; + borrow = (limb_t)(limbx >> LIMB_T_BITS); + + L += ((t_lo & b_lo) >> 1) & borrow; + + /* negate a_-b_ if it borrowed */ + a_lo ^= borrow; + a_hi ^= borrow; + limbx = a_lo + (llimb_t)(borrow & 1); + a_lo = (limb_t)limbx; + a_hi += (limb_t)(limbx >> LIMB_T_BITS) & 1; + + /* b_=a_ if a_-b_ borrowed */ + b_lo = ((t_lo ^ b_lo) & borrow) ^ b_lo; + b_hi = ((t_hi ^ b_hi) & borrow) ^ b_hi; + + /* exchange f0 and f1 if a_-b_ borrowed */ + xorm = (f0 ^ f1) & borrow; + f0 ^= xorm; + f1 ^= xorm; + + /* exchange g0 and g1 if a_-b_ borrowed */ + xorm = (g0 ^ g1) & borrow; + g0 ^= xorm; + g1 ^= xorm; + + /* subtract if a_ was odd */ + f0 -= f1 & odd; + g0 -= g1 & odd; + + f1 <<= 1; + g1 <<= 1; + a_lo >>= 1; a_lo |= a_hi << (LIMB_T_BITS-1); + a_hi >>= 1; + + L += (b_lo + 2) >> 2; + } + + fg->f0 = f0, fg->g0 = g0, fg->f1 = f1, fg->g1 = g1; + + return L; +} + +static bool_t ct_is_sqr_mod_n(const limb_t inp[], const limb_t mod[], size_t n) +{ + __builtin_assume(n != 0 && n%2 == 0); + limb_t a[n], b[n], t[n]; + limb_t a_[2], b_[2], neg, L = 0; + factors fg; + size_t i; + + vec_copy(a, inp, sizeof(a)); + vec_copy(b, mod, sizeof(b)); + + for (i=0; i<(2*n*LIMB_T_BITS)/(LIMB_T_BITS-2); i++) { + ab_approximation_n(a_, a, b_, b, n); + L = legendre_loop_n(L, &fg, a_, b_, LIMB_T_BITS-2); + neg = smul_n_shift_n(t, a, &fg.f0, b, &fg.g0, n); + (void)smul_n_shift_n(b, a, &fg.f1, b, &fg.g1, n); + vec_copy(a, t, sizeof(a)); + L += (b[0] >> 1) & neg; + } + + L = legendre_loop_n(L, &fg, a, b, (2*n*LIMB_T_BITS)%(LIMB_T_BITS-2)); + + return (L & 1) ^ 1; +} + +#define CT_IS_SQR_MOD_IMPL(bits) \ +inline bool_t ct_is_square_mod_##bits(const vec##bits inp, \ + const vec##bits mod) \ +{ return ct_is_sqr_mod_n(inp, mod, NLIMBS(bits)); } + +CT_IS_SQR_MOD_IMPL(384) + +/* + * |div_top| points at two most significant limbs of the dividend, |d_hi| + * and |d_lo| are two most significant limbs of the divisor. If divisor + * is only one limb, it is to be passed in |d_hi| with zero in |d_lo|. + * The divisor is required to be "bitwise left-aligned," and dividend's + * top limbs to be not larger than the divisor's. The latter limitation + * can be problematic in the first iteration of multi-precision division, + * where in most general case the condition would have to be "smaller." + * The subroutine considers four limbs, two of which are "overlapping," + * hence the name... Another way to look at it is to think of the pair + * of the dividend's limbs being suffixed with a zero: + * +-------+-------+-------+ + * R | | | 0 | + * +-------+-------+-------+ + * +-------+-------+ + * D | | | + * +-------+-------+ + */ +limb_t div_3_limbs(const limb_t div_top[2], limb_t d_lo, limb_t d_hi) +{ + llimb_t Rx; + limb_t r_lo = div_top[0], r_hi = div_top[1]; + limb_t Q = 0, mask, borrow, rx; + size_t i; + + for (i = 0; i < LIMB_T_BITS; i++) { + /* "borrow, Rx = R - D" */ + Rx = (llimb_t)r_lo - d_lo; + rx = (limb_t)Rx; + borrow = (limb_t)(Rx >> LIMB_T_BITS) & 1; + Rx = r_hi - (d_hi + (llimb_t)borrow); + borrow = (limb_t)(Rx >> LIMB_T_BITS); + + /* "if (R >= D) R -= D" */ + r_lo = ((r_lo ^ rx) & borrow) ^ rx; + rx = (limb_t)Rx; + r_hi = ((r_hi ^ rx) & borrow) ^ rx; + + Q <<= 1; + Q |= ~borrow & 1; + + /* "D >>= 1" */ + d_lo >>= 1; d_lo |= d_hi << (LIMB_T_BITS - 1); + d_hi >>= 1; + } + + mask = 0 - MSB(Q); /* does it overflow? */ + + /* "borrow, Rx = R - D" */ + Rx = (llimb_t)r_lo - d_lo; + rx = (limb_t)Rx; + borrow = (limb_t)(Rx >> LIMB_T_BITS) & 1; + Rx = r_hi - (d_hi + (llimb_t)borrow); + borrow = (limb_t)(Rx >> LIMB_T_BITS) & 1; + + Q <<= 1; + Q |= borrow ^ 1; + + return (Q | mask); +} + +static limb_t quot_rem_n(limb_t *div_rem, const limb_t *divisor, + limb_t quotient, size_t n) +{ + __builtin_assume(n != 0 && n%2 == 0); + llimb_t limbx; + limb_t tmp[n+1], carry, mask, borrow; + size_t i; + + /* divisor*quotient */ + for (carry=0, i=0; i> LIMB_T_BITS); + } + tmp[i] = carry; + + /* remainder = dividend - divisor*quotient */ + for (borrow=0, i=0; i<=n; i++) { + limbx = div_rem[i] - (tmp[i] + (llimb_t)borrow); + tmp[i] = (limb_t)limbx; + borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1; + } + + mask = 0 - borrow; + launder(mask); + + /* if quotient was off by one, add divisor to the remainder */ + for (carry=0, i=0; i> LIMB_T_BITS) & 1; + } + + return (div_rem[i] = quotient + mask); +} + +inline limb_t quot_rem_128(limb_t *div_rem, const limb_t *divisor, + limb_t quotient) +{ return quot_rem_n(div_rem, divisor, quotient, NLIMBS(128)); } + +inline limb_t quot_rem_64(limb_t *div_rem, const limb_t *divisor, + limb_t quotient) +{ return quot_rem_n(div_rem, divisor, quotient, NLIMBS(64)); } + +/* + * Unlock reference implementations in vect.c + */ +#define mul_by_8_mod_384 mul_by_8_mod_384 +#define mul_by_8_mod_384x mul_by_8_mod_384x +#define mul_by_3_mod_384x mul_by_3_mod_384x +#define mul_by_1_plus_i_mod_384x mul_by_1_plus_i_mod_384x +#define add_mod_384x add_mod_384x +#define sub_mod_384x sub_mod_384x +#define lshift_mod_384x lshift_mod_384x +#define sqr_mont_384x sqr_mont_384x + +inline void vec_prefetch(const void *ptr, size_t len) +{ (void)ptr; (void)len; } + +/* + * SHA-256 + */ +#define ROTR(x,n) ((x)>>n | (x)<<(32-n)) +#define Sigma0(x) (ROTR((x),2) ^ ROTR((x),13) ^ ROTR((x),22)) +#define Sigma1(x) (ROTR((x),6) ^ ROTR((x),11) ^ ROTR((x),25)) +#define sigma0(x) (ROTR((x),7) ^ ROTR((x),18) ^ ((x)>>3)) +#define sigma1(x) (ROTR((x),17) ^ ROTR((x),19) ^ ((x)>>10)) +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +void blst_sha256_block_data_order(unsigned int *v, const void *inp, + size_t blocks) +{ + static const unsigned int K256[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + }; + unsigned int X[16], l, a, b, c, d, e, f, g, h, s0, s1, T1, T2; + const unsigned char *data = inp; + size_t round; + + a = v[0]; + b = v[1]; + c = v[2]; + d = v[3]; + e = v[4]; + f = v[5]; + g = v[6]; + h = v[7]; + + while (blocks--) { + for (round = 0; round < 16; round++) { + l = (unsigned int)data[0] << 24; + l |= (unsigned int)data[1] << 16; + l |= (unsigned int)data[2] << 8; + l |= (unsigned int)data[3]; + data += 4; + T1 = X[round] = l; + T1 += h + Sigma1(e) + Ch(e, f, g) + K256[round]; + T2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + } + + for (; round < 64; round++) { + s0 = X[(round + 1) & 0x0f]; + s0 = sigma0(s0); + s1 = X[(round + 14) & 0x0f]; + s1 = sigma1(s1); + + T1 = X[round & 0xf] += s0 + s1 + X[(round + 9) & 0xf]; + T1 += h + Sigma1(e) + Ch(e, f, g) + K256[round]; + T2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + } + + a += v[0]; v[0] = a; + b += v[1]; v[1] = b; + c += v[2]; v[2] = c; + d += v[3]; v[3] = d; + e += v[4]; v[4] = e; + f += v[5]; v[5] = f; + g += v[6]; v[6] = g; + h += v[7]; v[7] = h; + } +} +#undef ROTR +#undef Sigma0 +#undef Sigma1 +#undef sigma0 +#undef sigma1 +#undef Ch +#undef Maj + +void blst_sha256_hcopy(unsigned int dst[8], const unsigned int src[8]) +{ + size_t i; + + for (i=0; i<8; i++) + dst[i] = src[i]; +} + +void blst_sha256_emit(unsigned char md[32], const unsigned int h[8]) +{ + size_t i; + + for (i=0; i<8; i++, md+=4) { + unsigned int h_i = h[i]; + md[0] = (unsigned char)(h_i >> 24); + md[1] = (unsigned char)(h_i >> 16); + md[2] = (unsigned char)(h_i >> 8); + md[3] = (unsigned char)h_i; + } +} + +void blst_sha256_bcopy(void *dst_, const void *src_, size_t len) +{ + unsigned char *dst = dst_; + const unsigned char *src = src_; + size_t i; + + for (i=0; iZ); /* Z1Z1 = Z1^2 */ + mul_fp2(U2, Q->X, Z1Z1); /* U2 = X2*Z1Z1 */ + + mul_fp2(S2, Q->Y, R->Z); + mul_fp2(S2, S2, Z1Z1); /* S2 = Y2*Z1*Z1Z1 */ + + sub_fp2(H, U2, R->X); /* H = U2-X1 */ + + sqr_fp2(HH, H); /* HH = H^2 */ + add_fp2(I, HH, HH); + add_fp2(I, I, I); /* I = 4*HH */ + + mul_fp2(J, H, I); /* J = H*I */ + + sub_fp2(r, S2, R->Y); + add_fp2(r, r, r); /* r = 2*(S2-Y1) */ + + mul_fp2(V, R->X, I); /* V = X1*I */ + + sqr_fp2(T->X, r); + sub_fp2(T->X, T->X, J); + sub_fp2(T->X, T->X, V); + sub_fp2(T->X, T->X, V); /* X3 = r^2-J-2*V */ + + mul_fp2(J, J, R->Y); + sub_fp2(T->Y, V, T->X); + mul_fp2(T->Y, T->Y, r); + sub_fp2(T->Y, T->Y, J); + sub_fp2(T->Y, T->Y, J); /* Y3 = r*(V-X3)-2*Y1*J */ + + add_fp2(T->Z, R->Z, H); + sqr_fp2(T->Z, T->Z); + sub_fp2(T->Z, T->Z, Z1Z1); + sub_fp2(T->Z, T->Z, HH); /* Z3 = (Z1+H)^2-Z1Z1-HH */ + + /* + * line evaluation + */ + mul_fp2(I, r, Q->X); + mul_fp2(J, Q->Y, T->Z); + sub_fp2(I, I, J); + add_fp2(line[0], I, I); /* 2*(r*X2 - Y2*Z3) */ +#ifdef r +# undef r +#else + vec_copy(line[1], r, sizeof(r)); +#endif + vec_copy(line[2], T->Z, sizeof(T->Z)); +} + +static void line_dbl(vec384fp6 line, POINTonE2 *T, const POINTonE2 *Q) +{ + vec384x ZZ, A, B, C, D, E, F; + + /* + * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-alnr + */ + sqr_fp2(A, Q->X); /* A = X1^2 */ + sqr_fp2(B, Q->Y); /* B = Y1^2 */ + sqr_fp2(ZZ, Q->Z); /* ZZ = Z1^2 */ + sqr_fp2(C, B); /* C = B^2 */ + + add_fp2(D, Q->X, B); /* X1+B */ + sqr_fp2(D, D); /* (X1+B)^2 */ + sub_fp2(D, D, A); /* (X1+B)^2-A */ + sub_fp2(D, D, C); /* (X1+B)^2-A-C */ + add_fp2(D, D, D); /* D = 2*((X1+B)^2-A-C) */ + + mul_by_3_fp2(E, A); /* E = 3*A */ + sqr_fp2(F, E); /* F = E^2 */ + + add_fp2(line[0], E, Q->X); /* 3*A+X1 for line evaluation */ + + sub_fp2(T->X, F, D); + sub_fp2(T->X, T->X, D); /* X3 = F-2*D */ + + add_fp2(T->Z, Q->Y, Q->Z); + sqr_fp2(T->Z, T->Z); + sub_fp2(T->Z, T->Z, B); + sub_fp2(T->Z, T->Z, ZZ); /* Z3 = (Y1+Z1)^2-B-ZZ */ + + mul_by_8_fp2(C, C); /* 8*C */ + sub_fp2(T->Y, D, T->X); /* D-X3 */ + mul_fp2(T->Y, T->Y, E); /* E*(D-X3) */ + sub_fp2(T->Y, T->Y, C); /* Y3 = E*(D-X3)-8*C */ + + /* + * line evaluation + */ + sqr_fp2(line[0], line[0]); + sub_fp2(line[0], line[0], A); + sub_fp2(line[0], line[0], F); /* (3*A+X1)^2 - X1^2 - 9*A^2 */ + lshift_fp2(B, B, 2); + sub_fp2(line[0], line[0], B); /* 6*X1^3 - 4*Y1^2 */ + + mul_fp2(line[1], E, ZZ); /* 3*X1^2 * Z1^2 */ + + mul_fp2(line[2], T->Z, ZZ); /* Z3 * Z1^2 */ +} + +static void line_by_Px2(vec384fp6 line, const POINTonE1_affine *Px2) +{ + mul_fp(line[1][0], line[1][0], Px2->X); /* "b01" *= -2*P->X */ + mul_fp(line[1][1], line[1][1], Px2->X); + + mul_fp(line[2][0], line[2][0], Px2->Y); /* "b11" *= 2*P->Y */ + mul_fp(line[2][1], line[2][1], Px2->Y); +} + +#if 0 +static void add_n_dbl(vec384fp12 ret, POINTonE2 *T, const POINTonE2_affine *Q, + const POINTonE1_affine *Px2, vec384fp6 line, size_t n) +{ + line_add(line, T, T, Q); line_by_Px2(line, Px2); + mul_by_xy00z0_fp12(ret, ret, line); + while (n--) { + sqr_fp12(ret, ret); + line_dbl(line, T, T); line_by_Px2(line, Px2); + mul_by_xy00z0_fp12(ret, ret, line); + } +} + +static void miller_loop(vec384fp12 ret, const POINTonE2 *Q, const POINTonE1 *P) +{ +#define Q ((const POINTonE2_affine *)Q) + POINTonE2 T[1]; + POINTonE1_affine Px2[1]; + vec384fp6 line; /* it's not actual fp6, but 3 packed fp2, "xy00z0" */ + + /* Move common expression from line evaluation to line_by_Px2. */ + add_fp(Px2->X, P->X, P->X); + neg_fp(Px2->X, Px2->X); + add_fp(Px2->Y, P->Y, P->Y); + + vec_copy(T->X, Q->X, 2*sizeof(T->X)); + vec_copy(T->Z, BLS12_381_Rx.p2, sizeof(T->Z)); + + /* first step is ret = 1^2*line, which is replaced with ret = line */ + line_dbl(line, T, T); /* 0x2 */ + line_by_Px2(line, Px2); + vec_zero(ret, sizeof(vec384fp12)); + vec_copy(ret[0][0], line[0], 2*sizeof(vec384fp2)); + vec_copy(ret[1][1], line[2], sizeof(vec384fp2)); + add_n_dbl(ret, T, Q, Px2, line, 2); /* ..0xc */ + add_n_dbl(ret, T, Q, Px2, line, 3); /* ..0x68 */ + add_n_dbl(ret, T, Q, Px2, line, 9); /* ..0xd200 */ + add_n_dbl(ret, T, Q, Px2, line, 32); /* ..0xd20100000000 */ + add_n_dbl(ret, T, Q, Px2, line, 16); /* ..0xd201000000010000 */ + conjugate_fp12(ret); /* account for z being negative */ +#undef Q +} +#endif + +static void start_dbl_n(vec384fp12 ret, POINTonE2 T[], + const POINTonE1_affine Px2[], size_t n) +{ + size_t i; + vec384fp6 line; /* it's not actual fp6, but 3 packed fp2, "xy00z0" */ + + /* first step is ret = 1^2*line, which is replaced with ret = line */ + line_dbl(line, T+0, T+0); line_by_Px2(line, Px2+0); + vec_zero(ret, sizeof(vec384fp12)); + vec_copy(ret[0][0], line[0], 2*sizeof(vec384fp2)); + vec_copy(ret[1][1], line[2], sizeof(vec384fp2)); + + for (i = 1; i < n; i++) { + line_dbl(line, T+i, T+i); line_by_Px2(line, Px2+i); + mul_by_xy00z0_fp12(ret, ret, line); + } +} + +static void add_n_dbl_n(vec384fp12 ret, POINTonE2 T[], + const POINTonE2_affine Q[], + const POINTonE1_affine Px2[], + size_t n, size_t k) +{ + size_t i; + vec384fp6 line; /* it's not actual fp6, but 3 packed fp2, "xy00z0" */ + + for (i = 0; i < n; i++) { + line_add(line, T+i, T+i, Q+i); line_by_Px2(line, Px2+i); + mul_by_xy00z0_fp12(ret, ret, line); + } + while (k--) { + sqr_fp12(ret, ret); + for (i = 0; i < n; i++) { + line_dbl(line, T+i, T+i); line_by_Px2(line, Px2+i); + mul_by_xy00z0_fp12(ret, ret, line); + } + } +} + +static void miller_loop_n(vec384fp12 ret, const POINTonE2_affine Q[], + const POINTonE1_affine P[], size_t n) +{ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 \ + || defined(__STDC_NO_VLA__) + POINTonE2 *T = alloca(n*sizeof(POINTonE2)); + POINTonE1_affine *Px2 = alloca(n*sizeof(POINTonE1_affine)); +#else + POINTonE2 T[n]; + POINTonE1_affine Px2[n]; +#endif + size_t i; + + if ((n == 1) && (vec_is_zero(&Q[0], sizeof(Q[0])) | + vec_is_zero(&P[0], sizeof(P[0]))) ) { + /* + * Special case of infinite aggregated signature, pair the additive + * group's identity with the multiplicative group's identity. + */ + vec_copy(ret, BLS12_381_Rx.p12, sizeof(vec384fp12)); + return; + } + + for (i = 0; i < n; i++) { + /* Move common expression from line evaluation to line_by_Px2. */ + add_fp(Px2[i].X, P[i].X, P[i].X); + neg_fp(Px2[i].X, Px2[i].X); + add_fp(Px2[i].Y, P[i].Y, P[i].Y); + + vec_copy(T[i].X, Q[i].X, 2*sizeof(T[i].X)); + vec_copy(T[i].Z, BLS12_381_Rx.p2, sizeof(T[i].Z)); + } + + /* first step is ret = 1^2*line, which is replaced with ret = line */ + start_dbl_n(ret, T, Px2, n); /* 0x2 */ + add_n_dbl_n(ret, T, Q, Px2, n, 2); /* ..0xc */ + add_n_dbl_n(ret, T, Q, Px2, n, 3); /* ..0x68 */ + add_n_dbl_n(ret, T, Q, Px2, n, 9); /* ..0xd200 */ + add_n_dbl_n(ret, T, Q, Px2, n, 32); /* ..0xd20100000000 */ + add_n_dbl_n(ret, T, Q, Px2, n, 16); /* ..0xd201000000010000 */ + conjugate_fp12(ret); /* account for z being negative */ +} + +static void pre_add_n_dbl(vec384fp6 lines[], POINTonE2 *T, + const POINTonE2_affine *Q, + size_t n) +{ + line_add(lines++[0], T, T, Q); + while (n--) + line_dbl(lines++[0], T, T); +} + +static void precompute_lines(vec384fp6 Qlines[68], const POINTonE2_affine *Q) +{ + POINTonE2 T[1]; + + vec_copy(T->X, Q->X, 2*sizeof(T->X)); + vec_copy(T->Z, BLS12_381_Rx.p2, sizeof(T->Z)); + + line_dbl(Qlines[0], T, T); /* 0x2 */ + pre_add_n_dbl(&Qlines[1], T, Q, 2); /* ..0xc */ + pre_add_n_dbl(&Qlines[4], T, Q, 3); /* ..0x68 */ + pre_add_n_dbl(&Qlines[8], T, Q, 9); /* ..0xd200 */ + pre_add_n_dbl(&Qlines[18], T, Q, 32); /* ..0xd20100000000 */ + pre_add_n_dbl(&Qlines[51], T, Q, 16); /* ..0xd201000000010000 */ +} + +static void post_line_by_Px2(vec384fp6 out, const vec384fp6 in, + const POINTonE1_affine *Px2) +{ + vec_copy(out[0], in[0], sizeof(out[0])); + + mul_fp(out[1][0], in[1][0], Px2->X); /* "b01" *= -2*P->X */ + mul_fp(out[1][1], in[1][1], Px2->X); + + mul_fp(out[2][0], in[2][0], Px2->Y); /* "b11" *= 2*P->Y */ + mul_fp(out[2][1], in[2][1], Px2->Y); +} + +static void post_add_n_dbl(vec384fp12 ret, const vec384fp6 lines[], + const POINTonE1_affine *Px2, size_t n) +{ + vec384fp6 line; + + post_line_by_Px2(line, lines++[0], Px2); + mul_by_xy00z0_fp12(ret, ret, line); + while (n--) { + sqr_fp12(ret, ret); + post_line_by_Px2(line, lines++[0], Px2); + mul_by_xy00z0_fp12(ret, ret, line); + } +} + +static void miller_loop_lines(vec384fp12 ret, const vec384fp6 Qlines[68], + const POINTonE1_affine *P) +{ + POINTonE1_affine Px2[1]; + vec384fp6 line; /* it's not actual fp6, but 3 packed fp2, "xy00z0" */ + + /* Move common expression from line evaluation to line_by_Px2. */ + add_fp(Px2->X, P->X, P->X); + neg_fp(Px2->X, Px2->X); + add_fp(Px2->Y, P->Y, P->Y); + + /* first step is ret = 1^2*line, which is replaced with ret = line */ + post_line_by_Px2(line, Qlines[0], Px2); /* 0x2 */ + vec_zero(ret, sizeof(vec384fp12)); + vec_copy(ret[0][0], line[0], 2*sizeof(vec384fp2)); + vec_copy(ret[1][1], line[2], sizeof(vec384fp2)); + post_add_n_dbl(ret, &Qlines[1], Px2, 2); /* ..0xc */ + post_add_n_dbl(ret, &Qlines[4], Px2, 3); /* ..0x68 */ + post_add_n_dbl(ret, &Qlines[8], Px2, 9); /* ..0xd200 */ + post_add_n_dbl(ret, &Qlines[18], Px2, 32); /* ..0xd20100000000 */ + post_add_n_dbl(ret, &Qlines[51], Px2, 16); /* ..0xd201000000010000 */ + conjugate_fp12(ret); /* account for z being negative */ +} + +#ifdef INTERNAL_TESTMODE +static void miller_loop_alt(vec384fp12 ret, const POINTonE2_affine *Q, + const POINTonE1_affine *P) +{ + vec384fp6 lines[68]; + + precompute_lines(lines, Q); + miller_loop_lines(ret, lines, P); +} +#endif + +static void mul_n_sqr(vec384fp12 ret, const vec384fp12 a, size_t n) +{ + mul_fp12(ret, ret, a); + while (n--) + cyclotomic_sqr_fp12(ret, ret); +} + +static void raise_to_z_div_by_2(vec384fp12 ret, const vec384fp12 a) +{ + cyclotomic_sqr_fp12(ret, a); /* 0x2 */ + mul_n_sqr(ret, a, 2); /* ..0xc */ + mul_n_sqr(ret, a, 3); /* ..0x68 */ + mul_n_sqr(ret, a, 9); /* ..0xd200 */ + mul_n_sqr(ret, a, 32); /* ..0xd20100000000 */ + mul_n_sqr(ret, a, 16-1); /* ..0x6900800000008000 */ + conjugate_fp12(ret); /* account for z being negative */ +} + +#define raise_to_z(a, b) (raise_to_z_div_by_2(a, b), cyclotomic_sqr_fp12(a, a)) + +/* + * Adaptation from /pairing/src/bls12_381/mod.rs + */ +static void final_exp(vec384fp12 ret, const vec384fp12 f) +{ + vec384fp12 y0, y1, y2, y3; + + vec_copy(y1, f, sizeof(y1)); + conjugate_fp12(y1); + inverse_fp12(y2, f); + mul_fp12(ret, y1, y2); + frobenius_map_fp12(y2, ret, 2); + mul_fp12(ret, ret, y2); + + cyclotomic_sqr_fp12(y0, ret); + raise_to_z(y1, y0); + raise_to_z_div_by_2(y2, y1); + vec_copy(y3, ret, sizeof(y3)); + conjugate_fp12(y3); + mul_fp12(y1, y1, y3); + conjugate_fp12(y1); + mul_fp12(y1, y1, y2); + raise_to_z(y2, y1); + raise_to_z(y3, y2); + conjugate_fp12(y1); + mul_fp12(y3, y3, y1); + conjugate_fp12(y1); + frobenius_map_fp12(y1, y1, 3); + frobenius_map_fp12(y2, y2, 2); + mul_fp12(y1, y1, y2); + raise_to_z(y2, y3); + mul_fp12(y2, y2, y0); + mul_fp12(y2, y2, ret); + mul_fp12(y1, y1, y2); + frobenius_map_fp12(y2, y3, 1); + mul_fp12(ret, y1, y2); +} + +void blst_miller_loop(vec384fp12 ret, const POINTonE2_affine *Q, + const POINTonE1_affine *P) +{ miller_loop_n(ret, Q ? Q : (const POINTonE2_affine *)&BLS12_381_G2, + P ? P : (const POINTonE1_affine *)&BLS12_381_G1, 1); +} + +#ifndef MILLER_LOOP_N_MAX +# define MILLER_LOOP_N_MAX 16 +#endif + +void blst_miller_loop_n(vec384fp12 out, const POINTonE2_affine *const Qs[], + const POINTonE1_affine *const Ps[], + size_t n) +{ /* ~10KB of stack storage */ + POINTonE2 T[MILLER_LOOP_N_MAX]; + POINTonE2_affine Q[MILLER_LOOP_N_MAX]; + POINTonE1_affine Px2[MILLER_LOOP_N_MAX]; + const POINTonE2_affine *Qptr = NULL; + const POINTonE1_affine *Pptr = NULL; + size_t i, j; + + for (i = 0, j = 0; j < n; j++) { + Qptr = *Qs ? *Qs++ : Qptr+1; + Pptr = *Ps ? *Ps++ : Pptr+1; + + /* Move common expression from line evaluation to line_by_Px2. */ + add_fp(Px2[i].X, Pptr->X, Pptr->X); + neg_fp(Px2[i].X, Px2[i].X); + add_fp(Px2[i].Y, Pptr->Y, Pptr->Y); + + vec_copy(Q[i].X, Qptr->X, 2*sizeof(Q[i].X)); + vec_copy(T[i].X, Qptr->X, 2*sizeof(T[i].X)); + vec_copy(T[i].Z, BLS12_381_Rx.p2, sizeof(T[i].Z)); + + if (++i == MILLER_LOOP_N_MAX || j == n-1) { + vec384fp12 tmp; + vec384fp6 *ret = j < MILLER_LOOP_N_MAX ? out : tmp; + + /* first step is ret = 1^2*line, which is just ret = line */ + start_dbl_n(ret, T, Px2, i); /* 0x2 */ + add_n_dbl_n(ret, T, Q, Px2, i, 2); /* ..0xc */ + add_n_dbl_n(ret, T, Q, Px2, i, 3); /* ..0x68 */ + add_n_dbl_n(ret, T, Q, Px2, i, 9); /* ..0xd200 */ + add_n_dbl_n(ret, T, Q, Px2, i, 32); /* ..0xd20100000000 */ + add_n_dbl_n(ret, T, Q, Px2, i, 16); /* ..0xd201000000010000 */ + conjugate_fp12(ret); /* account for z being negative */ + + if (j >= MILLER_LOOP_N_MAX) + mul_fp12(out, out, ret); + + i = 0; + } + } +} + +void blst_final_exp(vec384fp12 ret, const vec384fp12 f) +{ final_exp(ret, f); } + +void blst_precompute_lines(vec384fp6 Qlines[68], const POINTonE2_affine *Q) +{ precompute_lines(Qlines, Q); } + +void blst_miller_loop_lines(vec384fp12 ret, const vec384fp6 Qlines[68], + const POINTonE1_affine *P) +{ miller_loop_lines(ret, Qlines, P); } + +static bool_t is_cyclotomic(const vec384fp12 f) +{ + vec384fp12 a, b; + + frobenius_map_fp12(a, f, 2); + frobenius_map_fp12(b, a, 2); + mul_fp12(b, b, f); + + return vec_is_equal(a, b, sizeof(a)); +} + +int blst_fp12_in_group(const vec384fp12 f) +{ + vec384fp12 a, b; + + if (vec_is_zero(f, sizeof(vec384fp12)) || !is_cyclotomic(f)) + return 0; + + frobenius_map_fp12(a, f, 1); + raise_to_z(b, f); + + return (int)vec_is_equal(a, b, sizeof(a)); +} diff --git a/crypto/blst_src/pentaroot-addchain.h b/crypto/blst_src/pentaroot-addchain.h new file mode 100644 index 00000000000..5bdd9ddf7f7 --- /dev/null +++ b/crypto/blst_src/pentaroot-addchain.h @@ -0,0 +1,333 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * The "magic" number is 1/5 modulo BLS12_381_r-1. Exponentiation to which + * yields 5th root of the base. + * + * Generated with 'addchain 20974350070050476191779096203274386335076221000211055129041463479975432473805' + * https://github.com/kwantam/addchain + * # Bos-Coster (win=4) : 307 (15) + * # Bos-Coster (win=10) : 307 (18) + * # Yacobi : 319 (16) + * # Bos-Coster (win=2) : 319 ( 5) + * # Bos-Coster (win=5) : 306 (19) <<< + * # Bos-Coster (win=7) : 311 (22) + * # Bos-Coster (win=9) : 313 (20) + * # Bos-Coster (win=3) : 314 ( 9) + * # Bos-Coster (win=6) : 309 (21) + * # Bos-Coster (win=8) : 309 (23) + * # Bergeron-Berstel-Brlek-Duboc : 334 ( 5) + */ + +#define PENTAROOT_MOD_BLS12_381_r(out, inp, ptype) do { \ +ptype t[19]; \ +vec_copy(t[1], inp, sizeof(ptype)); /* 0: 1 */\ +sqr(t[7], t[1]); /* 1: 2 */\ +sqr(t[0], t[7]); /* 2: 4 */\ +sqr(t[2], t[0]); /* 3: 8 */\ +mul(t[10], t[2], t[1]); /* 4: 9 */\ +mul(t[3], t[10], t[7]); /* 5: b */\ +mul(t[1], t[10], t[0]); /* 6: d */\ +mul(t[5], t[3], t[0]); /* 7: f */\ +mul(t[9], t[10], t[2]); /* 8: 11 */\ +mul(t[4], t[3], t[2]); /* 9: 13 */\ +mul(t[15], t[5], t[2]); /* 10: 17 */\ +mul(t[8], t[15], t[2]); /* 11: 1f */\ +mul(t[13], t[8], t[7]); /* 12: 21 */\ +mul(t[14], t[8], t[0]); /* 13: 23 */\ +mul(t[12], t[13], t[0]); /* 14: 25 */\ +mul(t[6], t[8], t[2]); /* 15: 27 */\ +mul(t[11], t[14], t[2]); /* 16: 2b */\ +sqr(t[0], t[15]); /* 17: 2e */\ +mul(t[18], t[6], t[2]); /* 18: 2f */\ +mul(t[2], t[11], t[2]); /* 19: 33 */\ +mul(t[16], t[2], t[7]); /* 20: 35 */\ +mul(t[7], t[0], t[3]); /* 21: 39 */\ +mul(t[17], t[0], t[5]); /* 22: 3d */\ +/* sqr(t[0], t[0]); */ /* 23: 5c */\ +/* sqr(t[0], t[0]); */ /* 24: b8 */\ +/* sqr(t[0], t[0]); */ /* 25: 170 */\ +/* sqr(t[0], t[0]); */ /* 26: 2e0 */\ +/* sqr(t[0], t[0]); */ /* 27: 5c0 */\ +/* sqr(t[0], t[0]); */ /* 28: b80 */\ +/* sqr(t[0], t[0]); */ /* 29: 1700 */\ +sqr_n_mul(t[0], t[0], 7, t[18]); /* 30: 172f */\ +/* sqr(t[0], t[0]); */ /* 31: 2e5e */\ +/* sqr(t[0], t[0]); */ /* 32: 5cbc */\ +/* sqr(t[0], t[0]); */ /* 33: b978 */\ +/* sqr(t[0], t[0]); */ /* 34: 172f0 */\ +/* sqr(t[0], t[0]); */ /* 35: 2e5e0 */\ +/* sqr(t[0], t[0]); */ /* 36: 5cbc0 */\ +sqr_n_mul(t[0], t[0], 6, t[13]); /* 37: 5cbe1 */\ +/* sqr(t[0], t[0]); */ /* 38: b97c2 */\ +/* sqr(t[0], t[0]); */ /* 39: 172f84 */\ +/* sqr(t[0], t[0]); */ /* 40: 2e5f08 */\ +/* sqr(t[0], t[0]); */ /* 41: 5cbe10 */\ +/* sqr(t[0], t[0]); */ /* 42: b97c20 */\ +/* sqr(t[0], t[0]); */ /* 43: 172f840 */\ +sqr_n_mul(t[0], t[0], 6, t[17]); /* 44: 172f87d */\ +/* sqr(t[0], t[0]); */ /* 45: 2e5f0fa */\ +/* sqr(t[0], t[0]); */ /* 46: 5cbe1f4 */\ +/* sqr(t[0], t[0]); */ /* 47: b97c3e8 */\ +/* sqr(t[0], t[0]); */ /* 48: 172f87d0 */\ +/* sqr(t[0], t[0]); */ /* 49: 2e5f0fa0 */\ +/* sqr(t[0], t[0]); */ /* 50: 5cbe1f40 */\ +sqr_n_mul(t[0], t[0], 6, t[16]); /* 51: 5cbe1f75 */\ +/* sqr(t[0], t[0]); */ /* 52: b97c3eea */\ +/* sqr(t[0], t[0]); */ /* 53: 172f87dd4 */\ +/* sqr(t[0], t[0]); */ /* 54: 2e5f0fba8 */\ +/* sqr(t[0], t[0]); */ /* 55: 5cbe1f750 */\ +/* sqr(t[0], t[0]); */ /* 56: b97c3eea0 */\ +sqr_n_mul(t[0], t[0], 5, t[15]); /* 57: b97c3eeb7 */\ +/* sqr(t[0], t[0]); */ /* 58: 172f87dd6e */\ +/* sqr(t[0], t[0]); */ /* 59: 2e5f0fbadc */\ +/* sqr(t[0], t[0]); */ /* 60: 5cbe1f75b8 */\ +/* sqr(t[0], t[0]); */ /* 61: b97c3eeb70 */\ +/* sqr(t[0], t[0]); */ /* 62: 172f87dd6e0 */\ +/* sqr(t[0], t[0]); */ /* 63: 2e5f0fbadc0 */\ +sqr_n_mul(t[0], t[0], 6, t[15]); /* 64: 2e5f0fbadd7 */\ +/* sqr(t[0], t[0]); */ /* 65: 5cbe1f75bae */\ +/* sqr(t[0], t[0]); */ /* 66: b97c3eeb75c */\ +/* sqr(t[0], t[0]); */ /* 67: 172f87dd6eb8 */\ +/* sqr(t[0], t[0]); */ /* 68: 2e5f0fbadd70 */\ +/* sqr(t[0], t[0]); */ /* 69: 5cbe1f75bae0 */\ +/* sqr(t[0], t[0]); */ /* 70: b97c3eeb75c0 */\ +/* sqr(t[0], t[0]); */ /* 71: 172f87dd6eb80 */\ +/* sqr(t[0], t[0]); */ /* 72: 2e5f0fbadd700 */\ +sqr_n_mul(t[0], t[0], 8, t[14]); /* 73: 2e5f0fbadd723 */\ +/* sqr(t[0], t[0]); */ /* 74: 5cbe1f75bae46 */\ +/* sqr(t[0], t[0]); */ /* 75: b97c3eeb75c8c */\ +/* sqr(t[0], t[0]); */ /* 76: 172f87dd6eb918 */\ +/* sqr(t[0], t[0]); */ /* 77: 2e5f0fbadd7230 */\ +/* sqr(t[0], t[0]); */ /* 78: 5cbe1f75bae460 */\ +/* sqr(t[0], t[0]); */ /* 79: b97c3eeb75c8c0 */\ +/* sqr(t[0], t[0]); */ /* 80: 172f87dd6eb9180 */\ +/* sqr(t[0], t[0]); */ /* 81: 2e5f0fbadd72300 */\ +sqr_n_mul(t[0], t[0], 8, t[13]); /* 82: 2e5f0fbadd72321 */\ +/* sqr(t[0], t[0]); */ /* 83: 5cbe1f75bae4642 */\ +/* sqr(t[0], t[0]); */ /* 84: b97c3eeb75c8c84 */\ +/* sqr(t[0], t[0]); */ /* 85: 172f87dd6eb91908 */\ +/* sqr(t[0], t[0]); */ /* 86: 2e5f0fbadd723210 */\ +/* sqr(t[0], t[0]); */ /* 87: 5cbe1f75bae46420 */\ +/* sqr(t[0], t[0]); */ /* 88: b97c3eeb75c8c840 */\ +sqr_n_mul(t[0], t[0], 6, t[2]); /* 89: b97c3eeb75c8c873 */\ +/* sqr(t[0], t[0]); */ /* 90: 172f87dd6eb9190e6 */\ +/* sqr(t[0], t[0]); */ /* 91: 2e5f0fbadd72321cc */\ +/* sqr(t[0], t[0]); */ /* 92: 5cbe1f75bae464398 */\ +/* sqr(t[0], t[0]); */ /* 93: b97c3eeb75c8c8730 */\ +/* sqr(t[0], t[0]); */ /* 94: 172f87dd6eb9190e60 */\ +/* sqr(t[0], t[0]); */ /* 95: 2e5f0fbadd72321cc0 */\ +sqr_n_mul(t[0], t[0], 6, t[13]); /* 96: 2e5f0fbadd72321ce1 */\ +/* sqr(t[0], t[0]); */ /* 97: 5cbe1f75bae46439c2 */\ +/* sqr(t[0], t[0]); */ /* 98: b97c3eeb75c8c87384 */\ +/* sqr(t[0], t[0]); */ /* 99: 172f87dd6eb9190e708 */\ +/* sqr(t[0], t[0]); */ /* 100: 2e5f0fbadd72321ce10 */\ +/* sqr(t[0], t[0]); */ /* 101: 5cbe1f75bae46439c20 */\ +/* sqr(t[0], t[0]); */ /* 102: b97c3eeb75c8c873840 */\ +/* sqr(t[0], t[0]); */ /* 103: 172f87dd6eb9190e7080 */\ +sqr_n_mul(t[0], t[0], 7, t[12]); /* 104: 172f87dd6eb9190e70a5 */\ +/* sqr(t[0], t[0]); */ /* 105: 2e5f0fbadd72321ce14a */\ +/* sqr(t[0], t[0]); */ /* 106: 5cbe1f75bae46439c294 */\ +/* sqr(t[0], t[0]); */ /* 107: b97c3eeb75c8c8738528 */\ +/* sqr(t[0], t[0]); */ /* 108: 172f87dd6eb9190e70a50 */\ +/* sqr(t[0], t[0]); */ /* 109: 2e5f0fbadd72321ce14a0 */\ +/* sqr(t[0], t[0]); */ /* 110: 5cbe1f75bae46439c2940 */\ +/* sqr(t[0], t[0]); */ /* 111: b97c3eeb75c8c87385280 */\ +/* sqr(t[0], t[0]); */ /* 112: 172f87dd6eb9190e70a500 */\ +sqr_n_mul(t[0], t[0], 8, t[11]); /* 113: 172f87dd6eb9190e70a52b */\ +/* sqr(t[0], t[0]); */ /* 114: 2e5f0fbadd72321ce14a56 */\ +/* sqr(t[0], t[0]); */ /* 115: 5cbe1f75bae46439c294ac */\ +/* sqr(t[0], t[0]); */ /* 116: b97c3eeb75c8c873852958 */\ +/* sqr(t[0], t[0]); */ /* 117: 172f87dd6eb9190e70a52b0 */\ +/* sqr(t[0], t[0]); */ /* 118: 2e5f0fbadd72321ce14a560 */\ +/* sqr(t[0], t[0]); */ /* 119: 5cbe1f75bae46439c294ac0 */\ +sqr_n_mul(t[0], t[0], 6, t[1]); /* 120: 5cbe1f75bae46439c294acd */\ +/* sqr(t[0], t[0]); */ /* 121: b97c3eeb75c8c873852959a */\ +/* sqr(t[0], t[0]); */ /* 122: 172f87dd6eb9190e70a52b34 */\ +/* sqr(t[0], t[0]); */ /* 123: 2e5f0fbadd72321ce14a5668 */\ +/* sqr(t[0], t[0]); */ /* 124: 5cbe1f75bae46439c294acd0 */\ +/* sqr(t[0], t[0]); */ /* 125: b97c3eeb75c8c873852959a0 */\ +/* sqr(t[0], t[0]); */ /* 126: 172f87dd6eb9190e70a52b340 */\ +/* sqr(t[0], t[0]); */ /* 127: 2e5f0fbadd72321ce14a56680 */\ +/* sqr(t[0], t[0]); */ /* 128: 5cbe1f75bae46439c294acd00 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 129: 5cbe1f75bae46439c294acd33 */\ +/* sqr(t[0], t[0]); */ /* 130: b97c3eeb75c8c873852959a66 */\ +/* sqr(t[0], t[0]); */ /* 131: 172f87dd6eb9190e70a52b34cc */\ +/* sqr(t[0], t[0]); */ /* 132: 2e5f0fbadd72321ce14a566998 */\ +/* sqr(t[0], t[0]); */ /* 133: 5cbe1f75bae46439c294acd330 */\ +/* sqr(t[0], t[0]); */ /* 134: b97c3eeb75c8c873852959a660 */\ +/* sqr(t[0], t[0]); */ /* 135: 172f87dd6eb9190e70a52b34cc0 */\ +sqr_n_mul(t[0], t[0], 6, t[11]); /* 136: 172f87dd6eb9190e70a52b34ceb */\ +/* sqr(t[0], t[0]); */ /* 137: 2e5f0fbadd72321ce14a56699d6 */\ +/* sqr(t[0], t[0]); */ /* 138: 5cbe1f75bae46439c294acd33ac */\ +/* sqr(t[0], t[0]); */ /* 139: b97c3eeb75c8c873852959a6758 */\ +/* sqr(t[0], t[0]); */ /* 140: 172f87dd6eb9190e70a52b34ceb0 */\ +sqr_n_mul(t[0], t[0], 4, t[10]); /* 141: 172f87dd6eb9190e70a52b34ceb9 */\ +/* sqr(t[0], t[0]); */ /* 142: 2e5f0fbadd72321ce14a56699d72 */\ +/* sqr(t[0], t[0]); */ /* 143: 5cbe1f75bae46439c294acd33ae4 */\ +/* sqr(t[0], t[0]); */ /* 144: b97c3eeb75c8c873852959a675c8 */\ +/* sqr(t[0], t[0]); */ /* 145: 172f87dd6eb9190e70a52b34ceb90 */\ +/* sqr(t[0], t[0]); */ /* 146: 2e5f0fbadd72321ce14a56699d720 */\ +sqr_n_mul(t[0], t[0], 5, t[8]); /* 147: 2e5f0fbadd72321ce14a56699d73f */\ +/* sqr(t[0], t[0]); */ /* 148: 5cbe1f75bae46439c294acd33ae7e */\ +/* sqr(t[0], t[0]); */ /* 149: b97c3eeb75c8c873852959a675cfc */\ +/* sqr(t[0], t[0]); */ /* 150: 172f87dd6eb9190e70a52b34ceb9f8 */\ +/* sqr(t[0], t[0]); */ /* 151: 2e5f0fbadd72321ce14a56699d73f0 */\ +/* sqr(t[0], t[0]); */ /* 152: 5cbe1f75bae46439c294acd33ae7e0 */\ +/* sqr(t[0], t[0]); */ /* 153: b97c3eeb75c8c873852959a675cfc0 */\ +/* sqr(t[0], t[0]); */ /* 154: 172f87dd6eb9190e70a52b34ceb9f80 */\ +/* sqr(t[0], t[0]); */ /* 155: 2e5f0fbadd72321ce14a56699d73f00 */\ +/* sqr(t[0], t[0]); */ /* 156: 5cbe1f75bae46439c294acd33ae7e00 */\ +/* sqr(t[0], t[0]); */ /* 157: b97c3eeb75c8c873852959a675cfc00 */\ +/* sqr(t[0], t[0]); */ /* 158: 172f87dd6eb9190e70a52b34ceb9f800 */\ +/* sqr(t[0], t[0]); */ /* 159: 2e5f0fbadd72321ce14a56699d73f000 */\ +/* sqr(t[0], t[0]); */ /* 160: 5cbe1f75bae46439c294acd33ae7e000 */\ +/* sqr(t[0], t[0]); */ /* 161: b97c3eeb75c8c873852959a675cfc000 */\ +/* sqr(t[0], t[0]); */ /* 162: 172f87dd6eb9190e70a52b34ceb9f8000 */\ +sqr_n_mul(t[0], t[0], 15, t[9]); /* 163: 172f87dd6eb9190e70a52b34ceb9f8011 */\ +/* sqr(t[0], t[0]); */ /* 164: 2e5f0fbadd72321ce14a56699d73f0022 */\ +/* sqr(t[0], t[0]); */ /* 165: 5cbe1f75bae46439c294acd33ae7e0044 */\ +/* sqr(t[0], t[0]); */ /* 166: b97c3eeb75c8c873852959a675cfc0088 */\ +/* sqr(t[0], t[0]); */ /* 167: 172f87dd6eb9190e70a52b34ceb9f80110 */\ +/* sqr(t[0], t[0]); */ /* 168: 2e5f0fbadd72321ce14a56699d73f00220 */\ +/* sqr(t[0], t[0]); */ /* 169: 5cbe1f75bae46439c294acd33ae7e00440 */\ +/* sqr(t[0], t[0]); */ /* 170: b97c3eeb75c8c873852959a675cfc00880 */\ +/* sqr(t[0], t[0]); */ /* 171: 172f87dd6eb9190e70a52b34ceb9f801100 */\ +sqr_n_mul(t[0], t[0], 8, t[3]); /* 172: 172f87dd6eb9190e70a52b34ceb9f80110b */\ +/* sqr(t[0], t[0]); */ /* 173: 2e5f0fbadd72321ce14a56699d73f002216 */\ +/* sqr(t[0], t[0]); */ /* 174: 5cbe1f75bae46439c294acd33ae7e00442c */\ +/* sqr(t[0], t[0]); */ /* 175: b97c3eeb75c8c873852959a675cfc008858 */\ +/* sqr(t[0], t[0]); */ /* 176: 172f87dd6eb9190e70a52b34ceb9f80110b0 */\ +/* sqr(t[0], t[0]); */ /* 177: 2e5f0fbadd72321ce14a56699d73f0022160 */\ +sqr_n_mul(t[0], t[0], 5, t[8]); /* 178: 2e5f0fbadd72321ce14a56699d73f002217f */\ +/* sqr(t[0], t[0]); */ /* 179: 5cbe1f75bae46439c294acd33ae7e00442fe */\ +/* sqr(t[0], t[0]); */ /* 180: b97c3eeb75c8c873852959a675cfc00885fc */\ +/* sqr(t[0], t[0]); */ /* 181: 172f87dd6eb9190e70a52b34ceb9f80110bf8 */\ +/* sqr(t[0], t[0]); */ /* 182: 2e5f0fbadd72321ce14a56699d73f002217f0 */\ +/* sqr(t[0], t[0]); */ /* 183: 5cbe1f75bae46439c294acd33ae7e00442fe0 */\ +/* sqr(t[0], t[0]); */ /* 184: b97c3eeb75c8c873852959a675cfc00885fc0 */\ +/* sqr(t[0], t[0]); */ /* 185: 172f87dd6eb9190e70a52b34ceb9f80110bf80 */\ +/* sqr(t[0], t[0]); */ /* 186: 2e5f0fbadd72321ce14a56699d73f002217f00 */\ +/* sqr(t[0], t[0]); */ /* 187: 5cbe1f75bae46439c294acd33ae7e00442fe00 */\ +/* sqr(t[0], t[0]); */ /* 188: b97c3eeb75c8c873852959a675cfc00885fc00 */\ +sqr_n_mul(t[0], t[0], 10, t[7]); /* 189: b97c3eeb75c8c873852959a675cfc00885fc39 */\ +/* sqr(t[0], t[0]); */ /* 190: 172f87dd6eb9190e70a52b34ceb9f80110bf872 */\ +/* sqr(t[0], t[0]); */ /* 191: 2e5f0fbadd72321ce14a56699d73f002217f0e4 */\ +/* sqr(t[0], t[0]); */ /* 192: 5cbe1f75bae46439c294acd33ae7e00442fe1c8 */\ +/* sqr(t[0], t[0]); */ /* 193: b97c3eeb75c8c873852959a675cfc00885fc390 */\ +/* sqr(t[0], t[0]); */ /* 194: 172f87dd6eb9190e70a52b34ceb9f80110bf8720 */\ +/* sqr(t[0], t[0]); */ /* 195: 2e5f0fbadd72321ce14a56699d73f002217f0e40 */\ +sqr_n_mul(t[0], t[0], 6, t[6]); /* 196: 2e5f0fbadd72321ce14a56699d73f002217f0e67 */\ +/* sqr(t[0], t[0]); */ /* 197: 5cbe1f75bae46439c294acd33ae7e00442fe1cce */\ +/* sqr(t[0], t[0]); */ /* 198: b97c3eeb75c8c873852959a675cfc00885fc399c */\ +/* sqr(t[0], t[0]); */ /* 199: 172f87dd6eb9190e70a52b34ceb9f80110bf87338 */\ +/* sqr(t[0], t[0]); */ /* 200: 2e5f0fbadd72321ce14a56699d73f002217f0e670 */\ +/* sqr(t[0], t[0]); */ /* 201: 5cbe1f75bae46439c294acd33ae7e00442fe1cce0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 202: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3 */\ +/* sqr(t[0], t[0]); */ /* 203: b97c3eeb75c8c873852959a675cfc00885fc399e6 */\ +/* sqr(t[0], t[0]); */ /* 204: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cc */\ +/* sqr(t[0], t[0]); */ /* 205: 2e5f0fbadd72321ce14a56699d73f002217f0e6798 */\ +/* sqr(t[0], t[0]); */ /* 206: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf30 */\ +/* sqr(t[0], t[0]); */ /* 207: b97c3eeb75c8c873852959a675cfc00885fc399e60 */\ +/* sqr(t[0], t[0]); */ /* 208: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cc0 */\ +/* sqr(t[0], t[0]); */ /* 209: 2e5f0fbadd72321ce14a56699d73f002217f0e67980 */\ +/* sqr(t[0], t[0]); */ /* 210: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf300 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 211: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf333 */\ +/* sqr(t[0], t[0]); */ /* 212: b97c3eeb75c8c873852959a675cfc00885fc399e666 */\ +/* sqr(t[0], t[0]); */ /* 213: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc */\ +/* sqr(t[0], t[0]); */ /* 214: 2e5f0fbadd72321ce14a56699d73f002217f0e679998 */\ +/* sqr(t[0], t[0]); */ /* 215: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3330 */\ +/* sqr(t[0], t[0]); */ /* 216: b97c3eeb75c8c873852959a675cfc00885fc399e6660 */\ +/* sqr(t[0], t[0]); */ /* 217: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc0 */\ +/* sqr(t[0], t[0]); */ /* 218: 2e5f0fbadd72321ce14a56699d73f002217f0e6799980 */\ +sqr_n_mul(t[0], t[0], 7, t[5]); /* 219: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f */\ +/* sqr(t[0], t[0]); */ /* 220: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e */\ +/* sqr(t[0], t[0]); */ /* 221: b97c3eeb75c8c873852959a675cfc00885fc399e6663c */\ +/* sqr(t[0], t[0]); */ /* 222: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78 */\ +/* sqr(t[0], t[0]); */ /* 223: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f0 */\ +/* sqr(t[0], t[0]); */ /* 224: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e0 */\ +/* sqr(t[0], t[0]); */ /* 225: b97c3eeb75c8c873852959a675cfc00885fc399e6663c0 */\ +/* sqr(t[0], t[0]); */ /* 226: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc780 */\ +/* sqr(t[0], t[0]); */ /* 227: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f00 */\ +/* sqr(t[0], t[0]); */ /* 228: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e00 */\ +sqr_n_mul(t[0], t[0], 9, t[2]); /* 229: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33 */\ +/* sqr(t[0], t[0]); */ /* 230: b97c3eeb75c8c873852959a675cfc00885fc399e6663c66 */\ +/* sqr(t[0], t[0]); */ /* 231: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc */\ +/* sqr(t[0], t[0]); */ /* 232: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f198 */\ +/* sqr(t[0], t[0]); */ /* 233: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e330 */\ +/* sqr(t[0], t[0]); */ /* 234: b97c3eeb75c8c873852959a675cfc00885fc399e6663c660 */\ +/* sqr(t[0], t[0]); */ /* 235: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc0 */\ +/* sqr(t[0], t[0]); */ /* 236: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1980 */\ +sqr_n_mul(t[0], t[0], 7, t[4]); /* 237: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993 */\ +/* sqr(t[0], t[0]); */ /* 238: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326 */\ +/* sqr(t[0], t[0]); */ /* 239: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664c */\ +/* sqr(t[0], t[0]); */ /* 240: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc98 */\ +/* sqr(t[0], t[0]); */ /* 241: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19930 */\ +/* sqr(t[0], t[0]); */ /* 242: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33260 */\ +/* sqr(t[0], t[0]); */ /* 243: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664c0 */\ +/* sqr(t[0], t[0]); */ /* 244: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc980 */\ +/* sqr(t[0], t[0]); */ /* 245: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f199300 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 246: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f199333 */\ +/* sqr(t[0], t[0]); */ /* 247: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666 */\ +/* sqr(t[0], t[0]); */ /* 248: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccc */\ +/* sqr(t[0], t[0]); */ /* 249: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9998 */\ +/* sqr(t[0], t[0]); */ /* 250: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993330 */\ +/* sqr(t[0], t[0]); */ /* 251: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326660 */\ +/* sqr(t[0], t[0]); */ /* 252: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccc0 */\ +/* sqr(t[0], t[0]); */ /* 253: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99980 */\ +/* sqr(t[0], t[0]); */ /* 254: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933300 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 255: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333 */\ +/* sqr(t[0], t[0]); */ /* 256: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666 */\ +/* sqr(t[0], t[0]); */ /* 257: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccc */\ +/* sqr(t[0], t[0]); */ /* 258: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999998 */\ +/* sqr(t[0], t[0]); */ /* 259: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f199333330 */\ +/* sqr(t[0], t[0]); */ /* 260: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666660 */\ +/* sqr(t[0], t[0]); */ /* 261: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccc0 */\ +/* sqr(t[0], t[0]); */ /* 262: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999980 */\ +/* sqr(t[0], t[0]); */ /* 263: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993333300 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 264: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993333333 */\ +/* sqr(t[0], t[0]); */ /* 265: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326666666 */\ +/* sqr(t[0], t[0]); */ /* 266: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccccc */\ +/* sqr(t[0], t[0]); */ /* 267: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99999998 */\ +/* sqr(t[0], t[0]); */ /* 268: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333330 */\ +/* sqr(t[0], t[0]); */ /* 269: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666660 */\ +/* sqr(t[0], t[0]); */ /* 270: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccccc0 */\ +sqr_n_mul(t[0], t[0], 6, t[3]); /* 271: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb */\ +/* sqr(t[0], t[0]); */ /* 272: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999999996 */\ +/* sqr(t[0], t[0]); */ /* 273: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332c */\ +/* sqr(t[0], t[0]); */ /* 274: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666666658 */\ +/* sqr(t[0], t[0]); */ /* 275: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb0 */\ +/* sqr(t[0], t[0]); */ /* 276: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999999960 */\ +/* sqr(t[0], t[0]); */ /* 277: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332c0 */\ +/* sqr(t[0], t[0]); */ /* 278: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326666666580 */\ +/* sqr(t[0], t[0]); */ /* 279: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb00 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 280: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb33 */\ +/* sqr(t[0], t[0]); */ /* 281: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99999999666 */\ +/* sqr(t[0], t[0]); */ /* 282: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccc */\ +/* sqr(t[0], t[0]); */ /* 283: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666665998 */\ +/* sqr(t[0], t[0]); */ /* 284: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb330 */\ +/* sqr(t[0], t[0]); */ /* 285: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999999996660 */\ +/* sqr(t[0], t[0]); */ /* 286: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccc0 */\ +/* sqr(t[0], t[0]); */ /* 287: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666666659980 */\ +/* sqr(t[0], t[0]); */ /* 288: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb3300 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 289: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb3333 */\ +/* sqr(t[0], t[0]); */ /* 290: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999999966666 */\ +/* sqr(t[0], t[0]); */ /* 291: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccc */\ +/* sqr(t[0], t[0]); */ /* 292: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326666666599998 */\ +/* sqr(t[0], t[0]); */ /* 293: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb33330 */\ +/* sqr(t[0], t[0]); */ /* 294: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99999999666660 */\ +/* sqr(t[0], t[0]); */ /* 295: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccc0 */\ +/* sqr(t[0], t[0]); */ /* 296: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666665999980 */\ +/* sqr(t[0], t[0]); */ /* 297: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb333300 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 298: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb333333 */\ +/* sqr(t[0], t[0]); */ /* 299: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999999996666666 */\ +/* sqr(t[0], t[0]); */ /* 300: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccccc */\ +/* sqr(t[0], t[0]); */ /* 301: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666666659999998 */\ +/* sqr(t[0], t[0]); */ /* 302: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb3333330 */\ +/* sqr(t[0], t[0]); */ /* 303: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999999966666660 */\ +/* sqr(t[0], t[0]); */ /* 304: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccccc0 */\ +sqr_n_mul(out, t[0], 6, t[1]); /* 305: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332cccccccd */\ +} while(0) diff --git a/crypto/blst_src/pentaroot.c b/crypto/blst_src/pentaroot.c new file mode 100644 index 00000000000..71f334df50a --- /dev/null +++ b/crypto/blst_src/pentaroot.c @@ -0,0 +1,76 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fields.h" + +static inline void mul_fr(vec256 ret, const vec256 a, const vec256 b) +{ mul_mont_sparse_256(ret, a, b, BLS12_381_r, r0); } + +static inline void sqr_fr(vec256 ret, const vec256 a) +{ sqr_mont_sparse_256(ret, a, BLS12_381_r, r0); } + +#ifdef __OPTIMIZE_SIZE__ +void blst_fr_pentaroot(vec256 out, const vec256 inp) +{ + static const byte pow[] = { + TO_BYTES(0x33333332cccccccd), TO_BYTES(0x217f0e679998f199), + TO_BYTES(0xe14a56699d73f002), TO_BYTES(0x2e5f0fbadd72321c) + }; + size_t pow_bits = 254; + vec256 ret; + + vec_copy(ret, inp, sizeof(ret)); /* ret = inp^1 */ + --pow_bits; /* most significant bit is set, skip over */ + while (pow_bits--) { + sqr_fr(ret, ret); + if (is_bit_set(pow, pow_bits)) + mul_fr(ret, ret, inp); + } + vec_copy(out, ret, sizeof(ret)); /* out = ret */ +} +#else +# if 0 +/* + * "255"-bit variant omits full reductions at the ends of squarings, + * not implemented yet[?]. + */ +static inline void sqr_n_mul_fr(vec256 out, const vec256 a, size_t count, + const vec256 b) +{ sqr_n_mul_mont_255(out, a, count, BLS12_381_r, r0, b); } +# else +static void sqr_n_mul_fr(vec256 out, const vec256 a, size_t count, + const vec256 b) +{ + do { + sqr_fr(out, a); + a = out; + } while (--count); + mul_fr(out, out, b); +} +# endif + +# define sqr(ret,a) sqr_fr(ret,a) +# define mul(ret,a,b) mul_fr(ret,a,b) +# define sqr_n_mul(ret,a,n,b) sqr_n_mul_fr(ret,a,n,b) + +# include "pentaroot-addchain.h" +void blst_fr_pentaroot(vec256 out, const vec256 inp) +{ PENTAROOT_MOD_BLS12_381_r(out, inp, vec256); } +# undef PENTAROOT_MOD_BLS12_381_r + +# undef sqr_n_mul +# undef sqr +# undef mul +#endif + +void blst_fr_pentapow(vec256 out, const vec256 inp) +{ + vec256 tmp; + + sqr_fr(tmp, inp); + sqr_fr(tmp, tmp); + mul_fr(out, tmp, inp); +} diff --git a/crypto/blst_src/point.h b/crypto/blst_src/point.h new file mode 100644 index 00000000000..0aa7379671f --- /dev/null +++ b/crypto/blst_src/point.h @@ -0,0 +1,62 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_381_ASM_POINT_H__ +#define __BLS12_381_ASM_POINT_H__ + +#include "vect.h" +#include "bytes.h" + +#define DECLARE_POINT(ptype, bits) \ +typedef struct { vec##bits X,Y,Z; } ptype; \ +typedef struct { vec##bits X,Y; } ptype##_affine; \ +\ +static void ptype##_dadd(ptype *out, const ptype *p1, const ptype *p2, \ + const vec##bits a4); \ +static void ptype##_dadd_affine(ptype *out, const ptype *p1, \ + const ptype##_affine *p2); \ +static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2); \ +static void ptype##_add_affine(ptype *out, const ptype *p1, \ + const ptype##_affine *p2); \ +static void ptype##_double(ptype *out, const ptype *p1); \ +static void ptype##_mult_w5(ptype *out, const ptype *point, \ + const byte *scalar, size_t nbits); \ +static void ptype##_cneg(ptype *p, limb_t cbit); \ +static void ptype##_to_affine(ptype##_affine *out, const ptype *in); \ +static void ptype##_from_Jacobian(ptype *out, const ptype *in); \ +\ +static inline void ptype##_cswap(ptype *restrict a, \ + ptype *restrict b, bool_t cbit) { \ + vec_cswap(a, b, sizeof(ptype), cbit); \ +} \ +static inline void ptype##_ccopy(ptype *restrict a, \ + const ptype *restrict b, bool_t cbit) {\ + vec_select(a, b, a, sizeof(ptype), cbit); \ +} + +#define DECLARE_PRIVATE_POINTXZ(ptype, bits) \ +typedef struct { vec##bits X,Z; } ptype##xz; \ +\ +static void ptype##xz_ladder_pre(ptype##xz *out, const ptype *in); \ +static void ptype##xz_ladder_step(ptype##xz *r, ptype##xz *s, \ + const ptype##xz *p); \ +static void ptype##xz_ladder_post(ptype *ret, \ + const ptype##xz *r, const ptype##xz *s, \ + const ptype##xz *p, const vec##bits Y1);\ +\ +static inline void ptype##xz_cswap(ptype##xz *restrict a, \ + ptype##xz *restrict b, bool_t cbit) {\ + vec_cswap(a, b, sizeof(ptype##xz), cbit); \ +} + +DECLARE_POINT(POINTonE1, 384) + +DECLARE_POINT(POINTonE2, 384x) + +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#endif diff --git a/crypto/blst_src/rb_tree.c b/crypto/blst_src/rb_tree.c new file mode 100644 index 00000000000..207becdad18 --- /dev/null +++ b/crypto/blst_src/rb_tree.c @@ -0,0 +1,145 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +/* + * Red-black tree tailored for uniqueness test. Amount of messages to be + * checked is known prior context initialization, implementation is + * insert-only, failure is returned if message is already in the tree. + */ + +struct node { + struct node *leafs[2]; + const void *data; + size_t len_n_colour; /* len<<1 | colour */ +}; + +struct rb_tree { + struct node *root; + size_t n_nodes; + struct node nodes[1]; +}; + +static long bytes_compare(const unsigned char *ptr0, size_t len0, + const unsigned char *ptr1, size_t len1) +{ + size_t i, len = len0len_n_colour &= ~(size_t)1) +#define PAINT_RED(p) ((p)->len_n_colour |= 1) +#define IS_RED(p) ((p)->len_n_colour & 1) + +static int rb_tree_insert(struct rb_tree *tree, const void *data, size_t len) +{ + struct node *nodes[8*sizeof(void *)]; /* visited nodes */ + unsigned char dirs[8*sizeof(void *)]; /* taken directions */ + size_t k = 0; /* walked distance */ + struct node *p, *y, *z; + + for (p = tree->root; p != NULL; k++) { + long cmp = bytes_compare(data, len, p->data, p->len_n_colour>>1); + + if (cmp == 0) + return 0; /* already in tree, no insertion */ + + /* record the step */ + nodes[k] = p; + p = p->leafs[(dirs[k] = cmp>0)]; + } + + /* allocate new node */ + z = &tree->nodes[tree->n_nodes++]; + z->leafs[0] = z->leafs[1] = NULL; + z->data = data; + z->len_n_colour = len<<1; + PAINT_RED(z); + + /* graft |z| */ + if (k > 0) + nodes[k-1]->leafs[dirs[k-1]] = z; + else + tree->root = z; + + /* re-balance |tree| */ + while (k >= 2 && IS_RED(y = nodes[k-1])) { + size_t ydir = dirs[k-2]; + struct node *x = nodes[k-2], /* |z|'s grandparent */ + *s = x->leafs[ydir^1]; /* |z|'s uncle */ + + if (s != NULL && IS_RED(s)) { + PAINT_RED(x); + PAINT_BLACK(y); + PAINT_BLACK(s); + k -= 2; + } else { + if (dirs[k-1] != ydir) { + /* | | + * x x + * / \ \ + * y s -> z s + * \ / + * z y + * / \ + * ? ? + */ + struct node *t = y; + y = y->leafs[ydir^1]; + t->leafs[ydir^1] = y->leafs[ydir]; + y->leafs[ydir] = t; + } + + /* | | + * x y + * \ / \ + * y s -> z x + * / \ / \ + * z ? ? s + */ + x->leafs[ydir] = y->leafs[ydir^1]; + y->leafs[ydir^1] = x; + + PAINT_RED(x); + PAINT_BLACK(y); + + if (k > 2) + nodes[k-3]->leafs[dirs[k-3]] = y; + else + tree->root = y; + + break; + } + } + + PAINT_BLACK(tree->root); + + return 1; +} + +#undef IS_RED +#undef PAINT_RED +#undef PAINT_BLACK + +size_t blst_uniq_sizeof(size_t n_nodes) +{ return sizeof(struct rb_tree) + sizeof(struct node)*(n_nodes-1); } + +void blst_uniq_init(struct rb_tree *tree) +{ + tree->root = NULL; + tree->n_nodes = 0; +} + +int blst_uniq_test(struct rb_tree *tree, const void *data, size_t len) +{ return (int)rb_tree_insert(tree, data, len); } diff --git a/crypto/blst_src/recip-addchain.h b/crypto/blst_src/recip-addchain.h new file mode 100644 index 00000000000..e4e436a3f09 --- /dev/null +++ b/crypto/blst_src/recip-addchain.h @@ -0,0 +1,489 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * The "magic" number is BLS12_381_P-2. Exponentiation to which yields + * reciprocal to input base. + * + * Generated with 'addchain 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559785' + * https://github.com/kwantam/addchain + * + * # Bos-Coster (win=4) : 461 (16) <<< + * # Bos-Coster (win=3) : 464 ( 9) + * # Bos-Coster (win=8) : 469 (35) + * # Bos-Coster (win=5) : 463 (28) + * # Bos-Coster (win=9) : 467 (32) + * # Bos-Coster (win=7) : 462 (27) + * # Yacobi : 481 (31) + * # Bos-Coster (win=10) : 475 (30) + * # Bos-Coster (win=6) : 463 (32) + * # Bos-Coster (win=2) : 489 ( 5) + * # Bergeron-Berstel-Brlek-Duboc : 498 ( 5) + */ + +#define RECIPROCAL_MOD_BLS12_381_P(out, inp, ptype) do { \ +ptype t[16]; \ +vec_copy(t[1], inp, sizeof(ptype)); /* 0: 1 */\ +sqr(t[0], t[1]); /* 1: 2 */\ +mul(t[9], t[0], t[1]); /* 2: 3 */\ +sqr(t[5], t[0]); /* 3: 4 */\ +mul(t[2], t[9], t[0]); /* 4: 5 */\ +mul(t[7], t[5], t[9]); /* 5: 7 */\ +mul(t[10], t[2], t[5]); /* 6: 9 */\ +mul(t[13], t[7], t[5]); /* 7: b */\ +mul(t[4], t[10], t[5]); /* 8: d */\ +mul(t[8], t[13], t[5]); /* 9: f */\ +mul(t[15], t[4], t[5]); /* 10: 11 */\ +mul(t[11], t[8], t[5]); /* 11: 13 */\ +mul(t[3], t[15], t[5]); /* 12: 15 */\ +mul(t[12], t[11], t[5]); /* 13: 17 */\ +sqr(t[0], t[4]); /* 14: 1a */\ +mul(t[14], t[12], t[5]); /* 15: 1b */\ +mul(t[6], t[0], t[9]); /* 16: 1d */\ +mul(t[5], t[0], t[2]); /* 17: 1f */\ +/* sqr(t[0], t[0]); */ /* 18: 34 */\ +/* sqr(t[0], t[0]); */ /* 19: 68 */\ +/* sqr(t[0], t[0]); */ /* 20: d0 */\ +/* sqr(t[0], t[0]); */ /* 21: 1a0 */\ +/* sqr(t[0], t[0]); */ /* 22: 340 */\ +/* sqr(t[0], t[0]); */ /* 23: 680 */\ +/* sqr(t[0], t[0]); */ /* 24: d00 */\ +/* sqr(t[0], t[0]); */ /* 25: 1a00 */\ +/* sqr(t[0], t[0]); */ /* 26: 3400 */\ +/* sqr(t[0], t[0]); */ /* 27: 6800 */\ +/* sqr(t[0], t[0]); */ /* 28: d000 */\ +/* sqr(t[0], t[0]); */ /* 29: 1a000 */\ +sqr_n_mul(t[0], t[0], 12, t[15]); /* 30: 1a011 */\ +/* sqr(t[0], t[0]); */ /* 31: 34022 */\ +/* sqr(t[0], t[0]); */ /* 32: 68044 */\ +/* sqr(t[0], t[0]); */ /* 33: d0088 */\ +/* sqr(t[0], t[0]); */ /* 34: 1a0110 */\ +/* sqr(t[0], t[0]); */ /* 35: 340220 */\ +/* sqr(t[0], t[0]); */ /* 36: 680440 */\ +/* sqr(t[0], t[0]); */ /* 37: d00880 */\ +sqr_n_mul(t[0], t[0], 7, t[8]); /* 38: d0088f */\ +/* sqr(t[0], t[0]); */ /* 39: 1a0111e */\ +/* sqr(t[0], t[0]); */ /* 40: 340223c */\ +/* sqr(t[0], t[0]); */ /* 41: 6804478 */\ +/* sqr(t[0], t[0]); */ /* 42: d0088f0 */\ +sqr_n_mul(t[0], t[0], 4, t[2]); /* 43: d0088f5 */\ +/* sqr(t[0], t[0]); */ /* 44: 1a0111ea */\ +/* sqr(t[0], t[0]); */ /* 45: 340223d4 */\ +/* sqr(t[0], t[0]); */ /* 46: 680447a8 */\ +/* sqr(t[0], t[0]); */ /* 47: d0088f50 */\ +/* sqr(t[0], t[0]); */ /* 48: 1a0111ea0 */\ +/* sqr(t[0], t[0]); */ /* 49: 340223d40 */\ +sqr_n_mul(t[0], t[0], 6, t[7]); /* 50: 340223d47 */\ +/* sqr(t[0], t[0]); */ /* 51: 680447a8e */\ +/* sqr(t[0], t[0]); */ /* 52: d0088f51c */\ +/* sqr(t[0], t[0]); */ /* 53: 1a0111ea38 */\ +/* sqr(t[0], t[0]); */ /* 54: 340223d470 */\ +/* sqr(t[0], t[0]); */ /* 55: 680447a8e0 */\ +/* sqr(t[0], t[0]); */ /* 56: d0088f51c0 */\ +/* sqr(t[0], t[0]); */ /* 57: 1a0111ea380 */\ +sqr_n_mul(t[0], t[0], 7, t[12]); /* 58: 1a0111ea397 */\ +/* sqr(t[0], t[0]); */ /* 59: 340223d472e */\ +/* sqr(t[0], t[0]); */ /* 60: 680447a8e5c */\ +/* sqr(t[0], t[0]); */ /* 61: d0088f51cb8 */\ +/* sqr(t[0], t[0]); */ /* 62: 1a0111ea3970 */\ +/* sqr(t[0], t[0]); */ /* 63: 340223d472e0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 64: 340223d472ff */\ +/* sqr(t[0], t[0]); */ /* 65: 680447a8e5fe */\ +/* sqr(t[0], t[0]); */ /* 66: d0088f51cbfc */\ +sqr_n_mul(t[0], t[0], 2, t[9]); /* 67: d0088f51cbff */\ +/* sqr(t[0], t[0]); */ /* 68: 1a0111ea397fe */\ +/* sqr(t[0], t[0]); */ /* 69: 340223d472ffc */\ +/* sqr(t[0], t[0]); */ /* 70: 680447a8e5ff8 */\ +/* sqr(t[0], t[0]); */ /* 71: d0088f51cbff0 */\ +/* sqr(t[0], t[0]); */ /* 72: 1a0111ea397fe0 */\ +/* sqr(t[0], t[0]); */ /* 73: 340223d472ffc0 */\ +sqr_n_mul(t[0], t[0], 6, t[4]); /* 74: 340223d472ffcd */\ +/* sqr(t[0], t[0]); */ /* 75: 680447a8e5ff9a */\ +/* sqr(t[0], t[0]); */ /* 76: d0088f51cbff34 */\ +/* sqr(t[0], t[0]); */ /* 77: 1a0111ea397fe68 */\ +/* sqr(t[0], t[0]); */ /* 78: 340223d472ffcd0 */\ +/* sqr(t[0], t[0]); */ /* 79: 680447a8e5ff9a0 */\ +/* sqr(t[0], t[0]); */ /* 80: d0088f51cbff340 */\ +sqr_n_mul(t[0], t[0], 6, t[4]); /* 81: d0088f51cbff34d */\ +/* sqr(t[0], t[0]); */ /* 82: 1a0111ea397fe69a */\ +/* sqr(t[0], t[0]); */ /* 83: 340223d472ffcd34 */\ +/* sqr(t[0], t[0]); */ /* 84: 680447a8e5ff9a68 */\ +/* sqr(t[0], t[0]); */ /* 85: d0088f51cbff34d0 */\ +/* sqr(t[0], t[0]); */ /* 86: 1a0111ea397fe69a0 */\ +/* sqr(t[0], t[0]); */ /* 87: 340223d472ffcd340 */\ +sqr_n_mul(t[0], t[0], 6, t[10]); /* 88: 340223d472ffcd349 */\ +/* sqr(t[0], t[0]); */ /* 89: 680447a8e5ff9a692 */\ +/* sqr(t[0], t[0]); */ /* 90: d0088f51cbff34d24 */\ +/* sqr(t[0], t[0]); */ /* 91: 1a0111ea397fe69a48 */\ +sqr_n_mul(t[0], t[0], 3, t[9]); /* 92: 1a0111ea397fe69a4b */\ +/* sqr(t[0], t[0]); */ /* 93: 340223d472ffcd3496 */\ +/* sqr(t[0], t[0]); */ /* 94: 680447a8e5ff9a692c */\ +/* sqr(t[0], t[0]); */ /* 95: d0088f51cbff34d258 */\ +/* sqr(t[0], t[0]); */ /* 96: 1a0111ea397fe69a4b0 */\ +/* sqr(t[0], t[0]); */ /* 97: 340223d472ffcd34960 */\ +/* sqr(t[0], t[0]); */ /* 98: 680447a8e5ff9a692c0 */\ +/* sqr(t[0], t[0]); */ /* 99: d0088f51cbff34d2580 */\ +sqr_n_mul(t[0], t[0], 7, t[4]); /* 100: d0088f51cbff34d258d */\ +/* sqr(t[0], t[0]); */ /* 101: 1a0111ea397fe69a4b1a */\ +/* sqr(t[0], t[0]); */ /* 102: 340223d472ffcd349634 */\ +/* sqr(t[0], t[0]); */ /* 103: 680447a8e5ff9a692c68 */\ +/* sqr(t[0], t[0]); */ /* 104: d0088f51cbff34d258d0 */\ +sqr_n_mul(t[0], t[0], 4, t[4]); /* 105: d0088f51cbff34d258dd */\ +/* sqr(t[0], t[0]); */ /* 106: 1a0111ea397fe69a4b1ba */\ +/* sqr(t[0], t[0]); */ /* 107: 340223d472ffcd3496374 */\ +/* sqr(t[0], t[0]); */ /* 108: 680447a8e5ff9a692c6e8 */\ +/* sqr(t[0], t[0]); */ /* 109: d0088f51cbff34d258dd0 */\ +/* sqr(t[0], t[0]); */ /* 110: 1a0111ea397fe69a4b1ba0 */\ +/* sqr(t[0], t[0]); */ /* 111: 340223d472ffcd34963740 */\ +sqr_n_mul(t[0], t[0], 6, t[8]); /* 112: 340223d472ffcd3496374f */\ +/* sqr(t[0], t[0]); */ /* 113: 680447a8e5ff9a692c6e9e */\ +/* sqr(t[0], t[0]); */ /* 114: d0088f51cbff34d258dd3c */\ +/* sqr(t[0], t[0]); */ /* 115: 1a0111ea397fe69a4b1ba78 */\ +/* sqr(t[0], t[0]); */ /* 116: 340223d472ffcd3496374f0 */\ +/* sqr(t[0], t[0]); */ /* 117: 680447a8e5ff9a692c6e9e0 */\ +/* sqr(t[0], t[0]); */ /* 118: d0088f51cbff34d258dd3c0 */\ +sqr_n_mul(t[0], t[0], 6, t[14]); /* 119: d0088f51cbff34d258dd3db */\ +/* sqr(t[0], t[0]); */ /* 120: 1a0111ea397fe69a4b1ba7b6 */\ +/* sqr(t[0], t[0]); */ /* 121: 340223d472ffcd3496374f6c */\ +/* sqr(t[0], t[0]); */ /* 122: 680447a8e5ff9a692c6e9ed8 */\ +sqr_n_mul(t[0], t[0], 3, t[1]); /* 123: 680447a8e5ff9a692c6e9ed9 */\ +/* sqr(t[0], t[0]); */ /* 124: d0088f51cbff34d258dd3db2 */\ +/* sqr(t[0], t[0]); */ /* 125: 1a0111ea397fe69a4b1ba7b64 */\ +/* sqr(t[0], t[0]); */ /* 126: 340223d472ffcd3496374f6c8 */\ +/* sqr(t[0], t[0]); */ /* 127: 680447a8e5ff9a692c6e9ed90 */\ +/* sqr(t[0], t[0]); */ /* 128: d0088f51cbff34d258dd3db20 */\ +/* sqr(t[0], t[0]); */ /* 129: 1a0111ea397fe69a4b1ba7b640 */\ +/* sqr(t[0], t[0]); */ /* 130: 340223d472ffcd3496374f6c80 */\ +/* sqr(t[0], t[0]); */ /* 131: 680447a8e5ff9a692c6e9ed900 */\ +sqr_n_mul(t[0], t[0], 8, t[4]); /* 132: 680447a8e5ff9a692c6e9ed90d */\ +/* sqr(t[0], t[0]); */ /* 133: d0088f51cbff34d258dd3db21a */\ +/* sqr(t[0], t[0]); */ /* 134: 1a0111ea397fe69a4b1ba7b6434 */\ +/* sqr(t[0], t[0]); */ /* 135: 340223d472ffcd3496374f6c868 */\ +/* sqr(t[0], t[0]); */ /* 136: 680447a8e5ff9a692c6e9ed90d0 */\ +/* sqr(t[0], t[0]); */ /* 137: d0088f51cbff34d258dd3db21a0 */\ +/* sqr(t[0], t[0]); */ /* 138: 1a0111ea397fe69a4b1ba7b64340 */\ +/* sqr(t[0], t[0]); */ /* 139: 340223d472ffcd3496374f6c8680 */\ +sqr_n_mul(t[0], t[0], 7, t[12]); /* 140: 340223d472ffcd3496374f6c8697 */\ +/* sqr(t[0], t[0]); */ /* 141: 680447a8e5ff9a692c6e9ed90d2e */\ +/* sqr(t[0], t[0]); */ /* 142: d0088f51cbff34d258dd3db21a5c */\ +/* sqr(t[0], t[0]); */ /* 143: 1a0111ea397fe69a4b1ba7b6434b8 */\ +/* sqr(t[0], t[0]); */ /* 144: 340223d472ffcd3496374f6c86970 */\ +/* sqr(t[0], t[0]); */ /* 145: 680447a8e5ff9a692c6e9ed90d2e0 */\ +sqr_n_mul(t[0], t[0], 5, t[13]); /* 146: 680447a8e5ff9a692c6e9ed90d2eb */\ +/* sqr(t[0], t[0]); */ /* 147: d0088f51cbff34d258dd3db21a5d6 */\ +/* sqr(t[0], t[0]); */ /* 148: 1a0111ea397fe69a4b1ba7b6434bac */\ +/* sqr(t[0], t[0]); */ /* 149: 340223d472ffcd3496374f6c869758 */\ +/* sqr(t[0], t[0]); */ /* 150: 680447a8e5ff9a692c6e9ed90d2eb0 */\ +/* sqr(t[0], t[0]); */ /* 151: d0088f51cbff34d258dd3db21a5d60 */\ +/* sqr(t[0], t[0]); */ /* 152: 1a0111ea397fe69a4b1ba7b6434bac0 */\ +sqr_n_mul(t[0], t[0], 6, t[4]); /* 153: 1a0111ea397fe69a4b1ba7b6434bacd */\ +/* sqr(t[0], t[0]); */ /* 154: 340223d472ffcd3496374f6c869759a */\ +/* sqr(t[0], t[0]); */ /* 155: 680447a8e5ff9a692c6e9ed90d2eb34 */\ +/* sqr(t[0], t[0]); */ /* 156: d0088f51cbff34d258dd3db21a5d668 */\ +/* sqr(t[0], t[0]); */ /* 157: 1a0111ea397fe69a4b1ba7b6434bacd0 */\ +/* sqr(t[0], t[0]); */ /* 158: 340223d472ffcd3496374f6c869759a0 */\ +/* sqr(t[0], t[0]); */ /* 159: 680447a8e5ff9a692c6e9ed90d2eb340 */\ +sqr_n_mul(t[0], t[0], 6, t[6]); /* 160: 680447a8e5ff9a692c6e9ed90d2eb35d */\ +/* sqr(t[0], t[0]); */ /* 161: d0088f51cbff34d258dd3db21a5d66ba */\ +/* sqr(t[0], t[0]); */ /* 162: 1a0111ea397fe69a4b1ba7b6434bacd74 */\ +/* sqr(t[0], t[0]); */ /* 163: 340223d472ffcd3496374f6c869759ae8 */\ +/* sqr(t[0], t[0]); */ /* 164: 680447a8e5ff9a692c6e9ed90d2eb35d0 */\ +sqr_n_mul(t[0], t[0], 4, t[10]); /* 165: 680447a8e5ff9a692c6e9ed90d2eb35d9 */\ +/* sqr(t[0], t[0]); */ /* 166: d0088f51cbff34d258dd3db21a5d66bb2 */\ +/* sqr(t[0], t[0]); */ /* 167: 1a0111ea397fe69a4b1ba7b6434bacd764 */\ +/* sqr(t[0], t[0]); */ /* 168: 340223d472ffcd3496374f6c869759aec8 */\ +/* sqr(t[0], t[0]); */ /* 169: 680447a8e5ff9a692c6e9ed90d2eb35d90 */\ +/* sqr(t[0], t[0]); */ /* 170: d0088f51cbff34d258dd3db21a5d66bb20 */\ +/* sqr(t[0], t[0]); */ /* 171: 1a0111ea397fe69a4b1ba7b6434bacd7640 */\ +/* sqr(t[0], t[0]); */ /* 172: 340223d472ffcd3496374f6c869759aec80 */\ +/* sqr(t[0], t[0]); */ /* 173: 680447a8e5ff9a692c6e9ed90d2eb35d900 */\ +sqr_n_mul(t[0], t[0], 8, t[6]); /* 174: 680447a8e5ff9a692c6e9ed90d2eb35d91d */\ +/* sqr(t[0], t[0]); */ /* 175: d0088f51cbff34d258dd3db21a5d66bb23a */\ +/* sqr(t[0], t[0]); */ /* 176: 1a0111ea397fe69a4b1ba7b6434bacd76474 */\ +/* sqr(t[0], t[0]); */ /* 177: 340223d472ffcd3496374f6c869759aec8e8 */\ +/* sqr(t[0], t[0]); */ /* 178: 680447a8e5ff9a692c6e9ed90d2eb35d91d0 */\ +sqr_n_mul(t[0], t[0], 4, t[4]); /* 179: 680447a8e5ff9a692c6e9ed90d2eb35d91dd */\ +/* sqr(t[0], t[0]); */ /* 180: d0088f51cbff34d258dd3db21a5d66bb23ba */\ +/* sqr(t[0], t[0]); */ /* 181: 1a0111ea397fe69a4b1ba7b6434bacd764774 */\ +/* sqr(t[0], t[0]); */ /* 182: 340223d472ffcd3496374f6c869759aec8ee8 */\ +/* sqr(t[0], t[0]); */ /* 183: 680447a8e5ff9a692c6e9ed90d2eb35d91dd0 */\ +/* sqr(t[0], t[0]); */ /* 184: d0088f51cbff34d258dd3db21a5d66bb23ba0 */\ +/* sqr(t[0], t[0]); */ /* 185: 1a0111ea397fe69a4b1ba7b6434bacd7647740 */\ +/* sqr(t[0], t[0]); */ /* 186: 340223d472ffcd3496374f6c869759aec8ee80 */\ +sqr_n_mul(t[0], t[0], 7, t[12]); /* 187: 340223d472ffcd3496374f6c869759aec8ee97 */\ +/* sqr(t[0], t[0]); */ /* 188: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e */\ +/* sqr(t[0], t[0]); */ /* 189: d0088f51cbff34d258dd3db21a5d66bb23ba5c */\ +/* sqr(t[0], t[0]); */ /* 190: 1a0111ea397fe69a4b1ba7b6434bacd764774b8 */\ +/* sqr(t[0], t[0]); */ /* 191: 340223d472ffcd3496374f6c869759aec8ee970 */\ +/* sqr(t[0], t[0]); */ /* 192: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e0 */\ +/* sqr(t[0], t[0]); */ /* 193: d0088f51cbff34d258dd3db21a5d66bb23ba5c0 */\ +/* sqr(t[0], t[0]); */ /* 194: 1a0111ea397fe69a4b1ba7b6434bacd764774b80 */\ +/* sqr(t[0], t[0]); */ /* 195: 340223d472ffcd3496374f6c869759aec8ee9700 */\ +/* sqr(t[0], t[0]); */ /* 196: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e00 */\ +sqr_n_mul(t[0], t[0], 9, t[11]); /* 197: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13 */\ +/* sqr(t[0], t[0]); */ /* 198: d0088f51cbff34d258dd3db21a5d66bb23ba5c26 */\ +/* sqr(t[0], t[0]); */ /* 199: 1a0111ea397fe69a4b1ba7b6434bacd764774b84c */\ +sqr_n_mul(t[0], t[0], 2, t[9]); /* 200: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f */\ +/* sqr(t[0], t[0]); */ /* 201: 340223d472ffcd3496374f6c869759aec8ee9709e */\ +/* sqr(t[0], t[0]); */ /* 202: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13c */\ +/* sqr(t[0], t[0]); */ /* 203: d0088f51cbff34d258dd3db21a5d66bb23ba5c278 */\ +/* sqr(t[0], t[0]); */ /* 204: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f0 */\ +/* sqr(t[0], t[0]); */ /* 205: 340223d472ffcd3496374f6c869759aec8ee9709e0 */\ +sqr_n_mul(t[0], t[0], 5, t[7]); /* 206: 340223d472ffcd3496374f6c869759aec8ee9709e7 */\ +/* sqr(t[0], t[0]); */ /* 207: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce */\ +/* sqr(t[0], t[0]); */ /* 208: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c */\ +/* sqr(t[0], t[0]); */ /* 209: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38 */\ +/* sqr(t[0], t[0]); */ /* 210: 340223d472ffcd3496374f6c869759aec8ee9709e70 */\ +/* sqr(t[0], t[0]); */ /* 211: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce0 */\ +/* sqr(t[0], t[0]); */ /* 212: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c0 */\ +/* sqr(t[0], t[0]); */ /* 213: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f380 */\ +sqr_n_mul(t[0], t[0], 7, t[2]); /* 214: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385 */\ +/* sqr(t[0], t[0]); */ /* 215: 340223d472ffcd3496374f6c869759aec8ee9709e70a */\ +/* sqr(t[0], t[0]); */ /* 216: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce14 */\ +/* sqr(t[0], t[0]); */ /* 217: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c28 */\ +/* sqr(t[0], t[0]); */ /* 218: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f3850 */\ +/* sqr(t[0], t[0]); */ /* 219: 340223d472ffcd3496374f6c869759aec8ee9709e70a0 */\ +/* sqr(t[0], t[0]); */ /* 220: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce140 */\ +/* sqr(t[0], t[0]); */ /* 221: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c280 */\ +sqr_n_mul(t[0], t[0], 7, t[10]); /* 222: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c289 */\ +/* sqr(t[0], t[0]); */ /* 223: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512 */\ +/* sqr(t[0], t[0]); */ /* 224: 340223d472ffcd3496374f6c869759aec8ee9709e70a24 */\ +/* sqr(t[0], t[0]); */ /* 225: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce1448 */\ +/* sqr(t[0], t[0]); */ /* 226: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2890 */\ +/* sqr(t[0], t[0]); */ /* 227: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385120 */\ +/* sqr(t[0], t[0]); */ /* 228: 340223d472ffcd3496374f6c869759aec8ee9709e70a240 */\ +sqr_n_mul(t[0], t[0], 6, t[12]); /* 229: 340223d472ffcd3496374f6c869759aec8ee9709e70a257 */\ +/* sqr(t[0], t[0]); */ /* 230: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae */\ +/* sqr(t[0], t[0]); */ /* 231: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895c */\ +/* sqr(t[0], t[0]); */ /* 232: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512b8 */\ +/* sqr(t[0], t[0]); */ /* 233: 340223d472ffcd3496374f6c869759aec8ee9709e70a2570 */\ +/* sqr(t[0], t[0]); */ /* 234: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae0 */\ +sqr_n_mul(t[0], t[0], 5, t[6]); /* 235: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd */\ +/* sqr(t[0], t[0]); */ /* 236: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa */\ +/* sqr(t[0], t[0]); */ /* 237: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf4 */\ +/* sqr(t[0], t[0]); */ /* 238: 340223d472ffcd3496374f6c869759aec8ee9709e70a257e8 */\ +/* sqr(t[0], t[0]); */ /* 239: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd0 */\ +/* sqr(t[0], t[0]); */ /* 240: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa0 */\ +sqr_n_mul(t[0], t[0], 5, t[11]); /* 241: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3 */\ +/* sqr(t[0], t[0]); */ /* 242: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf66 */\ +/* sqr(t[0], t[0]); */ /* 243: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ecc */\ +/* sqr(t[0], t[0]); */ /* 244: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd98 */\ +/* sqr(t[0], t[0]); */ /* 245: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb30 */\ +/* sqr(t[0], t[0]); */ /* 246: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf660 */\ +sqr_n_mul(t[0], t[0], 5, t[11]); /* 247: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf673 */\ +/* sqr(t[0], t[0]); */ /* 248: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece6 */\ +/* sqr(t[0], t[0]); */ /* 249: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc */\ +/* sqr(t[0], t[0]); */ /* 250: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398 */\ +/* sqr(t[0], t[0]); */ /* 251: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730 */\ +/* sqr(t[0], t[0]); */ /* 252: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece60 */\ +/* sqr(t[0], t[0]); */ /* 253: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc0 */\ +/* sqr(t[0], t[0]); */ /* 254: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3980 */\ +/* sqr(t[0], t[0]); */ /* 255: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf67300 */\ +sqr_n_mul(t[0], t[0], 8, t[4]); /* 256: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d */\ +/* sqr(t[0], t[0]); */ /* 257: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a */\ +/* sqr(t[0], t[0]); */ /* 258: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34 */\ +/* sqr(t[0], t[0]); */ /* 259: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39868 */\ +/* sqr(t[0], t[0]); */ /* 260: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d0 */\ +/* sqr(t[0], t[0]); */ /* 261: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a0 */\ +/* sqr(t[0], t[0]); */ /* 262: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc340 */\ +/* sqr(t[0], t[0]); */ /* 263: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398680 */\ +sqr_n_mul(t[0], t[0], 7, t[3]); /* 264: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695 */\ +/* sqr(t[0], t[0]); */ /* 265: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a */\ +/* sqr(t[0], t[0]); */ /* 266: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a54 */\ +/* sqr(t[0], t[0]); */ /* 267: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a8 */\ +/* sqr(t[0], t[0]); */ /* 268: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3986950 */\ +/* sqr(t[0], t[0]); */ /* 269: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0 */\ +/* sqr(t[0], t[0]); */ /* 270: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a540 */\ +/* sqr(t[0], t[0]); */ /* 271: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a80 */\ +/* sqr(t[0], t[0]); */ /* 272: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869500 */\ +/* sqr(t[0], t[0]); */ /* 273: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a00 */\ +sqr_n_mul(t[0], t[0], 9, t[8]); /* 274: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f */\ +/* sqr(t[0], t[0]); */ /* 275: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e */\ +/* sqr(t[0], t[0]); */ /* 276: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83c */\ +/* sqr(t[0], t[0]); */ /* 277: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695078 */\ +/* sqr(t[0], t[0]); */ /* 278: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f0 */\ +/* sqr(t[0], t[0]); */ /* 279: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 280: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed */\ +/* sqr(t[0], t[0]); */ /* 281: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83da */\ +/* sqr(t[0], t[0]); */ /* 282: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b4 */\ +/* sqr(t[0], t[0]); */ /* 283: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f68 */\ +sqr_n_mul(t[0], t[0], 3, t[9]); /* 284: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b */\ +/* sqr(t[0], t[0]); */ /* 285: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed6 */\ +/* sqr(t[0], t[0]); */ /* 286: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac */\ +/* sqr(t[0], t[0]); */ /* 287: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b58 */\ +/* sqr(t[0], t[0]); */ /* 288: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0 */\ +/* sqr(t[0], t[0]); */ /* 289: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed60 */\ +/* sqr(t[0], t[0]); */ /* 290: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac0 */\ +/* sqr(t[0], t[0]); */ /* 291: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b580 */\ +/* sqr(t[0], t[0]); */ /* 292: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b00 */\ +sqr_n_mul(t[0], t[0], 8, t[8]); /* 293: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f */\ +/* sqr(t[0], t[0]); */ /* 294: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61e */\ +/* sqr(t[0], t[0]); */ /* 295: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3c */\ +/* sqr(t[0], t[0]); */ /* 296: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b5878 */\ +sqr_n_mul(t[0], t[0], 3, t[9]); /* 297: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b */\ +/* sqr(t[0], t[0]); */ /* 298: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6 */\ +/* sqr(t[0], t[0]); */ /* 299: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec */\ +/* sqr(t[0], t[0]); */ /* 300: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8 */\ +/* sqr(t[0], t[0]); */ /* 301: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b0 */\ +/* sqr(t[0], t[0]); */ /* 302: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f60 */\ +/* sqr(t[0], t[0]); */ /* 303: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec0 */\ +/* sqr(t[0], t[0]); */ /* 304: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d80 */\ +sqr_n_mul(t[0], t[0], 7, t[10]); /* 305: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89 */\ +/* sqr(t[0], t[0]); */ /* 306: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b12 */\ +/* sqr(t[0], t[0]); */ /* 307: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624 */\ +/* sqr(t[0], t[0]); */ /* 308: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec48 */\ +/* sqr(t[0], t[0]); */ /* 309: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d890 */\ +/* sqr(t[0], t[0]); */ /* 310: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120 */\ +/* sqr(t[0], t[0]); */ /* 311: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6240 */\ +/* sqr(t[0], t[0]); */ /* 312: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec480 */\ +/* sqr(t[0], t[0]); */ /* 313: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8900 */\ +/* sqr(t[0], t[0]); */ /* 314: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b1200 */\ +sqr_n_mul(t[0], t[0], 9, t[8]); /* 315: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f */\ +/* sqr(t[0], t[0]); */ /* 316: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e */\ +/* sqr(t[0], t[0]); */ /* 317: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c */\ +/* sqr(t[0], t[0]); */ /* 318: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89078 */\ +/* sqr(t[0], t[0]); */ /* 319: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f0 */\ +/* sqr(t[0], t[0]); */ /* 320: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e0 */\ +/* sqr(t[0], t[0]); */ /* 321: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c0 */\ +sqr_n_mul(t[0], t[0], 6, t[3]); /* 322: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d5 */\ +/* sqr(t[0], t[0]); */ /* 323: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa */\ +/* sqr(t[0], t[0]); */ /* 324: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f54 */\ +/* sqr(t[0], t[0]); */ /* 325: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241ea8 */\ +/* sqr(t[0], t[0]); */ /* 326: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d50 */\ +/* sqr(t[0], t[0]); */ /* 327: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa0 */\ +/* sqr(t[0], t[0]); */ /* 328: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f540 */\ +sqr_n_mul(t[0], t[0], 6, t[5]); /* 329: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f */\ +/* sqr(t[0], t[0]); */ /* 330: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe */\ +/* sqr(t[0], t[0]); */ /* 331: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57c */\ +/* sqr(t[0], t[0]); */ /* 332: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaf8 */\ +/* sqr(t[0], t[0]); */ /* 333: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f0 */\ +/* sqr(t[0], t[0]); */ /* 334: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 335: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff */\ +/* sqr(t[0], t[0]); */ /* 336: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe */\ +/* sqr(t[0], t[0]); */ /* 337: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffc */\ +/* sqr(t[0], t[0]); */ /* 338: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ff8 */\ +/* sqr(t[0], t[0]); */ /* 339: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff0 */\ +/* sqr(t[0], t[0]); */ /* 340: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 341: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff */\ +/* sqr(t[0], t[0]); */ /* 342: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aafffe */\ +/* sqr(t[0], t[0]); */ /* 343: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55fffc */\ +/* sqr(t[0], t[0]); */ /* 344: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfff8 */\ +/* sqr(t[0], t[0]); */ /* 345: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff0 */\ +sqr_n_mul(t[0], t[0], 4, t[4]); /* 346: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd */\ +/* sqr(t[0], t[0]); */ /* 347: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffa */\ +/* sqr(t[0], t[0]); */ /* 348: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff4 */\ +/* sqr(t[0], t[0]); */ /* 349: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffe8 */\ +sqr_n_mul(t[0], t[0], 3, t[9]); /* 350: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb */\ +/* sqr(t[0], t[0]); */ /* 351: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd6 */\ +/* sqr(t[0], t[0]); */ /* 352: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac */\ +/* sqr(t[0], t[0]); */ /* 353: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58 */\ +/* sqr(t[0], t[0]); */ /* 354: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb0 */\ +/* sqr(t[0], t[0]); */ /* 355: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd60 */\ +/* sqr(t[0], t[0]); */ /* 356: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac0 */\ +/* sqr(t[0], t[0]); */ /* 357: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff580 */\ +/* sqr(t[0], t[0]); */ /* 358: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb00 */\ +sqr_n_mul(t[0], t[0], 8, t[3]); /* 359: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb15 */\ +/* sqr(t[0], t[0]); */ /* 360: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a */\ +/* sqr(t[0], t[0]); */ /* 361: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54 */\ +/* sqr(t[0], t[0]); */ /* 362: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a8 */\ +/* sqr(t[0], t[0]); */ /* 363: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb150 */\ +/* sqr(t[0], t[0]); */ /* 364: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a0 */\ +/* sqr(t[0], t[0]); */ /* 365: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac540 */\ +/* sqr(t[0], t[0]); */ /* 366: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a80 */\ +sqr_n_mul(t[0], t[0], 7, t[5]); /* 367: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f */\ +/* sqr(t[0], t[0]); */ /* 368: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e */\ +/* sqr(t[0], t[0]); */ /* 369: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7c */\ +/* sqr(t[0], t[0]); */ /* 370: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54f8 */\ +/* sqr(t[0], t[0]); */ /* 371: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f0 */\ +/* sqr(t[0], t[0]); */ /* 372: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 373: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff */\ +/* sqr(t[0], t[0]); */ /* 374: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe */\ +/* sqr(t[0], t[0]); */ /* 375: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffc */\ +/* sqr(t[0], t[0]); */ /* 376: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ff8 */\ +/* sqr(t[0], t[0]); */ /* 377: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff0 */\ +/* sqr(t[0], t[0]); */ /* 378: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 379: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff */\ +/* sqr(t[0], t[0]); */ /* 380: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54fffe */\ +/* sqr(t[0], t[0]); */ /* 381: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9fffc */\ +/* sqr(t[0], t[0]); */ /* 382: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153fff8 */\ +/* sqr(t[0], t[0]); */ /* 383: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff0 */\ +sqr_n_mul(t[0], t[0], 4, t[8]); /* 384: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff */\ +/* sqr(t[0], t[0]); */ /* 385: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffe */\ +/* sqr(t[0], t[0]); */ /* 386: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffc */\ +/* sqr(t[0], t[0]); */ /* 387: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffff8 */\ +/* sqr(t[0], t[0]); */ /* 388: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff0 */\ +sqr_n_mul(t[0], t[0], 4, t[7]); /* 389: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff7 */\ +/* sqr(t[0], t[0]); */ /* 390: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee */\ +/* sqr(t[0], t[0]); */ /* 391: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc */\ +/* sqr(t[0], t[0]); */ /* 392: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb8 */\ +/* sqr(t[0], t[0]); */ /* 393: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff70 */\ +/* sqr(t[0], t[0]); */ /* 394: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee0 */\ +/* sqr(t[0], t[0]); */ /* 395: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc0 */\ +/* sqr(t[0], t[0]); */ /* 396: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb80 */\ +sqr_n_mul(t[0], t[0], 7, t[5]); /* 397: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f */\ +/* sqr(t[0], t[0]); */ /* 398: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e */\ +/* sqr(t[0], t[0]); */ /* 399: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7c */\ +/* sqr(t[0], t[0]); */ /* 400: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcf8 */\ +/* sqr(t[0], t[0]); */ /* 401: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f0 */\ +/* sqr(t[0], t[0]); */ /* 402: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e0 */\ +sqr_n_mul(t[0], t[0], 5, t[6]); /* 403: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd */\ +/* sqr(t[0], t[0]); */ /* 404: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa */\ +/* sqr(t[0], t[0]); */ /* 405: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff4 */\ +/* sqr(t[0], t[0]); */ /* 406: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fe8 */\ +/* sqr(t[0], t[0]); */ /* 407: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd0 */\ +/* sqr(t[0], t[0]); */ /* 408: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 409: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf */\ +/* sqr(t[0], t[0]); */ /* 410: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e */\ +/* sqr(t[0], t[0]); */ /* 411: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefc */\ +/* sqr(t[0], t[0]); */ /* 412: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdf8 */\ +/* sqr(t[0], t[0]); */ /* 413: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf0 */\ +/* sqr(t[0], t[0]); */ /* 414: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 415: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff */\ +/* sqr(t[0], t[0]); */ /* 416: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe */\ +/* sqr(t[0], t[0]); */ /* 417: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffc */\ +/* sqr(t[0], t[0]); */ /* 418: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbff8 */\ +/* sqr(t[0], t[0]); */ /* 419: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff0 */\ +/* sqr(t[0], t[0]); */ /* 420: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 421: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff */\ +/* sqr(t[0], t[0]); */ /* 422: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe */\ +/* sqr(t[0], t[0]); */ /* 423: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffc */\ +/* sqr(t[0], t[0]); */ /* 424: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fff8 */\ +/* sqr(t[0], t[0]); */ /* 425: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff0 */\ +/* sqr(t[0], t[0]); */ /* 426: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 427: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff */\ +/* sqr(t[0], t[0]); */ /* 428: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe */\ +/* sqr(t[0], t[0]); */ /* 429: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ffffc */\ +/* sqr(t[0], t[0]); */ /* 430: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffff8 */\ +/* sqr(t[0], t[0]); */ /* 431: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff0 */\ +/* sqr(t[0], t[0]); */ /* 432: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 433: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff */\ +/* sqr(t[0], t[0]); */ /* 434: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe */\ +/* sqr(t[0], t[0]); */ /* 435: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffc */\ +/* sqr(t[0], t[0]); */ /* 436: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffff8 */\ +/* sqr(t[0], t[0]); */ /* 437: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff0 */\ +/* sqr(t[0], t[0]); */ /* 438: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 439: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff */\ +/* sqr(t[0], t[0]); */ /* 440: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffffffe */\ +/* sqr(t[0], t[0]); */ /* 441: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffffffc */\ +/* sqr(t[0], t[0]); */ /* 442: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffff8 */\ +/* sqr(t[0], t[0]); */ /* 443: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff0 */\ +sqr_n_mul(t[0], t[0], 4, t[4]); /* 444: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd */\ +/* sqr(t[0], t[0]); */ /* 445: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa */\ +/* sqr(t[0], t[0]); */ /* 446: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff4 */\ +/* sqr(t[0], t[0]); */ /* 447: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffe8 */\ +/* sqr(t[0], t[0]); */ /* 448: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd0 */\ +/* sqr(t[0], t[0]); */ /* 449: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa0 */\ +/* sqr(t[0], t[0]); */ /* 450: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff40 */\ +sqr_n_mul(t[0], t[0], 6, t[3]); /* 451: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff55 */\ +/* sqr(t[0], t[0]); */ /* 452: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaa */\ +/* sqr(t[0], t[0]); */ /* 453: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd54 */\ +/* sqr(t[0], t[0]); */ /* 454: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaa8 */\ +/* sqr(t[0], t[0]); */ /* 455: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff550 */\ +sqr_n_mul(t[0], t[0], 4, t[2]); /* 456: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff555 */\ +/* sqr(t[0], t[0]); */ /* 457: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaaa */\ +/* sqr(t[0], t[0]); */ /* 458: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd554 */\ +/* sqr(t[0], t[0]); */ /* 459: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa8 */\ +sqr_n_mul(out, t[0], 3, t[1]); /* 460: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9 */\ +} while(0) diff --git a/crypto/blst_src/recip.c b/crypto/blst_src/recip.c new file mode 100644 index 00000000000..e0c700635ed --- /dev/null +++ b/crypto/blst_src/recip.c @@ -0,0 +1,139 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fields.h" + +#ifdef __OPTIMIZE_SIZE__ +/* + * 608 multiplications for scalar inversion modulo BLS12-381 prime, 32% + * more than corresponding optimal addition-chain, plus mispredicted + * branch penalties on top of that... The addition chain below was + * measured to be >50% faster. + */ +static void flt_reciprocal_fp(vec384 out, const vec384 inp) +{ + static const byte BLS12_381_P_minus_2[] = { + TO_BYTES(0xb9feffffffffaaa9), TO_BYTES(0x1eabfffeb153ffff), + TO_BYTES(0x6730d2a0f6b0f624), TO_BYTES(0x64774b84f38512bf), + TO_BYTES(0x4b1ba7b6434bacd7), TO_BYTES(0x1a0111ea397fe69a) + }; + + exp_mont_384(out, inp, BLS12_381_P_minus_2, 381, BLS12_381_P, p0); +} +#else +# define sqr(ret,a) sqr_fp(ret,a) +# define mul(ret,a,b) mul_fp(ret,a,b) +# define sqr_n_mul(ret,a,n,b) sqr_n_mul_fp(ret,a,n,b) + +# include "recip-addchain.h" +static void flt_reciprocal_fp(vec384 out, const vec384 inp) +{ + RECIPROCAL_MOD_BLS12_381_P(out, inp, vec384); +} +# undef RECIPROCAL_MOD_BLS12_381_P +# undef sqr_n_mul +# undef mul +# undef sqr +#endif + +static void flt_reciprocal_fp2(vec384x out, const vec384x inp) +{ + vec384 t0, t1; + + /* + * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i + */ + sqr_fp(t0, inp[0]); + sqr_fp(t1, inp[1]); + add_fp(t0, t0, t1); + flt_reciprocal_fp(t1, t0); + mul_fp(out[0], inp[0], t1); + mul_fp(out[1], inp[1], t1); + neg_fp(out[1], out[1]); +} + +static void reciprocal_fp(vec384 out, const vec384 inp) +{ + static const vec384 Px8 = { /* left-aligned value of the modulus */ + TO_LIMB_T(0xcff7fffffffd5558), TO_LIMB_T(0xf55ffff58a9ffffd), + TO_LIMB_T(0x39869507b587b120), TO_LIMB_T(0x23ba5c279c2895fb), + TO_LIMB_T(0x58dd3db21a5d66bb), TO_LIMB_T(0xd0088f51cbff34d2) + }; +#ifdef __BLST_NO_ASM__ +# define RRx4 BLS12_381_RR +#else + static const vec384 RRx4 = { /* (4<<768)%P */ + TO_LIMB_T(0x5f7e7cd070d107c2), TO_LIMB_T(0xec839a9ac49c13c8), + TO_LIMB_T(0x6933786f44f4ef0b), TO_LIMB_T(0xd6bf8b9c676be983), + TO_LIMB_T(0xd3adaaaa4dcefb06), TO_LIMB_T(0x12601bc1d82bc175) + }; +#endif + union { vec768 x; vec384 r[2]; } temp; + + ct_inverse_mod_383(temp.x, inp, BLS12_381_P, Px8); + redc_mont_384(temp.r[0], temp.x, BLS12_381_P, p0); + mul_mont_384(temp.r[0], temp.r[0], RRx4, BLS12_381_P, p0); + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* sign goes straight to flt_reciprocal */ + mul_mont_384(temp.r[1], temp.r[0], inp, BLS12_381_P, p0); + if (vec_is_equal(temp.r[1], BLS12_381_Rx.p, sizeof(vec384)) | + vec_is_zero(temp.r[1], sizeof(vec384))) + vec_copy(out, temp.r[0], sizeof(vec384)); + else + flt_reciprocal_fp(out, inp); +#else + vec_copy(out, temp.r[0], sizeof(vec384)); +#endif +#undef RRx4 +} + +void blst_fp_inverse(vec384 out, const vec384 inp) +{ reciprocal_fp(out, inp); } + +void blst_fp_eucl_inverse(vec384 ret, const vec384 a) +{ reciprocal_fp(ret, a); } + +static void reciprocal_fp2(vec384x out, const vec384x inp) +{ + vec384 t0, t1; + + /* + * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i + */ + sqr_fp(t0, inp[0]); + sqr_fp(t1, inp[1]); + add_fp(t0, t0, t1); + reciprocal_fp(t1, t0); + mul_fp(out[0], inp[0], t1); + mul_fp(out[1], inp[1], t1); + neg_fp(out[1], out[1]); +} + +void blst_fp2_inverse(vec384x out, const vec384x inp) +{ reciprocal_fp2(out, inp); } + +void blst_fp2_eucl_inverse(vec384x out, const vec384x inp) +{ reciprocal_fp2(out, inp); } + +static void reciprocal_fr(vec256 out, const vec256 inp) +{ + static const vec256 rx2 = { /* left-aligned value of the modulus */ + TO_LIMB_T(0xfffffffe00000002), TO_LIMB_T(0xa77b4805fffcb7fd), + TO_LIMB_T(0x6673b0101343b00a), TO_LIMB_T(0xe7db4ea6533afa90), + }; + vec512 temp; + + ct_inverse_mod_256(temp, inp, BLS12_381_r, rx2); + redc_mont_256(out, temp, BLS12_381_r, r0); + mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0); +} + +void blst_fr_inverse(vec256 out, const vec256 inp) +{ reciprocal_fr(out, inp); } + +void blst_fr_eucl_inverse(vec256 out, const vec256 inp) +{ reciprocal_fr(out, inp); } diff --git a/crypto/blst_src/sha256.h b/crypto/blst_src/sha256.h new file mode 100644 index 00000000000..77ddb6dc848 --- /dev/null +++ b/crypto/blst_src/sha256.h @@ -0,0 +1,140 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_381_ASM_SHA256_H__ +#define __BLS12_381_ASM_SHA256_H__ + +#include "vect.h" + +#if (defined(__x86_64__) || defined(__x86_64) || defined(_M_X64)) && \ + defined(__SHA__) /* -msha */ && !defined(__BLST_PORTABLE__) +# define sha256_block_data_order blst_sha256_block_data_order_shaext +#elif defined(__aarch64__) && \ + defined(__ARM_FEATURE_CRYPTO) && !defined(__BLST_PORTABLE__) +# define sha256_block_data_order blst_sha256_block_armv8 +#else +# define sha256_block_data_order blst_sha256_block_data_order +#endif +#define sha256_hcopy blst_sha256_hcopy +#define sha256_bcopy blst_sha256_bcopy +#define sha256_emit blst_sha256_emit + +void sha256_block_data_order(unsigned int *h, const void *inp, size_t blocks); +void sha256_hcopy(unsigned int dst[8], const unsigned int src[8]); +void sha256_bcopy(void *dst, const void *src, size_t len); + +/* + * If SHA256_CTX conflicts with something, just redefine it to alternative + * custom name prior including this header. + */ +typedef struct { + unsigned int h[8]; + unsigned long long N; + unsigned char buf[64]; + size_t off; +} SHA256_CTX; + + +static void sha256_init_h(unsigned int h[8]) +{ + h[0] = 0x6a09e667U; + h[1] = 0xbb67ae85U; + h[2] = 0x3c6ef372U; + h[3] = 0xa54ff53aU; + h[4] = 0x510e527fU; + h[5] = 0x9b05688cU; + h[6] = 0x1f83d9abU; + h[7] = 0x5be0cd19U; +} + +static void sha256_init(SHA256_CTX *ctx) +{ + sha256_init_h(ctx->h); + ctx->N = 0; + vec_zero(ctx->buf, sizeof(ctx->buf)); + ctx->off = 0; +} + +static void sha256_update(SHA256_CTX *ctx, const void *_inp, size_t len) +{ + size_t n; + const unsigned char *inp = _inp; + + ctx->N += len; + + if ((len != 0) & ((n = ctx->off) != 0)) { + size_t rem = sizeof(ctx->buf) - n; + + if (rem > len) { + sha256_bcopy(ctx->buf + n, inp, len); + ctx->off += len; + return; + } else { + sha256_bcopy(ctx->buf + n, inp, rem); + inp += rem; + len -= rem; + sha256_block_data_order(ctx->h, ctx->buf, 1); + vec_zero(ctx->buf, sizeof(ctx->buf)); + ctx->off = 0; + } + } + + n = len / sizeof(ctx->buf); + if (n > 0) { + sha256_block_data_order(ctx->h, inp, n); + n *= sizeof(ctx->buf); + inp += n; + len -= n; + } + + if (len) + sha256_bcopy(ctx->buf, inp, ctx->off = len); +} + +#define __TOBE32(ptr, val) ((ptr)[0] = (unsigned char)((val)>>24), \ + (ptr)[1] = (unsigned char)((val)>>16), \ + (ptr)[2] = (unsigned char)((val)>>8), \ + (ptr)[3] = (unsigned char)(val)) + +#if 1 +void sha256_emit(unsigned char md[32], const unsigned int h[8]); +#else +static void sha256_emit(unsigned char md[32], const unsigned int h[8]) +{ + unsigned int h_i; + + h_i = h[0]; __TOBE32(md + 0, h_i); + h_i = h[1]; __TOBE32(md + 4, h_i); + h_i = h[2]; __TOBE32(md + 8, h_i); + h_i = h[3]; __TOBE32(md + 12, h_i); + h_i = h[4]; __TOBE32(md + 16, h_i); + h_i = h[5]; __TOBE32(md + 20, h_i); + h_i = h[6]; __TOBE32(md + 24, h_i); + h_i = h[7]; __TOBE32(md + 28, h_i); +} +#endif + +static void sha256_final(unsigned char md[32], SHA256_CTX *ctx) +{ + unsigned long long bits = ctx->N * 8; + size_t n = ctx->off; + unsigned char *tail; + + ctx->buf[n++] = 0x80; + + if (n > (sizeof(ctx->buf) - 8)) { + sha256_block_data_order(ctx->h, ctx->buf, 1); + vec_zero(ctx->buf, sizeof(ctx->buf)); + } + + tail = ctx->buf + sizeof(ctx->buf) - 8; + __TOBE32(tail, (unsigned int)(bits >> 32)); + __TOBE32(tail + 4, (unsigned int)bits); + sha256_block_data_order(ctx->h, ctx->buf, 1); + sha256_emit(md, ctx->h); +} + +#undef __TOBE32 +#endif diff --git a/crypto/blst_src/sqrt-addchain.h b/crypto/blst_src/sqrt-addchain.h new file mode 100644 index 00000000000..4e7f0beb6b1 --- /dev/null +++ b/crypto/blst_src/sqrt-addchain.h @@ -0,0 +1,489 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * The "magic" number is (BLS12_381_P-3)/4. Exponentiation to which + * yields reciprocal of sqrt(x), which is used in simplified Shallue- + * van de Woestijne-Ulas map-to-curve method, but it's trivial to adapt + * it for more "traditional" sqrt(x) as 'x*ret' (or for is_square(x) + * as 'x*ret^2==1'). + * + * Generated with 'addchain 1000602388805416848354447456433976039139220704984751971333014534031007912622709466110671907282253916009473568139946' + * https://github.com/kwantam/addchain + * + * # Bos-Coster (win=4) : 458 (16) <<< + * # Bos-Coster (win=5) : 460 (28) + * # Bos-Coster (win=6) : 461 (33) + * # Bos-Coster (win=7) : 460 (28) + * # Bos-Coster (win=3) : 462 ( 9) + * # Bos-Coster (win=8) : 466 (34) + * # Bos-Coster (win=9) : 464 (31) + * # Yacobi : 478 (31) + * # Bos-Coster (win=10) : 473 (30) + * # Bos-Coster (win=2) : 486 ( 5) + * # Bergeron-Berstel-Brlek-Duboc : 489 ( 5) + */ + +#define RECIP_SQRT_MOD_BLS12_381_P(out, inp, ptype) do { \ +ptype t[16]; \ +vec_copy(t[13], inp, sizeof(ptype));/* 0: 1 */\ +sqr(t[0], t[13]); /* 1: 2 */\ +mul(t[8], t[0], t[13]); /* 2: 3 */\ +sqr(t[4], t[0]); /* 3: 4 */\ +mul(t[1], t[8], t[0]); /* 4: 5 */\ +mul(t[6], t[4], t[8]); /* 5: 7 */\ +mul(t[9], t[1], t[4]); /* 6: 9 */\ +mul(t[12], t[6], t[4]); /* 7: b */\ +mul(t[3], t[9], t[4]); /* 8: d */\ +mul(t[7], t[12], t[4]); /* 9: f */\ +mul(t[15], t[3], t[4]); /* 10: 11 */\ +mul(t[10], t[7], t[4]); /* 11: 13 */\ +mul(t[2], t[15], t[4]); /* 12: 15 */\ +mul(t[11], t[10], t[4]); /* 13: 17 */\ +sqr(t[0], t[3]); /* 14: 1a */\ +mul(t[14], t[11], t[4]); /* 15: 1b */\ +mul(t[5], t[0], t[8]); /* 16: 1d */\ +mul(t[4], t[0], t[1]); /* 17: 1f */\ +/* sqr(t[0], t[0]); */ /* 18: 34 */\ +/* sqr(t[0], t[0]); */ /* 19: 68 */\ +/* sqr(t[0], t[0]); */ /* 20: d0 */\ +/* sqr(t[0], t[0]); */ /* 21: 1a0 */\ +/* sqr(t[0], t[0]); */ /* 22: 340 */\ +/* sqr(t[0], t[0]); */ /* 23: 680 */\ +/* sqr(t[0], t[0]); */ /* 24: d00 */\ +/* sqr(t[0], t[0]); */ /* 25: 1a00 */\ +/* sqr(t[0], t[0]); */ /* 26: 3400 */\ +/* sqr(t[0], t[0]); */ /* 27: 6800 */\ +/* sqr(t[0], t[0]); */ /* 28: d000 */\ +/* sqr(t[0], t[0]); */ /* 29: 1a000 */\ +sqr_n_mul(t[0], t[0], 12, t[15]); /* 30: 1a011 */\ +/* sqr(t[0], t[0]); */ /* 31: 34022 */\ +/* sqr(t[0], t[0]); */ /* 32: 68044 */\ +/* sqr(t[0], t[0]); */ /* 33: d0088 */\ +/* sqr(t[0], t[0]); */ /* 34: 1a0110 */\ +/* sqr(t[0], t[0]); */ /* 35: 340220 */\ +/* sqr(t[0], t[0]); */ /* 36: 680440 */\ +/* sqr(t[0], t[0]); */ /* 37: d00880 */\ +sqr_n_mul(t[0], t[0], 7, t[7]); /* 38: d0088f */\ +/* sqr(t[0], t[0]); */ /* 39: 1a0111e */\ +/* sqr(t[0], t[0]); */ /* 40: 340223c */\ +/* sqr(t[0], t[0]); */ /* 41: 6804478 */\ +/* sqr(t[0], t[0]); */ /* 42: d0088f0 */\ +sqr_n_mul(t[0], t[0], 4, t[1]); /* 43: d0088f5 */\ +/* sqr(t[0], t[0]); */ /* 44: 1a0111ea */\ +/* sqr(t[0], t[0]); */ /* 45: 340223d4 */\ +/* sqr(t[0], t[0]); */ /* 46: 680447a8 */\ +/* sqr(t[0], t[0]); */ /* 47: d0088f50 */\ +/* sqr(t[0], t[0]); */ /* 48: 1a0111ea0 */\ +/* sqr(t[0], t[0]); */ /* 49: 340223d40 */\ +sqr_n_mul(t[0], t[0], 6, t[6]); /* 50: 340223d47 */\ +/* sqr(t[0], t[0]); */ /* 51: 680447a8e */\ +/* sqr(t[0], t[0]); */ /* 52: d0088f51c */\ +/* sqr(t[0], t[0]); */ /* 53: 1a0111ea38 */\ +/* sqr(t[0], t[0]); */ /* 54: 340223d470 */\ +/* sqr(t[0], t[0]); */ /* 55: 680447a8e0 */\ +/* sqr(t[0], t[0]); */ /* 56: d0088f51c0 */\ +/* sqr(t[0], t[0]); */ /* 57: 1a0111ea380 */\ +sqr_n_mul(t[0], t[0], 7, t[11]); /* 58: 1a0111ea397 */\ +/* sqr(t[0], t[0]); */ /* 59: 340223d472e */\ +/* sqr(t[0], t[0]); */ /* 60: 680447a8e5c */\ +/* sqr(t[0], t[0]); */ /* 61: d0088f51cb8 */\ +/* sqr(t[0], t[0]); */ /* 62: 1a0111ea3970 */\ +/* sqr(t[0], t[0]); */ /* 63: 340223d472e0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 64: 340223d472ff */\ +/* sqr(t[0], t[0]); */ /* 65: 680447a8e5fe */\ +/* sqr(t[0], t[0]); */ /* 66: d0088f51cbfc */\ +sqr_n_mul(t[0], t[0], 2, t[8]); /* 67: d0088f51cbff */\ +/* sqr(t[0], t[0]); */ /* 68: 1a0111ea397fe */\ +/* sqr(t[0], t[0]); */ /* 69: 340223d472ffc */\ +/* sqr(t[0], t[0]); */ /* 70: 680447a8e5ff8 */\ +/* sqr(t[0], t[0]); */ /* 71: d0088f51cbff0 */\ +/* sqr(t[0], t[0]); */ /* 72: 1a0111ea397fe0 */\ +/* sqr(t[0], t[0]); */ /* 73: 340223d472ffc0 */\ +sqr_n_mul(t[0], t[0], 6, t[3]); /* 74: 340223d472ffcd */\ +/* sqr(t[0], t[0]); */ /* 75: 680447a8e5ff9a */\ +/* sqr(t[0], t[0]); */ /* 76: d0088f51cbff34 */\ +/* sqr(t[0], t[0]); */ /* 77: 1a0111ea397fe68 */\ +/* sqr(t[0], t[0]); */ /* 78: 340223d472ffcd0 */\ +/* sqr(t[0], t[0]); */ /* 79: 680447a8e5ff9a0 */\ +/* sqr(t[0], t[0]); */ /* 80: d0088f51cbff340 */\ +sqr_n_mul(t[0], t[0], 6, t[3]); /* 81: d0088f51cbff34d */\ +/* sqr(t[0], t[0]); */ /* 82: 1a0111ea397fe69a */\ +/* sqr(t[0], t[0]); */ /* 83: 340223d472ffcd34 */\ +/* sqr(t[0], t[0]); */ /* 84: 680447a8e5ff9a68 */\ +/* sqr(t[0], t[0]); */ /* 85: d0088f51cbff34d0 */\ +/* sqr(t[0], t[0]); */ /* 86: 1a0111ea397fe69a0 */\ +/* sqr(t[0], t[0]); */ /* 87: 340223d472ffcd340 */\ +sqr_n_mul(t[0], t[0], 6, t[9]); /* 88: 340223d472ffcd349 */\ +/* sqr(t[0], t[0]); */ /* 89: 680447a8e5ff9a692 */\ +/* sqr(t[0], t[0]); */ /* 90: d0088f51cbff34d24 */\ +/* sqr(t[0], t[0]); */ /* 91: 1a0111ea397fe69a48 */\ +sqr_n_mul(t[0], t[0], 3, t[8]); /* 92: 1a0111ea397fe69a4b */\ +/* sqr(t[0], t[0]); */ /* 93: 340223d472ffcd3496 */\ +/* sqr(t[0], t[0]); */ /* 94: 680447a8e5ff9a692c */\ +/* sqr(t[0], t[0]); */ /* 95: d0088f51cbff34d258 */\ +/* sqr(t[0], t[0]); */ /* 96: 1a0111ea397fe69a4b0 */\ +/* sqr(t[0], t[0]); */ /* 97: 340223d472ffcd34960 */\ +/* sqr(t[0], t[0]); */ /* 98: 680447a8e5ff9a692c0 */\ +/* sqr(t[0], t[0]); */ /* 99: d0088f51cbff34d2580 */\ +sqr_n_mul(t[0], t[0], 7, t[3]); /* 100: d0088f51cbff34d258d */\ +/* sqr(t[0], t[0]); */ /* 101: 1a0111ea397fe69a4b1a */\ +/* sqr(t[0], t[0]); */ /* 102: 340223d472ffcd349634 */\ +/* sqr(t[0], t[0]); */ /* 103: 680447a8e5ff9a692c68 */\ +/* sqr(t[0], t[0]); */ /* 104: d0088f51cbff34d258d0 */\ +sqr_n_mul(t[0], t[0], 4, t[3]); /* 105: d0088f51cbff34d258dd */\ +/* sqr(t[0], t[0]); */ /* 106: 1a0111ea397fe69a4b1ba */\ +/* sqr(t[0], t[0]); */ /* 107: 340223d472ffcd3496374 */\ +/* sqr(t[0], t[0]); */ /* 108: 680447a8e5ff9a692c6e8 */\ +/* sqr(t[0], t[0]); */ /* 109: d0088f51cbff34d258dd0 */\ +/* sqr(t[0], t[0]); */ /* 110: 1a0111ea397fe69a4b1ba0 */\ +/* sqr(t[0], t[0]); */ /* 111: 340223d472ffcd34963740 */\ +sqr_n_mul(t[0], t[0], 6, t[7]); /* 112: 340223d472ffcd3496374f */\ +/* sqr(t[0], t[0]); */ /* 113: 680447a8e5ff9a692c6e9e */\ +/* sqr(t[0], t[0]); */ /* 114: d0088f51cbff34d258dd3c */\ +/* sqr(t[0], t[0]); */ /* 115: 1a0111ea397fe69a4b1ba78 */\ +/* sqr(t[0], t[0]); */ /* 116: 340223d472ffcd3496374f0 */\ +/* sqr(t[0], t[0]); */ /* 117: 680447a8e5ff9a692c6e9e0 */\ +/* sqr(t[0], t[0]); */ /* 118: d0088f51cbff34d258dd3c0 */\ +sqr_n_mul(t[0], t[0], 6, t[14]); /* 119: d0088f51cbff34d258dd3db */\ +/* sqr(t[0], t[0]); */ /* 120: 1a0111ea397fe69a4b1ba7b6 */\ +/* sqr(t[0], t[0]); */ /* 121: 340223d472ffcd3496374f6c */\ +/* sqr(t[0], t[0]); */ /* 122: 680447a8e5ff9a692c6e9ed8 */\ +sqr_n_mul(t[0], t[0], 3, t[13]); /* 123: 680447a8e5ff9a692c6e9ed9 */\ +/* sqr(t[0], t[0]); */ /* 124: d0088f51cbff34d258dd3db2 */\ +/* sqr(t[0], t[0]); */ /* 125: 1a0111ea397fe69a4b1ba7b64 */\ +/* sqr(t[0], t[0]); */ /* 126: 340223d472ffcd3496374f6c8 */\ +/* sqr(t[0], t[0]); */ /* 127: 680447a8e5ff9a692c6e9ed90 */\ +/* sqr(t[0], t[0]); */ /* 128: d0088f51cbff34d258dd3db20 */\ +/* sqr(t[0], t[0]); */ /* 129: 1a0111ea397fe69a4b1ba7b640 */\ +/* sqr(t[0], t[0]); */ /* 130: 340223d472ffcd3496374f6c80 */\ +/* sqr(t[0], t[0]); */ /* 131: 680447a8e5ff9a692c6e9ed900 */\ +sqr_n_mul(t[0], t[0], 8, t[3]); /* 132: 680447a8e5ff9a692c6e9ed90d */\ +/* sqr(t[0], t[0]); */ /* 133: d0088f51cbff34d258dd3db21a */\ +/* sqr(t[0], t[0]); */ /* 134: 1a0111ea397fe69a4b1ba7b6434 */\ +/* sqr(t[0], t[0]); */ /* 135: 340223d472ffcd3496374f6c868 */\ +/* sqr(t[0], t[0]); */ /* 136: 680447a8e5ff9a692c6e9ed90d0 */\ +/* sqr(t[0], t[0]); */ /* 137: d0088f51cbff34d258dd3db21a0 */\ +/* sqr(t[0], t[0]); */ /* 138: 1a0111ea397fe69a4b1ba7b64340 */\ +/* sqr(t[0], t[0]); */ /* 139: 340223d472ffcd3496374f6c8680 */\ +sqr_n_mul(t[0], t[0], 7, t[11]); /* 140: 340223d472ffcd3496374f6c8697 */\ +/* sqr(t[0], t[0]); */ /* 141: 680447a8e5ff9a692c6e9ed90d2e */\ +/* sqr(t[0], t[0]); */ /* 142: d0088f51cbff34d258dd3db21a5c */\ +/* sqr(t[0], t[0]); */ /* 143: 1a0111ea397fe69a4b1ba7b6434b8 */\ +/* sqr(t[0], t[0]); */ /* 144: 340223d472ffcd3496374f6c86970 */\ +/* sqr(t[0], t[0]); */ /* 145: 680447a8e5ff9a692c6e9ed90d2e0 */\ +sqr_n_mul(t[0], t[0], 5, t[12]); /* 146: 680447a8e5ff9a692c6e9ed90d2eb */\ +/* sqr(t[0], t[0]); */ /* 147: d0088f51cbff34d258dd3db21a5d6 */\ +/* sqr(t[0], t[0]); */ /* 148: 1a0111ea397fe69a4b1ba7b6434bac */\ +/* sqr(t[0], t[0]); */ /* 149: 340223d472ffcd3496374f6c869758 */\ +/* sqr(t[0], t[0]); */ /* 150: 680447a8e5ff9a692c6e9ed90d2eb0 */\ +/* sqr(t[0], t[0]); */ /* 151: d0088f51cbff34d258dd3db21a5d60 */\ +/* sqr(t[0], t[0]); */ /* 152: 1a0111ea397fe69a4b1ba7b6434bac0 */\ +sqr_n_mul(t[0], t[0], 6, t[3]); /* 153: 1a0111ea397fe69a4b1ba7b6434bacd */\ +/* sqr(t[0], t[0]); */ /* 154: 340223d472ffcd3496374f6c869759a */\ +/* sqr(t[0], t[0]); */ /* 155: 680447a8e5ff9a692c6e9ed90d2eb34 */\ +/* sqr(t[0], t[0]); */ /* 156: d0088f51cbff34d258dd3db21a5d668 */\ +/* sqr(t[0], t[0]); */ /* 157: 1a0111ea397fe69a4b1ba7b6434bacd0 */\ +/* sqr(t[0], t[0]); */ /* 158: 340223d472ffcd3496374f6c869759a0 */\ +/* sqr(t[0], t[0]); */ /* 159: 680447a8e5ff9a692c6e9ed90d2eb340 */\ +sqr_n_mul(t[0], t[0], 6, t[5]); /* 160: 680447a8e5ff9a692c6e9ed90d2eb35d */\ +/* sqr(t[0], t[0]); */ /* 161: d0088f51cbff34d258dd3db21a5d66ba */\ +/* sqr(t[0], t[0]); */ /* 162: 1a0111ea397fe69a4b1ba7b6434bacd74 */\ +/* sqr(t[0], t[0]); */ /* 163: 340223d472ffcd3496374f6c869759ae8 */\ +/* sqr(t[0], t[0]); */ /* 164: 680447a8e5ff9a692c6e9ed90d2eb35d0 */\ +sqr_n_mul(t[0], t[0], 4, t[9]); /* 165: 680447a8e5ff9a692c6e9ed90d2eb35d9 */\ +/* sqr(t[0], t[0]); */ /* 166: d0088f51cbff34d258dd3db21a5d66bb2 */\ +/* sqr(t[0], t[0]); */ /* 167: 1a0111ea397fe69a4b1ba7b6434bacd764 */\ +/* sqr(t[0], t[0]); */ /* 168: 340223d472ffcd3496374f6c869759aec8 */\ +/* sqr(t[0], t[0]); */ /* 169: 680447a8e5ff9a692c6e9ed90d2eb35d90 */\ +/* sqr(t[0], t[0]); */ /* 170: d0088f51cbff34d258dd3db21a5d66bb20 */\ +/* sqr(t[0], t[0]); */ /* 171: 1a0111ea397fe69a4b1ba7b6434bacd7640 */\ +/* sqr(t[0], t[0]); */ /* 172: 340223d472ffcd3496374f6c869759aec80 */\ +/* sqr(t[0], t[0]); */ /* 173: 680447a8e5ff9a692c6e9ed90d2eb35d900 */\ +sqr_n_mul(t[0], t[0], 8, t[5]); /* 174: 680447a8e5ff9a692c6e9ed90d2eb35d91d */\ +/* sqr(t[0], t[0]); */ /* 175: d0088f51cbff34d258dd3db21a5d66bb23a */\ +/* sqr(t[0], t[0]); */ /* 176: 1a0111ea397fe69a4b1ba7b6434bacd76474 */\ +/* sqr(t[0], t[0]); */ /* 177: 340223d472ffcd3496374f6c869759aec8e8 */\ +/* sqr(t[0], t[0]); */ /* 178: 680447a8e5ff9a692c6e9ed90d2eb35d91d0 */\ +sqr_n_mul(t[0], t[0], 4, t[3]); /* 179: 680447a8e5ff9a692c6e9ed90d2eb35d91dd */\ +/* sqr(t[0], t[0]); */ /* 180: d0088f51cbff34d258dd3db21a5d66bb23ba */\ +/* sqr(t[0], t[0]); */ /* 181: 1a0111ea397fe69a4b1ba7b6434bacd764774 */\ +/* sqr(t[0], t[0]); */ /* 182: 340223d472ffcd3496374f6c869759aec8ee8 */\ +/* sqr(t[0], t[0]); */ /* 183: 680447a8e5ff9a692c6e9ed90d2eb35d91dd0 */\ +/* sqr(t[0], t[0]); */ /* 184: d0088f51cbff34d258dd3db21a5d66bb23ba0 */\ +/* sqr(t[0], t[0]); */ /* 185: 1a0111ea397fe69a4b1ba7b6434bacd7647740 */\ +/* sqr(t[0], t[0]); */ /* 186: 340223d472ffcd3496374f6c869759aec8ee80 */\ +sqr_n_mul(t[0], t[0], 7, t[11]); /* 187: 340223d472ffcd3496374f6c869759aec8ee97 */\ +/* sqr(t[0], t[0]); */ /* 188: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e */\ +/* sqr(t[0], t[0]); */ /* 189: d0088f51cbff34d258dd3db21a5d66bb23ba5c */\ +/* sqr(t[0], t[0]); */ /* 190: 1a0111ea397fe69a4b1ba7b6434bacd764774b8 */\ +/* sqr(t[0], t[0]); */ /* 191: 340223d472ffcd3496374f6c869759aec8ee970 */\ +/* sqr(t[0], t[0]); */ /* 192: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e0 */\ +/* sqr(t[0], t[0]); */ /* 193: d0088f51cbff34d258dd3db21a5d66bb23ba5c0 */\ +/* sqr(t[0], t[0]); */ /* 194: 1a0111ea397fe69a4b1ba7b6434bacd764774b80 */\ +/* sqr(t[0], t[0]); */ /* 195: 340223d472ffcd3496374f6c869759aec8ee9700 */\ +/* sqr(t[0], t[0]); */ /* 196: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e00 */\ +sqr_n_mul(t[0], t[0], 9, t[10]); /* 197: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13 */\ +/* sqr(t[0], t[0]); */ /* 198: d0088f51cbff34d258dd3db21a5d66bb23ba5c26 */\ +/* sqr(t[0], t[0]); */ /* 199: 1a0111ea397fe69a4b1ba7b6434bacd764774b84c */\ +sqr_n_mul(t[0], t[0], 2, t[8]); /* 200: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f */\ +/* sqr(t[0], t[0]); */ /* 201: 340223d472ffcd3496374f6c869759aec8ee9709e */\ +/* sqr(t[0], t[0]); */ /* 202: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13c */\ +/* sqr(t[0], t[0]); */ /* 203: d0088f51cbff34d258dd3db21a5d66bb23ba5c278 */\ +/* sqr(t[0], t[0]); */ /* 204: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f0 */\ +/* sqr(t[0], t[0]); */ /* 205: 340223d472ffcd3496374f6c869759aec8ee9709e0 */\ +sqr_n_mul(t[0], t[0], 5, t[6]); /* 206: 340223d472ffcd3496374f6c869759aec8ee9709e7 */\ +/* sqr(t[0], t[0]); */ /* 207: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce */\ +/* sqr(t[0], t[0]); */ /* 208: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c */\ +/* sqr(t[0], t[0]); */ /* 209: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38 */\ +/* sqr(t[0], t[0]); */ /* 210: 340223d472ffcd3496374f6c869759aec8ee9709e70 */\ +/* sqr(t[0], t[0]); */ /* 211: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce0 */\ +/* sqr(t[0], t[0]); */ /* 212: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c0 */\ +/* sqr(t[0], t[0]); */ /* 213: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f380 */\ +sqr_n_mul(t[0], t[0], 7, t[1]); /* 214: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385 */\ +/* sqr(t[0], t[0]); */ /* 215: 340223d472ffcd3496374f6c869759aec8ee9709e70a */\ +/* sqr(t[0], t[0]); */ /* 216: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce14 */\ +/* sqr(t[0], t[0]); */ /* 217: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c28 */\ +/* sqr(t[0], t[0]); */ /* 218: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f3850 */\ +/* sqr(t[0], t[0]); */ /* 219: 340223d472ffcd3496374f6c869759aec8ee9709e70a0 */\ +/* sqr(t[0], t[0]); */ /* 220: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce140 */\ +/* sqr(t[0], t[0]); */ /* 221: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c280 */\ +sqr_n_mul(t[0], t[0], 7, t[9]); /* 222: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c289 */\ +/* sqr(t[0], t[0]); */ /* 223: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512 */\ +/* sqr(t[0], t[0]); */ /* 224: 340223d472ffcd3496374f6c869759aec8ee9709e70a24 */\ +/* sqr(t[0], t[0]); */ /* 225: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce1448 */\ +/* sqr(t[0], t[0]); */ /* 226: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2890 */\ +/* sqr(t[0], t[0]); */ /* 227: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385120 */\ +/* sqr(t[0], t[0]); */ /* 228: 340223d472ffcd3496374f6c869759aec8ee9709e70a240 */\ +sqr_n_mul(t[0], t[0], 6, t[11]); /* 229: 340223d472ffcd3496374f6c869759aec8ee9709e70a257 */\ +/* sqr(t[0], t[0]); */ /* 230: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae */\ +/* sqr(t[0], t[0]); */ /* 231: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895c */\ +/* sqr(t[0], t[0]); */ /* 232: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512b8 */\ +/* sqr(t[0], t[0]); */ /* 233: 340223d472ffcd3496374f6c869759aec8ee9709e70a2570 */\ +/* sqr(t[0], t[0]); */ /* 234: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 235: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd */\ +/* sqr(t[0], t[0]); */ /* 236: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa */\ +/* sqr(t[0], t[0]); */ /* 237: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf4 */\ +/* sqr(t[0], t[0]); */ /* 238: 340223d472ffcd3496374f6c869759aec8ee9709e70a257e8 */\ +/* sqr(t[0], t[0]); */ /* 239: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd0 */\ +/* sqr(t[0], t[0]); */ /* 240: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa0 */\ +sqr_n_mul(t[0], t[0], 5, t[10]); /* 241: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3 */\ +/* sqr(t[0], t[0]); */ /* 242: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf66 */\ +/* sqr(t[0], t[0]); */ /* 243: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ecc */\ +/* sqr(t[0], t[0]); */ /* 244: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd98 */\ +/* sqr(t[0], t[0]); */ /* 245: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb30 */\ +/* sqr(t[0], t[0]); */ /* 246: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf660 */\ +sqr_n_mul(t[0], t[0], 5, t[10]); /* 247: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf673 */\ +/* sqr(t[0], t[0]); */ /* 248: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece6 */\ +/* sqr(t[0], t[0]); */ /* 249: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc */\ +/* sqr(t[0], t[0]); */ /* 250: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398 */\ +/* sqr(t[0], t[0]); */ /* 251: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730 */\ +/* sqr(t[0], t[0]); */ /* 252: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece60 */\ +/* sqr(t[0], t[0]); */ /* 253: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc0 */\ +/* sqr(t[0], t[0]); */ /* 254: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3980 */\ +/* sqr(t[0], t[0]); */ /* 255: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf67300 */\ +sqr_n_mul(t[0], t[0], 8, t[3]); /* 256: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d */\ +/* sqr(t[0], t[0]); */ /* 257: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a */\ +/* sqr(t[0], t[0]); */ /* 258: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34 */\ +/* sqr(t[0], t[0]); */ /* 259: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39868 */\ +/* sqr(t[0], t[0]); */ /* 260: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d0 */\ +/* sqr(t[0], t[0]); */ /* 261: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a0 */\ +/* sqr(t[0], t[0]); */ /* 262: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc340 */\ +/* sqr(t[0], t[0]); */ /* 263: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398680 */\ +sqr_n_mul(t[0], t[0], 7, t[2]); /* 264: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695 */\ +/* sqr(t[0], t[0]); */ /* 265: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a */\ +/* sqr(t[0], t[0]); */ /* 266: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a54 */\ +/* sqr(t[0], t[0]); */ /* 267: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a8 */\ +/* sqr(t[0], t[0]); */ /* 268: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3986950 */\ +/* sqr(t[0], t[0]); */ /* 269: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0 */\ +/* sqr(t[0], t[0]); */ /* 270: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a540 */\ +/* sqr(t[0], t[0]); */ /* 271: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a80 */\ +/* sqr(t[0], t[0]); */ /* 272: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869500 */\ +/* sqr(t[0], t[0]); */ /* 273: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a00 */\ +sqr_n_mul(t[0], t[0], 9, t[7]); /* 274: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f */\ +/* sqr(t[0], t[0]); */ /* 275: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e */\ +/* sqr(t[0], t[0]); */ /* 276: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83c */\ +/* sqr(t[0], t[0]); */ /* 277: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695078 */\ +/* sqr(t[0], t[0]); */ /* 278: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f0 */\ +/* sqr(t[0], t[0]); */ /* 279: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e0 */\ +sqr_n_mul(t[0], t[0], 5, t[3]); /* 280: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed */\ +/* sqr(t[0], t[0]); */ /* 281: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83da */\ +/* sqr(t[0], t[0]); */ /* 282: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b4 */\ +/* sqr(t[0], t[0]); */ /* 283: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f68 */\ +sqr_n_mul(t[0], t[0], 3, t[8]); /* 284: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b */\ +/* sqr(t[0], t[0]); */ /* 285: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed6 */\ +/* sqr(t[0], t[0]); */ /* 286: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac */\ +/* sqr(t[0], t[0]); */ /* 287: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b58 */\ +/* sqr(t[0], t[0]); */ /* 288: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0 */\ +/* sqr(t[0], t[0]); */ /* 289: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed60 */\ +/* sqr(t[0], t[0]); */ /* 290: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac0 */\ +/* sqr(t[0], t[0]); */ /* 291: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b580 */\ +/* sqr(t[0], t[0]); */ /* 292: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b00 */\ +sqr_n_mul(t[0], t[0], 8, t[7]); /* 293: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f */\ +/* sqr(t[0], t[0]); */ /* 294: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61e */\ +/* sqr(t[0], t[0]); */ /* 295: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3c */\ +/* sqr(t[0], t[0]); */ /* 296: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b5878 */\ +sqr_n_mul(t[0], t[0], 3, t[8]); /* 297: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b */\ +/* sqr(t[0], t[0]); */ /* 298: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6 */\ +/* sqr(t[0], t[0]); */ /* 299: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec */\ +/* sqr(t[0], t[0]); */ /* 300: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8 */\ +/* sqr(t[0], t[0]); */ /* 301: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b0 */\ +/* sqr(t[0], t[0]); */ /* 302: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f60 */\ +/* sqr(t[0], t[0]); */ /* 303: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec0 */\ +/* sqr(t[0], t[0]); */ /* 304: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d80 */\ +sqr_n_mul(t[0], t[0], 7, t[9]); /* 305: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89 */\ +/* sqr(t[0], t[0]); */ /* 306: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b12 */\ +/* sqr(t[0], t[0]); */ /* 307: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624 */\ +/* sqr(t[0], t[0]); */ /* 308: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec48 */\ +/* sqr(t[0], t[0]); */ /* 309: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d890 */\ +/* sqr(t[0], t[0]); */ /* 310: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120 */\ +/* sqr(t[0], t[0]); */ /* 311: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6240 */\ +/* sqr(t[0], t[0]); */ /* 312: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec480 */\ +/* sqr(t[0], t[0]); */ /* 313: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8900 */\ +/* sqr(t[0], t[0]); */ /* 314: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b1200 */\ +sqr_n_mul(t[0], t[0], 9, t[7]); /* 315: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f */\ +/* sqr(t[0], t[0]); */ /* 316: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e */\ +/* sqr(t[0], t[0]); */ /* 317: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c */\ +/* sqr(t[0], t[0]); */ /* 318: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89078 */\ +/* sqr(t[0], t[0]); */ /* 319: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f0 */\ +/* sqr(t[0], t[0]); */ /* 320: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e0 */\ +/* sqr(t[0], t[0]); */ /* 321: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c0 */\ +sqr_n_mul(t[0], t[0], 6, t[2]); /* 322: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d5 */\ +/* sqr(t[0], t[0]); */ /* 323: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa */\ +/* sqr(t[0], t[0]); */ /* 324: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f54 */\ +/* sqr(t[0], t[0]); */ /* 325: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241ea8 */\ +/* sqr(t[0], t[0]); */ /* 326: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d50 */\ +/* sqr(t[0], t[0]); */ /* 327: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa0 */\ +/* sqr(t[0], t[0]); */ /* 328: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f540 */\ +sqr_n_mul(t[0], t[0], 6, t[4]); /* 329: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f */\ +/* sqr(t[0], t[0]); */ /* 330: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe */\ +/* sqr(t[0], t[0]); */ /* 331: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57c */\ +/* sqr(t[0], t[0]); */ /* 332: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaf8 */\ +/* sqr(t[0], t[0]); */ /* 333: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f0 */\ +/* sqr(t[0], t[0]); */ /* 334: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 335: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff */\ +/* sqr(t[0], t[0]); */ /* 336: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe */\ +/* sqr(t[0], t[0]); */ /* 337: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffc */\ +/* sqr(t[0], t[0]); */ /* 338: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ff8 */\ +/* sqr(t[0], t[0]); */ /* 339: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff0 */\ +/* sqr(t[0], t[0]); */ /* 340: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 341: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff */\ +/* sqr(t[0], t[0]); */ /* 342: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aafffe */\ +/* sqr(t[0], t[0]); */ /* 343: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55fffc */\ +/* sqr(t[0], t[0]); */ /* 344: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfff8 */\ +/* sqr(t[0], t[0]); */ /* 345: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff0 */\ +sqr_n_mul(t[0], t[0], 4, t[3]); /* 346: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd */\ +/* sqr(t[0], t[0]); */ /* 347: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffa */\ +/* sqr(t[0], t[0]); */ /* 348: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff4 */\ +/* sqr(t[0], t[0]); */ /* 349: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffe8 */\ +sqr_n_mul(t[0], t[0], 3, t[8]); /* 350: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb */\ +/* sqr(t[0], t[0]); */ /* 351: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd6 */\ +/* sqr(t[0], t[0]); */ /* 352: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac */\ +/* sqr(t[0], t[0]); */ /* 353: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58 */\ +/* sqr(t[0], t[0]); */ /* 354: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb0 */\ +/* sqr(t[0], t[0]); */ /* 355: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd60 */\ +/* sqr(t[0], t[0]); */ /* 356: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac0 */\ +/* sqr(t[0], t[0]); */ /* 357: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff580 */\ +/* sqr(t[0], t[0]); */ /* 358: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb00 */\ +sqr_n_mul(t[0], t[0], 8, t[2]); /* 359: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb15 */\ +/* sqr(t[0], t[0]); */ /* 360: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a */\ +/* sqr(t[0], t[0]); */ /* 361: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54 */\ +/* sqr(t[0], t[0]); */ /* 362: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a8 */\ +/* sqr(t[0], t[0]); */ /* 363: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb150 */\ +/* sqr(t[0], t[0]); */ /* 364: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a0 */\ +/* sqr(t[0], t[0]); */ /* 365: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac540 */\ +/* sqr(t[0], t[0]); */ /* 366: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a80 */\ +sqr_n_mul(t[0], t[0], 7, t[4]); /* 367: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f */\ +/* sqr(t[0], t[0]); */ /* 368: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e */\ +/* sqr(t[0], t[0]); */ /* 369: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7c */\ +/* sqr(t[0], t[0]); */ /* 370: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54f8 */\ +/* sqr(t[0], t[0]); */ /* 371: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f0 */\ +/* sqr(t[0], t[0]); */ /* 372: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 373: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff */\ +/* sqr(t[0], t[0]); */ /* 374: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe */\ +/* sqr(t[0], t[0]); */ /* 375: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffc */\ +/* sqr(t[0], t[0]); */ /* 376: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ff8 */\ +/* sqr(t[0], t[0]); */ /* 377: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff0 */\ +/* sqr(t[0], t[0]); */ /* 378: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 379: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff */\ +/* sqr(t[0], t[0]); */ /* 380: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54fffe */\ +/* sqr(t[0], t[0]); */ /* 381: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9fffc */\ +/* sqr(t[0], t[0]); */ /* 382: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153fff8 */\ +/* sqr(t[0], t[0]); */ /* 383: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff0 */\ +sqr_n_mul(t[0], t[0], 4, t[7]); /* 384: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff */\ +/* sqr(t[0], t[0]); */ /* 385: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffe */\ +/* sqr(t[0], t[0]); */ /* 386: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffc */\ +/* sqr(t[0], t[0]); */ /* 387: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffff8 */\ +/* sqr(t[0], t[0]); */ /* 388: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff0 */\ +sqr_n_mul(t[0], t[0], 4, t[6]); /* 389: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff7 */\ +/* sqr(t[0], t[0]); */ /* 390: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee */\ +/* sqr(t[0], t[0]); */ /* 391: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc */\ +/* sqr(t[0], t[0]); */ /* 392: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb8 */\ +/* sqr(t[0], t[0]); */ /* 393: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff70 */\ +/* sqr(t[0], t[0]); */ /* 394: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee0 */\ +/* sqr(t[0], t[0]); */ /* 395: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc0 */\ +/* sqr(t[0], t[0]); */ /* 396: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb80 */\ +sqr_n_mul(t[0], t[0], 7, t[4]); /* 397: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f */\ +/* sqr(t[0], t[0]); */ /* 398: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e */\ +/* sqr(t[0], t[0]); */ /* 399: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7c */\ +/* sqr(t[0], t[0]); */ /* 400: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcf8 */\ +/* sqr(t[0], t[0]); */ /* 401: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f0 */\ +/* sqr(t[0], t[0]); */ /* 402: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e0 */\ +sqr_n_mul(t[0], t[0], 5, t[5]); /* 403: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd */\ +/* sqr(t[0], t[0]); */ /* 404: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa */\ +/* sqr(t[0], t[0]); */ /* 405: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff4 */\ +/* sqr(t[0], t[0]); */ /* 406: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fe8 */\ +/* sqr(t[0], t[0]); */ /* 407: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd0 */\ +/* sqr(t[0], t[0]); */ /* 408: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 409: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf */\ +/* sqr(t[0], t[0]); */ /* 410: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e */\ +/* sqr(t[0], t[0]); */ /* 411: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefc */\ +/* sqr(t[0], t[0]); */ /* 412: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdf8 */\ +/* sqr(t[0], t[0]); */ /* 413: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf0 */\ +/* sqr(t[0], t[0]); */ /* 414: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 415: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff */\ +/* sqr(t[0], t[0]); */ /* 416: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe */\ +/* sqr(t[0], t[0]); */ /* 417: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffc */\ +/* sqr(t[0], t[0]); */ /* 418: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbff8 */\ +/* sqr(t[0], t[0]); */ /* 419: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff0 */\ +/* sqr(t[0], t[0]); */ /* 420: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 421: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff */\ +/* sqr(t[0], t[0]); */ /* 422: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe */\ +/* sqr(t[0], t[0]); */ /* 423: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffc */\ +/* sqr(t[0], t[0]); */ /* 424: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fff8 */\ +/* sqr(t[0], t[0]); */ /* 425: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff0 */\ +/* sqr(t[0], t[0]); */ /* 426: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 427: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff */\ +/* sqr(t[0], t[0]); */ /* 428: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe */\ +/* sqr(t[0], t[0]); */ /* 429: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ffffc */\ +/* sqr(t[0], t[0]); */ /* 430: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffff8 */\ +/* sqr(t[0], t[0]); */ /* 431: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff0 */\ +/* sqr(t[0], t[0]); */ /* 432: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 433: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff */\ +/* sqr(t[0], t[0]); */ /* 434: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe */\ +/* sqr(t[0], t[0]); */ /* 435: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffc */\ +/* sqr(t[0], t[0]); */ /* 436: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffff8 */\ +/* sqr(t[0], t[0]); */ /* 437: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff0 */\ +/* sqr(t[0], t[0]); */ /* 438: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe0 */\ +sqr_n_mul(t[0], t[0], 5, t[4]); /* 439: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff */\ +/* sqr(t[0], t[0]); */ /* 440: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffffffe */\ +/* sqr(t[0], t[0]); */ /* 441: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffffffc */\ +/* sqr(t[0], t[0]); */ /* 442: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffff8 */\ +/* sqr(t[0], t[0]); */ /* 443: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff0 */\ +sqr_n_mul(t[0], t[0], 4, t[3]); /* 444: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd */\ +/* sqr(t[0], t[0]); */ /* 445: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa */\ +/* sqr(t[0], t[0]); */ /* 446: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff4 */\ +/* sqr(t[0], t[0]); */ /* 447: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffe8 */\ +/* sqr(t[0], t[0]); */ /* 448: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd0 */\ +/* sqr(t[0], t[0]); */ /* 449: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa0 */\ +/* sqr(t[0], t[0]); */ /* 450: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff40 */\ +sqr_n_mul(t[0], t[0], 6, t[2]); /* 451: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff55 */\ +/* sqr(t[0], t[0]); */ /* 452: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaa */\ +/* sqr(t[0], t[0]); */ /* 453: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd54 */\ +/* sqr(t[0], t[0]); */ /* 454: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaa8 */\ +/* sqr(t[0], t[0]); */ /* 455: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff550 */\ +sqr_n_mul(t[0], t[0], 4, t[1]); /* 456: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff555 */\ +sqr(out, t[0]); /* 457: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaaa */\ +} while(0) diff --git a/crypto/blst_src/sqrt.c b/crypto/blst_src/sqrt.c new file mode 100644 index 00000000000..cf149fd1124 --- /dev/null +++ b/crypto/blst_src/sqrt.c @@ -0,0 +1,261 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fields.h" + +#ifdef __OPTIMIZE_SIZE__ +static void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp) +{ + static const byte BLS_12_381_P_minus_3_div_4[] = { + TO_BYTES(0xee7fbfffffffeaaa), TO_BYTES(0x07aaffffac54ffff), + TO_BYTES(0xd9cc34a83dac3d89), TO_BYTES(0xd91dd2e13ce144af), + TO_BYTES(0x92c6e9ed90d2eb35), TO_BYTES(0x0680447a8e5ff9a6) + }; + + exp_mont_384(out, inp, BLS_12_381_P_minus_3_div_4, 379, BLS12_381_P, p0); +} +#else +# if 1 +/* + * "383"-bit variant omits full reductions at the ends of squarings, + * which results in up to ~15% improvement. [One can improve further + * by omitting full reductions even after multiplications and + * performing final reduction at the very end of the chain.] + */ +static inline void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count, + const vec384 b) +{ sqr_n_mul_mont_383(out, a, count, BLS12_381_P, p0, b); } +# else +static void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count, + const vec384 b) +{ + while(count--) { + sqr_fp(out, a); + a = out; + } + mul_fp(out, out, b); +} +# endif + +# define sqr(ret,a) sqr_fp(ret,a) +# define mul(ret,a,b) mul_fp(ret,a,b) +# define sqr_n_mul(ret,a,n,b) sqr_n_mul_fp(ret,a,n,b) + +# include "sqrt-addchain.h" +static void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp) +{ + RECIP_SQRT_MOD_BLS12_381_P(out, inp, vec384); +} +# undef RECIP_SQRT_MOD_BLS12_381_P + +# undef sqr_n_mul +# undef sqr +# undef mul +#endif + +static bool_t recip_sqrt_fp(vec384 out, const vec384 inp) +{ + vec384 t0, t1; + bool_t ret; + + recip_sqrt_fp_3mod4(t0, inp); + + mul_fp(t1, t0, inp); + sqr_fp(t1, t1); + ret = vec_is_equal(t1, inp, sizeof(t1)); + vec_copy(out, t0, sizeof(t0)); + + return ret; +} + +static bool_t sqrt_fp(vec384 out, const vec384 inp) +{ + vec384 t0, t1; + bool_t ret; + + recip_sqrt_fp_3mod4(t0, inp); + + mul_fp(t0, t0, inp); + sqr_fp(t1, t0); + ret = vec_is_equal(t1, inp, sizeof(t1)); + vec_copy(out, t0, sizeof(t0)); + + return ret; +} + +int blst_fp_sqrt(vec384 out, const vec384 inp) +{ return (int)sqrt_fp(out, inp); } + +int blst_fp_is_square(const vec384 inp) +{ + return (int)ct_is_square_mod_384(inp, BLS12_381_P); +} + +static bool_t sqrt_align_fp2(vec384x out, const vec384x ret, + const vec384x sqrt, const vec384x inp) +{ + static const vec384x sqrt_minus_1 = { { 0 }, { ONE_MONT_P } }; + static const vec384x sqrt_sqrt_minus_1 = { + /* + * "magic" number is ±2^((p-3)/4)%p, which is "1/sqrt(2)", + * in quotes because 2*"1/sqrt(2)"^2 == -1 mod p, not 1, + * but it pivots into "complex" plane nevertheless... + */ + { TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183), + TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18), + TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) }, + { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), + TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), + TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } + }; + static const vec384x sqrt_minus_sqrt_minus_1 = { + { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), + TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), + TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) }, + { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), + TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), + TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } + }; + vec384x coeff, t0, t1; + bool_t is_sqrt, flag; + + /* + * Instead of multiple trial squarings we can perform just one + * and see if the result is "rotated by multiple of 90°" in + * relation to |inp|, and "rotate" |ret| accordingly. + */ + sqr_fp2(t0, sqrt); + /* "sqrt(|inp|)"^2 = (a + b*i)^2 = (a^2-b^2) + 2ab*i */ + + /* (a^2-b^2) + 2ab*i == |inp| ? |ret| is spot on */ + sub_fp2(t1, t0, inp); + is_sqrt = vec_is_zero(t1, sizeof(t1)); + vec_copy(coeff, BLS12_381_Rx.p2, sizeof(coeff)); + + /* -(a^2-b^2) - 2ab*i == |inp| ? "rotate |ret| by 90°" */ + add_fp2(t1, t0, inp); + vec_select(coeff, sqrt_minus_1, coeff, sizeof(coeff), + flag = vec_is_zero(t1, sizeof(t1))); + is_sqrt |= flag; + + /* 2ab - (a^2-b^2)*i == |inp| ? "rotate |ret| by 135°" */ + sub_fp(t1[0], t0[0], inp[1]); + add_fp(t1[1], t0[1], inp[0]); + vec_select(coeff, sqrt_sqrt_minus_1, coeff, sizeof(coeff), + flag = vec_is_zero(t1, sizeof(t1))); + is_sqrt |= flag; + + /* -2ab + (a^2-b^2)*i == |inp| ? "rotate |ret| by 45°" */ + add_fp(t1[0], t0[0], inp[1]); + sub_fp(t1[1], t0[1], inp[0]); + vec_select(coeff, sqrt_minus_sqrt_minus_1, coeff, sizeof(coeff), + flag = vec_is_zero(t1, sizeof(t1))); + is_sqrt |= flag; + + /* actual "rotation" */ + mul_fp2(out, ret, coeff); + + return is_sqrt; +} + +/* + * |inp| = a + b*i + */ +static bool_t recip_sqrt_fp2(vec384x out, const vec384x inp, + const vec384x recip_ZZZ, + const vec384x magic_ZZZ) +{ + vec384 aa, bb, cc; + vec384x inp_; + bool_t is_sqrt; + + sqr_fp(aa, inp[0]); + sqr_fp(bb, inp[1]); + add_fp(aa, aa, bb); + + is_sqrt = recip_sqrt_fp(cc, aa); /* 1/sqrt(a²+b²) */ + + /* if |inp| doesn't have quadratic residue, multiply by "1/Z³" ... */ + mul_fp2(inp_, inp, recip_ZZZ); + /* ... and adjust |aa| and |cc| accordingly */ + { + vec384 za, zc; + + mul_fp(za, aa, magic_ZZZ[0]); /* aa*(za² + zb²) */ + mul_fp(zc, cc, magic_ZZZ[1]); /* cc*(za² + zb²)^((p-3)/4) */ + vec_select(aa, aa, za, sizeof(aa), is_sqrt); + vec_select(cc, cc, zc, sizeof(cc), is_sqrt); + } + vec_select(inp_, inp, inp_, sizeof(inp_), is_sqrt); + + mul_fp(aa, aa, cc); /* sqrt(a²+b²) */ + + sub_fp(bb, inp_[0], aa); + add_fp(aa, inp_[0], aa); + vec_select(aa, bb, aa, sizeof(aa), vec_is_zero(aa, sizeof(aa))); + div_by_2_fp(aa, aa); /* (a ± sqrt(a²+b²))/2 */ + + /* if it says "no sqrt," final "align" will find right one... */ + (void)recip_sqrt_fp(out[0], aa); /* 1/sqrt((a ± sqrt(a²+b²))/2) */ + + div_by_2_fp(out[1], inp_[1]); + mul_fp(out[1], out[1], out[0]); /* b/(2*sqrt((a ± sqrt(a²+b²))/2)) */ + mul_fp(out[0], out[0], aa); /* sqrt((a ± sqrt(a²+b²))/2) */ + + /* bound to succeed */ + (void)sqrt_align_fp2(out, out, out, inp_); + + mul_fp(out[0], out[0], cc); /* inverse the result */ + mul_fp(out[1], out[1], cc); + neg_fp(out[1], out[1]); + + return is_sqrt; +} + +static bool_t sqrt_fp2(vec384x out, const vec384x inp) +{ + vec384x ret; + vec384 aa, bb; + + sqr_fp(aa, inp[0]); + sqr_fp(bb, inp[1]); + add_fp(aa, aa, bb); + + /* don't pay attention to return value, final "align" will tell... */ + (void)sqrt_fp(aa, aa); /* sqrt(a²+b²) */ + + sub_fp(bb, inp[0], aa); + add_fp(aa, inp[0], aa); + vec_select(aa, bb, aa, sizeof(aa), vec_is_zero(aa, sizeof(aa))); + div_by_2_fp(aa, aa); /* (a ± sqrt(a²+b²))/2 */ + + /* if it says "no sqrt," final "align" will find right one... */ + (void)recip_sqrt_fp(ret[0], aa); /* 1/sqrt((a ± sqrt(a²+b²))/2) */ + + div_by_2_fp(ret[1], inp[1]); + mul_fp(ret[1], ret[1], ret[0]); /* b/(2*sqrt((a ± sqrt(a²+b²))/2)) */ + mul_fp(ret[0], ret[0], aa); /* sqrt((a ± sqrt(a²+b²))/2) */ + + /* + * Now see if |ret| is or can be made sqrt(|inp|)... + */ + + return sqrt_align_fp2(out, ret, ret, inp); +} + +int blst_fp2_sqrt(vec384x out, const vec384x inp) +{ return (int)sqrt_fp2(out, inp); } + +int blst_fp2_is_square(const vec384x inp) +{ + vec384 aa, bb; + + sqr_fp(aa, inp[0]); + sqr_fp(bb, inp[1]); + add_fp(aa, aa, bb); + + return (int)ct_is_square_mod_384(aa, BLS12_381_P); +} diff --git a/crypto/blst_src/vect.c b/crypto/blst_src/vect.c new file mode 100644 index 00000000000..1834a48fadd --- /dev/null +++ b/crypto/blst_src/vect.c @@ -0,0 +1,176 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "vect.h" + +#ifdef __BLST_NO_ASM__ +# include "no_asm.h" +#endif + +/* + * Following are some reference C implementations to assist new + * assembly modules development, as starting-point stand-ins and for + * cross-checking. In order to "polyfil" specific subroutine redefine + * it on compiler command line, e.g. -Dmul_mont_384x=_mul_mont_384x. + */ + +#ifdef lshift_mod_384 +inline void lshift_mod_384(vec384 ret, const vec384 a, size_t n, + const vec384 mod) +{ + while(n--) + add_mod_384(ret, a, a, mod), a = ret; +} +#endif + +#ifdef mul_by_8_mod_384 +inline void mul_by_8_mod_384(vec384 ret, const vec384 a, const vec384 mod) +{ lshift_mod_384(ret, a, 3, mod); } +#endif + +#ifdef mul_by_3_mod_384 +inline void mul_by_3_mod_384(vec384 ret, const vec384 a, const vec384 mod) +{ + vec384 t; + + add_mod_384(t, a, a, mod); + add_mod_384(ret, t, a, mod); +} +#endif + +#ifdef mul_by_3_mod_384x +inline void mul_by_3_mod_384x(vec384x ret, const vec384x a, const vec384 mod) +{ + mul_by_3_mod_384(ret[0], a[0], mod); + mul_by_3_mod_384(ret[1], a[1], mod); +} +#endif + +#ifdef mul_by_8_mod_384x +inline void mul_by_8_mod_384x(vec384x ret, const vec384x a, const vec384 mod) +{ + mul_by_8_mod_384(ret[0], a[0], mod); + mul_by_8_mod_384(ret[1], a[1], mod); +} +#endif + +#ifdef mul_by_1_plus_i_mod_384x +inline void mul_by_1_plus_i_mod_384x(vec384x ret, const vec384x a, + const vec384 mod) +{ + vec384 t; + + add_mod_384(t, a[0], a[1], mod); + sub_mod_384(ret[0], a[0], a[1], mod); + vec_copy(ret[1], t, sizeof(t)); +} +#endif + +#ifdef add_mod_384x +inline void add_mod_384x(vec384x ret, const vec384x a, const vec384x b, + const vec384 mod) +{ + add_mod_384(ret[0], a[0], b[0], mod); + add_mod_384(ret[1], a[1], b[1], mod); +} +#endif + +#ifdef sub_mod_384x +inline void sub_mod_384x(vec384x ret, const vec384x a, const vec384x b, + const vec384 mod) +{ + sub_mod_384(ret[0], a[0], b[0], mod); + sub_mod_384(ret[1], a[1], b[1], mod); +} +#endif + +#ifdef lshift_mod_384x +inline void lshift_mod_384x(vec384x ret, const vec384x a, size_t n, + const vec384 mod) +{ + lshift_mod_384(ret[0], a[0], n, mod); + lshift_mod_384(ret[1], a[1], n, mod); +} +#endif + +#if defined(mul_mont_384x) && !(defined(__ADX__) && !defined(__BLST_PORTABLE__)) +void mul_mont_384x(vec384x ret, const vec384x a, const vec384x b, + const vec384 mod, limb_t n0) +{ + vec768 t0, t1, t2; + vec384 aa, bb; + + mul_384(t0, a[0], b[0]); + mul_384(t1, a[1], b[1]); + + add_mod_384(aa, a[0], a[1], mod); + add_mod_384(bb, b[0], b[1], mod); + mul_384(t2, aa, bb); + sub_mod_384x384(t2, t2, t0, mod); + sub_mod_384x384(t2, t2, t1, mod); + + sub_mod_384x384(t0, t0, t1, mod); + + redc_mont_384(ret[0], t0, mod, n0); + redc_mont_384(ret[1], t2, mod, n0); +} +#endif + +#if defined(sqr_mont_384x) && !(defined(__ADX__) && !defined(__BLST_PORTABLE__)) +void sqr_mont_384x(vec384x ret, const vec384x a, const vec384 mod, limb_t n0) +{ + vec384 t0, t1; + + add_mod_384(t0, a[0], a[1], mod); + sub_mod_384(t1, a[0], a[1], mod); + + mul_mont_384(ret[1], a[0], a[1], mod, n0); + add_mod_384(ret[1], ret[1], ret[1], mod); + + mul_mont_384(ret[0], t0, t1, mod, n0); +} +#endif + +limb_t div_3_limbs(const limb_t dividend_top[2], limb_t d_lo, limb_t d_hi); +limb_t quot_rem_128(limb_t *quot_rem, const limb_t *divisor, limb_t quotient); +limb_t quot_rem_64(limb_t *quot_rem, const limb_t *divisor, limb_t quotient); + +/* + * Divide 255-bit |val| by z^2 yielding 128-bit quotient and remainder in place. + */ +static void div_by_zz(limb_t val[]) +{ + static const limb_t zz[] = { TO_LIMB_T(0x0000000100000000), + TO_LIMB_T(0xac45a4010001a402) }; + size_t loop, zz_len = sizeof(zz)/sizeof(zz[0]); + limb_t d_lo, d_hi; + + d_lo = zz[zz_len - 2]; + d_hi = zz[zz_len - 1]; + for (loop = zz_len, zz_len--; loop--;) { + limb_t q = div_3_limbs(val + loop + zz_len, d_lo, d_hi); + (void)quot_rem_128(val + loop, zz, q); + } + /* remainder is in low half of val[], quotient is in high */ +} + +/* + * Divide 128-bit |val| by z yielding 64-bit quotient and remainder in place. + */ +static void div_by_z(limb_t val[]) +{ + static const limb_t z[] = { TO_LIMB_T(0xd201000000010000) }; + size_t loop, z_len = sizeof(z)/sizeof(z[0]); + limb_t d_lo, d_hi; + + d_lo = (sizeof(z) == sizeof(limb_t)) ? 0 : z[z_len - 2]; + d_hi = z[z_len - 1]; + for (loop = z_len, z_len--; loop--;) { + limb_t q = div_3_limbs(val + loop + z_len, d_lo, d_hi); + (void)quot_rem_64(val + loop, z, q); + } + /* remainder is in low half of val[], quotient is in high */ +} diff --git a/crypto/blst_src/vect.h b/crypto/blst_src/vect.h new file mode 100644 index 00000000000..554dd5daefc --- /dev/null +++ b/crypto/blst_src/vect.h @@ -0,0 +1,418 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLS12_381_ASM_VECT_H__ +#define __BLS12_381_ASM_VECT_H__ + +#include + +#if defined(__x86_64__) || defined(__aarch64__) +/* These are available even in ILP32 flavours, but even then they are + * capable of performing 64-bit operations as efficiently as in *P64. */ +typedef unsigned long long limb_t; +# define LIMB_T_BITS 64 + +#elif defined(_WIN64) /* Win64 is P64 */ +typedef unsigned __int64 limb_t; +# define LIMB_T_BITS 64 + +#elif defined(__BLST_NO_ASM__) || defined(__wasm64__) +typedef unsigned int limb_t; +# define LIMB_T_BITS 32 +# ifndef __BLST_NO_ASM__ +# define __BLST_NO_ASM__ +# endif + +#else /* 32 bits on 32-bit platforms, 64 - on 64-bit */ +typedef unsigned long limb_t; +# ifdef _LP64 +# define LIMB_T_BITS 64 +# else +# define LIMB_T_BITS 32 +# define __BLST_NO_ASM__ +# endif +#endif + +/* + * Why isn't LIMB_T_BITS defined as 8*sizeof(limb_t)? Because pre-processor + * knows nothing about sizeof(anything)... + */ +#if LIMB_T_BITS == 64 +# define TO_LIMB_T(limb64) limb64 +#else +# define TO_LIMB_T(limb64) (limb_t)limb64,(limb_t)(limb64>>32) +#endif + +#define NLIMBS(bits) (bits/LIMB_T_BITS) + +typedef limb_t vec256[NLIMBS(256)]; +typedef limb_t vec512[NLIMBS(512)]; +typedef limb_t vec384[NLIMBS(384)]; +typedef limb_t vec768[NLIMBS(768)]; +typedef vec384 vec384x[2]; /* 0 is "real" part, 1 is "imaginary" */ + +typedef unsigned char byte; +#define TO_BYTES(limb64) (byte)limb64,(byte)(limb64>>8),\ + (byte)(limb64>>16),(byte)(limb64>>24),\ + (byte)(limb64>>32),(byte)(limb64>>40),\ + (byte)(limb64>>48),(byte)(limb64>>56) +typedef byte pow256[256/8]; + +/* + * Internal Boolean type, Boolean by value, hence safe to cast to or + * reinterpret as 'bool'. + */ +typedef limb_t bool_t; + +/* + * Assembly subroutines... + */ +#if defined(__ADX__) /* e.g. -march=broadwell */ && !defined(__BLST_PORTABLE__)\ + && !defined(__BLST_NO_ASM__) +# define mul_mont_sparse_256 mulx_mont_sparse_256 +# define sqr_mont_sparse_256 sqrx_mont_sparse_256 +# define from_mont_256 fromx_mont_256 +# define redc_mont_256 redcx_mont_256 +# define mul_mont_384 mulx_mont_384 +# define sqr_mont_384 sqrx_mont_384 +# define sqr_n_mul_mont_384 sqrx_n_mul_mont_384 +# define sqr_n_mul_mont_383 sqrx_n_mul_mont_383 +# define mul_384 mulx_384 +# define sqr_384 sqrx_384 +# define redc_mont_384 redcx_mont_384 +# define from_mont_384 fromx_mont_384 +# define sgn0_pty_mont_384 sgn0x_pty_mont_384 +# define sgn0_pty_mont_384x sgn0x_pty_mont_384x +# define ct_inverse_mod_383 ctx_inverse_mod_383 +#elif defined(__BLST_NO_ASM__) +# define ct_inverse_mod_383 ct_inverse_mod_384 +#endif + +void mul_mont_sparse_256(vec256 ret, const vec256 a, const vec256 b, + const vec256 p, limb_t n0); +void sqr_mont_sparse_256(vec256 ret, const vec256 a, const vec256 p, limb_t n0); +void redc_mont_256(vec256 ret, const vec512 a, const vec256 p, limb_t n0); +void from_mont_256(vec256 ret, const vec256 a, const vec256 p, limb_t n0); + +void add_mod_256(vec256 ret, const vec256 a, const vec256 b, const vec256 p); +void sub_mod_256(vec256 ret, const vec256 a, const vec256 b, const vec256 p); +void mul_by_3_mod_256(vec256 ret, const vec256 a, const vec256 p); +void cneg_mod_256(vec256 ret, const vec256 a, bool_t flag, const vec256 p); +void lshift_mod_256(vec256 ret, const vec256 a, size_t count, const vec256 p); +void rshift_mod_256(vec256 ret, const vec256 a, size_t count, const vec256 p); +bool_t eucl_inverse_mod_256(vec256 ret, const vec256 a, const vec256 p, + const vec256 one); +limb_t check_mod_256(const pow256 a, const vec256 p); +limb_t add_n_check_mod_256(pow256 ret, const pow256 a, const pow256 b, + const vec256 p); +limb_t sub_n_check_mod_256(pow256 ret, const pow256 a, const pow256 b, + const vec256 p); + +void vec_prefetch(const void *ptr, size_t len); + +void mul_mont_384(vec384 ret, const vec384 a, const vec384 b, + const vec384 p, limb_t n0); +void sqr_mont_384(vec384 ret, const vec384 a, const vec384 p, limb_t n0); +void sqr_n_mul_mont_384(vec384 ret, const vec384 a, size_t count, + const vec384 p, limb_t n0, const vec384 b); +void sqr_n_mul_mont_383(vec384 ret, const vec384 a, size_t count, + const vec384 p, limb_t n0, const vec384 b); + +void mul_384(vec768 ret, const vec384 a, const vec384 b); +void sqr_384(vec768 ret, const vec384 a); +void redc_mont_384(vec384 ret, const vec768 a, const vec384 p, limb_t n0); +void from_mont_384(vec384 ret, const vec384 a, const vec384 p, limb_t n0); +limb_t sgn0_pty_mont_384(const vec384 a, const vec384 p, limb_t n0); +limb_t sgn0_pty_mont_384x(const vec384x a, const vec384 p, limb_t n0); +limb_t sgn0_pty_mod_384(const vec384 a, const vec384 p); +limb_t sgn0_pty_mod_384x(const vec384x a, const vec384 p); + +void add_mod_384(vec384 ret, const vec384 a, const vec384 b, const vec384 p); +void sub_mod_384(vec384 ret, const vec384 a, const vec384 b, const vec384 p); +void mul_by_8_mod_384(vec384 ret, const vec384 a, const vec384 p); +void mul_by_3_mod_384(vec384 ret, const vec384 a, const vec384 p); +void cneg_mod_384(vec384 ret, const vec384 a, bool_t flag, const vec384 p); +void lshift_mod_384(vec384 ret, const vec384 a, size_t count, const vec384 p); +void rshift_mod_384(vec384 ret, const vec384 a, size_t count, const vec384 p); +void div_by_2_mod_384(vec384 ret, const vec384 a, const vec384 p); +void ct_inverse_mod_383(vec768 ret, const vec384 inp, const vec384 mod, + const vec384 modx); +void ct_inverse_mod_256(vec512 ret, const vec256 inp, const vec256 mod, + const vec256 modx); +bool_t ct_is_square_mod_384(const vec384 inp, const vec384 mod); + +#if defined(__ADX__) /* e.g. -march=broadwell */ && !defined(__BLST_PORTABLE__) +# define mul_mont_384x mulx_mont_384x +# define sqr_mont_384x sqrx_mont_384x +# define sqr_mont_382x sqrx_mont_382x +# define mul_382x mulx_382x +# define sqr_382x sqrx_382x +#endif + +void mul_mont_384x(vec384x ret, const vec384x a, const vec384x b, + const vec384 p, limb_t n0); +void sqr_mont_384x(vec384x ret, const vec384x a, const vec384 p, limb_t n0); +void sqr_mont_382x(vec384x ret, const vec384x a, const vec384 p, limb_t n0); +void mul_382x(vec768 ret[2], const vec384x a, const vec384x b, const vec384 p); +void sqr_382x(vec768 ret[2], const vec384x a, const vec384 p); + +void add_mod_384x(vec384x ret, const vec384x a, const vec384x b, + const vec384 p); +void sub_mod_384x(vec384x ret, const vec384x a, const vec384x b, + const vec384 p); +void mul_by_8_mod_384x(vec384x ret, const vec384x a, const vec384 p); +void mul_by_3_mod_384x(vec384x ret, const vec384x a, const vec384 p); +void mul_by_1_plus_i_mod_384x(vec384x ret, const vec384x a, const vec384 p); +void add_mod_384x384(vec768 ret, const vec768 a, const vec768 b, + const vec384 p); +void sub_mod_384x384(vec768 ret, const vec768 a, const vec768 b, + const vec384 p); + +/* + * C subroutines + */ +static void exp_mont_384(vec384 out, const vec384 inp, const byte *pow, + size_t pow_bits, const vec384 p, limb_t n0); +static void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow, + size_t pow_bits, const vec384 p, limb_t n0); +static void div_by_zz(limb_t val[]); +static void div_by_z(limb_t val[]); + +#ifdef __UINTPTR_TYPE__ +typedef __UINTPTR_TYPE__ uptr_t; +#else +typedef const void *uptr_t; +#endif + +#if !defined(restrict) +# if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 +# if defined(__GNUC__) && __GNUC__>=2 +# define restrict __restrict__ +# elif defined(_MSC_VER) +# define restrict __restrict +# else +# define restrict +# endif +# endif +#endif + +#if !defined(inline) && !defined(__cplusplus) +# if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 +# if defined(__GNUC__) && __GNUC__>=2 +# define inline __inline__ +# elif defined(_MSC_VER) +# define inline __inline +# else +# define inline +# endif +# endif +#endif + +#if defined(__GNUC__) || defined(__clang__) +# define launder(var) __asm__ __volatile__("" : "+r"(var)) +#else +# define launder(var) +#endif + +static inline bool_t is_bit_set(const byte *v, size_t i) +{ + bool_t ret = (v[i/8] >> (i%8)) & 1; + launder(ret); + return ret; +} + +static inline bool_t byte_is_zero(unsigned char c) +{ + limb_t ret = ((limb_t)(c) - 1) >> (LIMB_T_BITS - 1); + launder(ret); + return ret; +} + +static inline bool_t bytes_are_zero(const unsigned char *a, size_t num) +{ + unsigned char acc; + size_t i; + + for (acc = 0, i = 0; i < num; i++) + acc |= a[i]; + + return byte_is_zero(acc); +} + +static inline void vec_cswap(void *restrict a, void *restrict b, size_t num, + bool_t cbit) +{ + limb_t ai, *ap = (limb_t *)a; + limb_t bi, *bp = (limb_t *)b; + limb_t xorm, mask; + size_t i; + + launder(cbit); + mask = (limb_t)0 - cbit; + + num /= sizeof(limb_t); + + for (i = 0; i < num; i++) { + xorm = ((ai = ap[i]) ^ (bi = bp[i])) & mask; + ap[i] = ai ^ xorm; + bp[i] = bi ^ xorm; + } +} + +/* ret = bit ? a : b */ +void vec_select_32(void *ret, const void *a, const void *b, bool_t sel_a); +void vec_select_48(void *ret, const void *a, const void *b, bool_t sel_a); +void vec_select_96(void *ret, const void *a, const void *b, bool_t sel_a); +void vec_select_144(void *ret, const void *a, const void *b, bool_t sel_a); +void vec_select_192(void *ret, const void *a, const void *b, bool_t sel_a); +void vec_select_288(void *ret, const void *a, const void *b, bool_t sel_a); +static inline void vec_select(void *ret, const void *a, const void *b, + size_t num, bool_t sel_a) +{ + launder(sel_a); +#ifndef __BLST_NO_ASM__ + if (num == 32) vec_select_32(ret, a, b, sel_a); + else if (num == 48) vec_select_48(ret, a, b, sel_a); + else if (num == 96) vec_select_96(ret, a, b, sel_a); + else if (num == 144) vec_select_144(ret, a, b, sel_a); + else if (num == 192) vec_select_192(ret, a, b, sel_a); + else if (num == 288) vec_select_288(ret, a, b, sel_a); +#else + if (0) ; +#endif + else { + limb_t bi; + volatile limb_t *rp = (limb_t *)ret; + const limb_t *ap = (const limb_t *)a; + const limb_t *bp = (const limb_t *)b; + limb_t xorm, mask = (limb_t)0 - sel_a; + size_t i; + + num /= sizeof(limb_t); + + for (i = 0; i < num; i++) { + xorm = (ap[i] ^ (bi = bp[i])) & mask; + rp[i] = bi ^ xorm; + } + } +} + +static inline bool_t is_zero(limb_t l) +{ + limb_t ret = (~l & (l - 1)) >> (LIMB_T_BITS - 1); + launder(ret); + return ret; +} + +static inline bool_t vec_is_zero(const void *a, size_t num) +{ + const limb_t *ap = (const limb_t *)a; + limb_t acc; + size_t i; + +#ifndef __BLST_NO_ASM__ + bool_t vec_is_zero_16x(const void *a, size_t num); + if ((num & 15) == 0) + return vec_is_zero_16x(a, num); +#endif + + num /= sizeof(limb_t); + + for (acc = 0, i = 0; i < num; i++) + acc |= ap[i]; + + return is_zero(acc); +} + +static inline bool_t vec_is_equal(const void *a, const void *b, size_t num) +{ + const limb_t *ap = (const limb_t *)a; + const limb_t *bp = (const limb_t *)b; + limb_t acc; + size_t i; + +#ifndef __BLST_NO_ASM__ + bool_t vec_is_equal_16x(const void *a, const void *b, size_t num); + if ((num & 15) == 0) + return vec_is_equal_16x(a, b, num); +#endif + + num /= sizeof(limb_t); + + for (acc = 0, i = 0; i < num; i++) + acc |= ap[i] ^ bp[i]; + + return is_zero(acc); +} + +static inline void cneg_mod_384x(vec384x ret, const vec384x a, bool_t flag, + const vec384 p) +{ + cneg_mod_384(ret[0], a[0], flag, p); + cneg_mod_384(ret[1], a[1], flag, p); +} + +static inline void vec_copy(void *restrict ret, const void *a, size_t num) +{ + limb_t *rp = (limb_t *)ret; + const limb_t *ap = (const limb_t *)a; + size_t i; + + num /= sizeof(limb_t); + + for (i = 0; i < num; i++) + rp[i] = ap[i]; +} + +static inline void vec_zero(void *ret, size_t num) +{ + volatile limb_t *rp = (volatile limb_t *)ret; + size_t i; + + num /= sizeof(limb_t); + + for (i = 0; i < num; i++) + rp[i] = 0; + +#if defined(__GNUC__) || defined(__clang__) + __asm__ __volatile__("" : : "r"(ret) : "memory"); +#endif +} + +/* + * Some compilers get arguably overzealous(*) when passing pointer to + * multi-dimensional array [such as vec384x] as 'const' argument. + * General direction seems to be to legitimize such constification, + * so it's argued that suppressing the warning is appropriate. + * + * (*) http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1923.htm + */ +#if defined(__INTEL_COMPILER) +# pragma warning(disable:167) +# pragma warning(disable:556) +#elif defined(__GNUC__) && !defined(__clang__) +# pragma GCC diagnostic ignored "-Wpedantic" +#elif defined(_MSC_VER) +# pragma warning(disable: 4127 4189) +#endif + +#if !defined(__wasm__) && __STDC_HOSTED__-0 != 0 +# include +#endif + +#if defined(__GNUC__) +# ifndef alloca +# define alloca(s) __builtin_alloca(s) +# endif +#elif defined(__sun) +# include +#elif defined(_WIN32) +# include +# ifndef alloca +# define alloca(s) _alloca(s) +# endif +#endif + +#endif /* __BLS12_381_ASM_VECT_H__ */ diff --git a/crypto/build_dependency.sh b/crypto/build_dependency.sh deleted file mode 100644 index 4bfe99dbad2..00000000000 --- a/crypto/build_dependency.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -PKG_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -RELIC_DIR_NAME="relic" -RELIC_DIR="${PKG_DIR}/${RELIC_DIR_NAME}" - -# grant permissions if not existant -if [[ ! -r ${PKG_DIR} || ! -w ${PKG_DIR} || ! -x ${PKG_DIR} ]]; then - chmod -R 755 "${PKG_DIR}" -fi - -rm -rf "${RELIC_DIR}" - -# relic version or tag -relic_version="7d885d1ba34be61bf22190943a73549a910c1714" - -# clone a specific version of Relic without history if it's tagged. -# git -c http.sslVerify=true clone --branch $(relic_version) --single-branch --depth 1 https://github.com/relic-toolkit/relic.git ${RELIC_DIR_NAME} || { echo "git clone failed"; exit 1; } - -# clone all the history if the version is only defined by a commit hash. -git -c http.sslVerify=true clone --branch main --single-branch https://github.com/relic-toolkit/relic.git ${RELIC_DIR_NAME} || { echo "git clone failed"; exit 1; } - -if [ -d "${RELIC_DIR}" ] -then - ( - cd ${RELIC_DIR_NAME} || { echo "cd relic failed"; exit 1; } - git checkout $relic_version - ) - # build relic - bash relic_build.sh -else - { echo "couldn't find relic directory"; exit 1; } -fi - diff --git a/crypto/common.go b/crypto/common.go index f476de92e3f..b9e072c9930 100644 --- a/crypto/common.go +++ b/crypto/common.go @@ -8,9 +8,6 @@ import ( //revive:disable:var-naming -// the `go generate` command requires bash scripting, `cmake` and `git`. -//go:generate bash ./build_dependency.sh - const ( // Minimum targeted bits of security. // This is used as a reference but it doesn't mean all implemented primitives provide this minimum. @@ -21,9 +18,6 @@ const ( // it is still recommened that seed is generated using a secure RNG. KeyGenSeedMinLen = 2 * (securityBits / 8) KeyGenSeedMaxLen = 256 - - // max relic PRG seed length in bytes - maxRelicPrgSeed = 1 << 32 ) // TODO: update this code to make sure diff --git a/crypto/dkg.go b/crypto/dkg.go index 6e74f3d54a5..03305d016c7 100644 --- a/crypto/dkg.go +++ b/crypto/dkg.go @@ -22,7 +22,7 @@ import ( // Flow uses DKG with the value t = floor((n-1)/2) to optimize for unforgeability and robustness // of the threshold signature scheme using the output keys. // -// Private keys are scalar in Zr, where r is the group order of G1/G2. +// Private keys are scalar in Fr, where r is the group order of G1/G2. // Public keys are in G2. const ( @@ -34,9 +34,6 @@ const ( DKGMinSize int = MinimumThreshold + 1 // DKGMaxSize is the maximum size of a group participating in a DKG protocol DKGMaxSize int = 254 - // SeedMinLenDKG is the minumum seed length required to participate in a DKG protocol - SeedMinLenDKG = securityBits / 8 - SeedMaxLenDKG = maxRelicPrgSeed ) type DKGState interface { diff --git a/crypto/dkg_core.c b/crypto/dkg_core.c index 3a2bce01559..c8fee6917f6 100644 --- a/crypto/dkg_core.c +++ b/crypto/dkg_core.c @@ -1,127 +1,109 @@ -// +build relic - #include "dkg_include.h" - -#define N_max 250 -#define N_bits_max 8 // log(250) -#define T_max ((N_max-1)/2) - -// computes P(x) = a_0 + a_1*x + .. + a_n x^n (mod r) -// r being the order of G1 -// writes P(x) in out and P(x).g2 in y if y is non NULL -// x being a small integer -void Zr_polynomialImage_export(byte* out, ep2_t y, const bn_st* a, const int a_size, const byte x){ - bn_t image; - bn_new(image); - Zr_polynomialImage(image, y, a, a_size, x); - // exports the result - const int out_size = Fr_BYTES; - bn_write_bin(out, out_size, image); - bn_free(image); +// computes P(x) = a_0 + a_1*x + .. + a_n x^n in F_r +// where `x` is a small integer (byte) and `degree` is P's degree n. +// P(x) is written in `out` and P(x).g2 is written in `y` if `y` is non NULL. +void Fr_polynomial_image_write(byte *out, E2 *y, const Fr *a, const int degree, + const byte x) { + Fr image; + Fr_polynomial_image(&image, y, a, degree, x); + // exports the result + Fr_write_bytes(out, &image); } -// computes P(x) = a_0 + a_1*x + .. + a_n x^n (mod r) -// r being the order of G1 -// writes P(x) in out and P(x).g2 in y if y is non NULL -// x being a small integer -void Zr_polynomialImage(bn_t image, ep2_t y, const bn_st *a, const int a_size, const byte x){ - bn_t r; - bn_new(r); - g2_get_ord(r); - - // temp variables - bn_t acc; - bn_new(acc); - bn_new_size(acc, BITS_TO_DIGITS(Fr_BITS+8+1)); - bn_set_dig(acc, 0); - - for (int i=a_size-1; i >= 0; i--) { - bn_mul_dig(acc, acc, x); - // Use basic reduction as it's an 9-bits reduction - // in the worst case (|acc|<|r|+9 ) - bn_mod_basic(acc, acc, r); - bn_add(acc, acc, &a[i]); - } - // export the result - bn_mod_basic(image, acc, r); - - // compute y = P(x).g2 - if (y) g2_mul_gen(y, acc); +// computes P(x) = a_0 + a_1 * x + .. + a_n * x^n where P is in Fr[X]. +// a_i are all in Fr, `degree` is P's degree, x is a small integer less than +// `MAX_IND` (currently 255). +// The function writes P(x) in `image` and P(x).g2 in `y` if `y` is non NULL. +void Fr_polynomial_image(Fr *image, E2 *y, const Fr *a, const int degree, + const byte x) { + Fr_set_zero(image); + // convert `x` to Montgomery form + Fr xR; + Fr_set_limb(&xR, (limb_t)x); + Fr_to_montg(&xR, &xR); - bn_free(acc) - bn_free(r); + for (int i = degree; i >= 0; i--) { + Fr_mul_montg(image, image, &xR); + Fr_add(image, image, &a[i]); // image is in normal form + } + // compute y = P(x).g2 + if (y) { + G2_mult_gen(y, image); + } } // computes Q(x) = A_0 + A_1*x + ... + A_n*x^n in G2 -// and stores the point in y -// r is the order of G2 -static void G2_polynomialImage(ep2_t y, const ep2_st* A, const int len_A, - const byte x, const bn_t r){ - - bn_t bn_x; - bn_new(bn_x); - ep2_set_infty(y); - bn_set_dig(bn_x, x); - for (int i = len_A-1; i >= 0 ; i--) { - ep2_mul_lwnaf(y, y, bn_x); - ep2_add_projc(y, y, (ep2_st*)&A[i]); - } - - ep2_norm(y, y); // not necessary but left here to optimize the - // multiple pairing computations with the same public key - bn_free(bn_x); +// and stores the point in y. +// - A_i being G2 points +// - x being a small scalar (less than `MAX_IND`) +static void E2_polynomial_image(E2 *y, const E2 *A, const int degree, + const byte x) { + E2_set_infty(y); + for (int i = degree; i >= 0; i--) { + E2_mult_small_expo(y, y, x); + E2_add(y, y, &A[i]); + } } -// compute the participants public keys from the verification vector -// y[i] = Q(i+1) for all participants i, with: -// Q(x) = A_0 + A_1*x + ... + A_n*x^n in G2 -void G2_polynomialImages(ep2_st *y, const int len_y, const ep2_st* A, const int len_A) { - // order r - bn_t r; - bn_new(r); - g2_get_ord(r); - for (byte i=0; i 0 { + // genarate a_i on F_r, for 0 0 { - for i := 1; i < s.threshold; i++ { - C.bn_new_wrapper((*C.bn_st)(&s.a[i])) - randZr(&s.a[i]) - generatorScalarMultG2(&s.vA[i], &s.a[i]) - } - // non-zero a[t] to enforce the polynomial degree - randZrStar(&s.a[s.threshold]) - generatorScalarMultG2(&s.vA[s.threshold], &s.a[s.threshold]) + return fmt.Errorf("failed to generate random polynomial: %w", err) + } + + // compute the verification vector A_i = g2^a_i + s.vA = make([]pointE2, s.threshold+1) + for i := 0; i <= s.threshold; i++ { + generatorScalarMultG2(&s.vA[i], &s.a[i]) } // compute the shares @@ -287,17 +319,17 @@ func (s *feldmanVSSstate) generateShares(seed []byte) error { // the dealer's own share if i-1 == s.myIndex { xdata := make([]byte, shareSize) - zrPolynomialImage(xdata, s.a, i, &s.y[i-1]) - C.bn_read_bin((*C.bn_st)(&s.x), - (*C.uchar)(&xdata[0]), - PrKeyLenBLSBLS12381, - ) + frPolynomialImage(xdata, s.a, i, &s.y[i-1]) + err := readScalarFrStar(&s.x, xdata) + if err != nil { + return fmt.Errorf("unexpected error when generating the dealer's own share: %w", err) + } continue } // the-other-participant shares data := make([]byte, shareSize+1) data[0] = byte(feldmanVSSShare) - zrPolynomialImage(data[1:], s.a, i, &s.y[i-1]) + frPolynomialImage(data[1:], s.a, i, &s.y[i-1]) s.processor.PrivateSend(int(i-1), data) } // broadcast the vector @@ -350,13 +382,11 @@ func (s *feldmanVSSstate) receiveShare(origin index, data []byte) { } // read the participant private share - if C.bn_read_Zr_bin((*C.bn_st)(&s.x), - (*C.uchar)(&data[0]), - PrKeyLenBLSBLS12381, - ) != valid { + err := readScalarFrStar(&s.x, data) + if err != nil { s.validKey = false s.processor.FlagMisbehavior(int(origin), - fmt.Sprintf("invalid share value %x", data)) + fmt.Sprintf("invalid share value %x: %s", data, err)) return } @@ -365,9 +395,9 @@ func (s *feldmanVSSstate) receiveShare(origin index, data []byte) { } } -// receives the public vector from the +// receives the public vector from the dealer func (s *feldmanVSSstate) receiveVerifVector(origin index, data []byte) { - // only accept the verification vector from the . + // only accept the verification vector from the dealer. if origin != s.dealerIndex { return } @@ -387,7 +417,7 @@ func (s *feldmanVSSstate) receiveVerifVector(origin index, data []byte) { return } // read the verification vector - s.vA = make([]pointG2, s.threshold+1) + s.vA = make([]pointE2, s.threshold+1) err := readVerifVector(s.vA, data) if err != nil { s.vAReceived = true @@ -396,7 +426,7 @@ func (s *feldmanVSSstate) receiveVerifVector(origin index, data []byte) { fmt.Sprintf("reading the verification vector failed: %s", err)) } - s.y = make([]pointG2, s.size) + s.y = make([]pointE2, s.size) s.computePublicKeys() s.vAReceived = true @@ -405,44 +435,46 @@ func (s *feldmanVSSstate) receiveVerifVector(origin index, data []byte) { } } -// zrPolynomialImage computes P(x) = a_0 + a_1*x + .. + a_n*x^n (mod r) in Z/Zr +// frPolynomialImage computes P(x) = a_0 + a_1*x + .. + a_n*x^n (mod r) in Fr[X] // r being the order of G1 // P(x) is written in dest, while g2^P(x) is written in y // x being a small integer -func zrPolynomialImage(dest []byte, a []scalar, x index, y *pointG2) { - C.Zr_polynomialImage_export((*C.uchar)(&dest[0]), - (*C.ep2_st)(y), - (*C.bn_st)(&a[0]), (C.int)(len(a)), +func frPolynomialImage(dest []byte, a []scalar, x index, y *pointE2) { + C.Fr_polynomial_image_write((*C.uchar)(&dest[0]), + (*C.E2)(y), + (*C.Fr)(&a[0]), (C.int)(len(a)-1), (C.uint8_t)(x), ) } // writeVerifVector exports a vector A into an array of bytes // assuming the array length matches the vector length -func writeVerifVector(dest []byte, A []pointG2) { - C.ep2_vector_write_bin((*C.uchar)(&dest[0]), - (*C.ep2_st)(&A[0]), +func writeVerifVector(dest []byte, A []pointE2) { + C.E2_vector_write_bytes((*C.uchar)(&dest[0]), + (*C.E2)(&A[0]), (C.int)(len(A)), ) } -// readVerifVector imports A vector from an array of bytes, -// assuming the slice length matches the vector length -func readVerifVector(A []pointG2, src []byte) error { - read := C.ep2_vector_read_bin((*C.ep2_st)(&A[0]), +// readVerifVector imports A vector (G2 points) from an array of bytes, +// assuming the slice length matches the vector length. +func readVerifVector(A []pointE2, src []byte) error { + read := C.G2_vector_read_bytes( + (*C.E2)(&A[0]), (*C.uchar)(&src[0]), (C.int)(len(A))) if read == valid { return nil } // invalid A vector - return invalidInputsErrorf("the verifcation vector does not serialize G2 points") + return invalidInputsErrorf("the verification vector does not serialize valid G2 points: error code %d", read) } func (s *feldmanVSSstate) verifyShare() bool { // check y[current] == x.G2 - return C.verifyshare((*C.bn_st)(&s.x), - (*C.ep2_st)(&s.y[s.myIndex])) == 1 + return bool(C.G2_check_log( + (*C.Fr)(&s.x), + (*C.E2)(&s.y[s.myIndex]))) } // computePublicKeys extracts the participants public keys from the verification vector @@ -450,8 +482,8 @@ func (s *feldmanVSSstate) verifyShare() bool { // // Q(x) = A_0 + A_1*x + ... + A_n*x^n in G2 func (s *feldmanVSSstate) computePublicKeys() { - C.G2_polynomialImages( - (*C.ep2_st)(&s.y[0]), (C.int)(len(s.y)), - (*C.ep2_st)(&s.vA[0]), (C.int)(len(s.vA)), + C.E2_polynomial_images( + (*C.E2)(&s.y[0]), (C.int)(len(s.y)), + (*C.E2)(&s.vA[0]), (C.int)(len(s.vA)-1), ) } diff --git a/crypto/dkg_feldmanvssq.go b/crypto/dkg_feldmanvssq.go index 335ce6fc86d..c3aca992ee2 100644 --- a/crypto/dkg_feldmanvssq.go +++ b/crypto/dkg_feldmanvssq.go @@ -1,9 +1,5 @@ -//go:build relic -// +build relic - package crypto -// #cgo CFLAGS: -g -Wall -std=c99 // #include "dkg_include.h" import "C" @@ -27,7 +23,7 @@ import ( // a complaint answer. The protocol ends with all honest participants // reaching a consensus about the dealer qualification/disqualification. -// Private keys are scalar in Zr, where r is the group order of G1/G2 +// Private keys are scalar in Fr, where r is the group order of G1/G2 // Public keys are in G2. // feldman VSS protocol, with complaint mechanism, implements DKGState @@ -162,7 +158,7 @@ func (s *feldmanVSSQualState) End() (PrivateKey, PublicKey, []PublicKey, error) if c.received && !c.answerReceived { s.disqualified = true s.processor.Disqualify(int(s.dealerIndex), - fmt.Sprintf("complaint from %d was not answered", + fmt.Sprintf("complaint from (%d) was not answered", complainer)) break } @@ -204,9 +200,9 @@ func (s *feldmanVSSQualState) End() (PrivateKey, PublicKey, []PublicKey, error) return x, Y, y, nil } -const ( +var ( complaintSize = 1 - complaintAnswerSize = 1 + PrKeyLenBLSBLS12381 + complaintAnswerSize = 1 + frBytesLen ) // HandleBroadcastMsg processes a new broadcasted message received by the current participant. @@ -402,19 +398,17 @@ func (s *feldmanVSSQualState) receiveShare(origin index, data []byte) { return } // read the participant private share - if C.bn_read_Zr_bin((*C.bn_st)(&s.x), - (*C.uchar)(&data[0]), - PrKeyLenBLSBLS12381, - ) != valid { + err := readScalarFrStar(&s.x, data) + if err != nil { s.buildAndBroadcastComplaint() s.processor.FlagMisbehavior(int(origin), - fmt.Sprintf("invalid share value %x", data)) + fmt.Sprintf("invalid share value %x: %s", data, err)) return } if s.vAReceived { if !s.verifyShare() { - // otherwise, build a complaint + // build a complaint s.buildAndBroadcastComplaint() } } @@ -448,7 +442,7 @@ func (s *feldmanVSSQualState) receiveVerifVector(origin index, data []byte) { return } // read the verification vector - s.vA = make([]pointG2, s.threshold+1) + s.vA = make([]pointE2, s.threshold+1) err := readVerifVector(s.vA, data) if err != nil { s.disqualified = true @@ -457,7 +451,8 @@ func (s *feldmanVSSQualState) receiveVerifVector(origin index, data []byte) { return } - s.y = make([]pointG2, s.size) + s.y = make([]pointE2, s.size) + // compute all public keys s.computePublicKeys() // check the (already) registered complaints @@ -466,8 +461,8 @@ func (s *feldmanVSSQualState) receiveVerifVector(origin index, data []byte) { if s.checkComplaint(complainer, c) { s.disqualified = true s.processor.Disqualify(int(s.dealerIndex), - fmt.Sprintf("verification vector received: a complaint answer to %d is invalid", - complainer)) + fmt.Sprintf("verification vector received: a complaint answer to (%d) is invalid, answer is %s, computed key is %s", + complainer, &c.answer, &s.y[complainer])) return } } @@ -483,6 +478,14 @@ func (s *feldmanVSSQualState) receiveVerifVector(origin index, data []byte) { // build a complaint against the dealer, add it to the local // complaint map and broadcast it func (s *feldmanVSSQualState) buildAndBroadcastComplaint() { + var logMsg string + if s.vAReceived && s.xReceived { + logMsg = fmt.Sprintf("building a complaint, share is %s, computed public key is %s", + &s.x, &s.y[s.myIndex]) + } else { + logMsg = "building a complaint" + } + s.processor.FlagMisbehavior(int(s.dealerIndex), logMsg) s.complaints[s.myIndex] = &complaint{ received: true, answerReceived: false, @@ -497,7 +500,7 @@ func (s *feldmanVSSQualState) buildAndBroadcastComplaintAnswer(complainee index) data := make([]byte, complaintAnswerSize+1) data[0] = byte(feldmanVSSComplaintAnswer) data[1] = byte(complainee) - zrPolynomialImage(data[2:], s.a, complainee+1, nil) + frPolynomialImage(data[2:], s.a, complainee+1, nil) s.complaints[complainee].answerReceived = true s.processor.Broadcast(data) } @@ -507,8 +510,10 @@ func (s *feldmanVSSQualState) buildAndBroadcastComplaintAnswer(complainee index) // - true if the complaint answer is not correct func (s *feldmanVSSQualState) checkComplaint(complainer index, c *complaint) bool { // check y[complainer] == share.G2 - return C.verifyshare((*C.bn_st)(&c.answer), - (*C.ep2_st)(&s.y[complainer])) == 0 + isLog := C.G2_check_log( + (*C.Fr)(&c.answer), + (*C.E2)(&s.y[complainer])) + return !bool(isLog) } // data = |complainee| @@ -582,8 +587,8 @@ func (s *feldmanVSSQualState) receiveComplaint(origin index, data []byte) { s.disqualified = s.checkComplaint(origin, c) if s.disqualified { s.processor.Disqualify(int(s.dealerIndex), - fmt.Sprintf("complaint received: complaint answer to %d is invalid", - origin)) + fmt.Sprintf("complaint received: answer to (%d) is invalid, answer is %s, computed public key is %s", + origin, &c.answer, &s.y[origin])) } return } @@ -624,14 +629,11 @@ func (s *feldmanVSSQualState) receiveComplaintAnswer(origin index, data []byte) } // read the complainer private share - C.bn_new_wrapper((*C.bn_st)(&s.complaints[complainer].answer)) - if C.bn_read_Zr_bin((*C.bn_st)(&s.complaints[complainer].answer), - (*C.uchar)(&data[1]), - PrKeyLenBLSBLS12381, - ) != valid { + err := readScalarFrStar(&s.complaints[complainer].answer, data[1:]) + if err != nil { s.disqualified = true s.processor.Disqualify(int(s.dealerIndex), - fmt.Sprintf("invalid complaint answer value %x", data)) + fmt.Sprintf("invalid complaint answer value %x: %s", data, err)) return } return @@ -648,22 +650,19 @@ func (s *feldmanVSSQualState) receiveComplaintAnswer(origin index, data []byte) // flag check is a sanity check if c.received { // read the complainer private share - C.bn_new_wrapper((*C.bn_st)(&c.answer)) - if C.bn_read_Zr_bin((*C.bn_st)(&c.answer), - (*C.uchar)(&data[1]), - PrKeyLenBLSBLS12381, - ) != valid { + err := readScalarFrStar(&c.answer, data[1:]) + if err != nil { s.disqualified = true s.processor.Disqualify(int(s.dealerIndex), - fmt.Sprintf("invalid complaint answer value %x", data)) + fmt.Sprintf("invalid complaint answer value %x: %s", data, err)) return } if s.vAReceived { s.disqualified = s.checkComplaint(complainer, c) if s.disqualified { s.processor.Disqualify(int(s.dealerIndex), - fmt.Sprintf("complaint answer received: complaint answer to %d is invalid", - complainer)) + fmt.Sprintf("complaint answer received: answer to (%d) is invalid, answer is %s, computed key is %s", + complainer, &c.answer, &s.y[complainer])) } } diff --git a/crypto/dkg_include.h b/crypto/dkg_include.h index 5e518300071..02fb9a363f4 100644 --- a/crypto/dkg_include.h +++ b/crypto/dkg_include.h @@ -1,19 +1,15 @@ -// +build relic - -#ifndef _REL_DKG_INCLUDE_H -#define _REL_DKG_INCLUDE_H +#ifndef _DKG_INCLUDE_H +#define _DKG_INCLUDE_H #include "bls12381_utils.h" -// the highest index of a DKG participant -#define MAX_IND 255 -#define MAX_IND_BITS 8 - -void Zr_polynomialImage_export(byte* out, ep2_t y, const bn_st* a, const int a_size, const byte x); -void Zr_polynomialImage(bn_t out, ep2_t y, const bn_st* a, const int a_size, const byte x); -void G2_polynomialImages(ep2_st* y, const int len_y, const ep2_st* A, const int len_A); -void ep2_vector_write_bin(byte* out, const ep2_st* A, const int len); -int ep2_vector_read_bin(ep2_st* A, const byte* src, const int len); -int verifyshare(const bn_t x, const ep2_t y); +void Fr_polynomial_image_write(byte *out, E2 *y, const Fr *a, const int deg, + const byte x); +void Fr_polynomial_image(Fr *out, E2 *y, const Fr *a, const int deg, + const byte x); +void E2_polynomial_images(E2 *y, const int len_y, const E2 *A, const int deg); +void E2_vector_write_bytes(byte *out, const E2 *A, const int len); +ERROR G2_vector_read_bytes(E2 *A, const byte *src, const int len); +bool G2_check_log(const Fr *x, const E2 *y); #endif diff --git a/crypto/dkg_jointfeldman.go b/crypto/dkg_jointfeldman.go index 7b63f88e810..115730e33d9 100644 --- a/crypto/dkg_jointfeldman.go +++ b/crypto/dkg_jointfeldman.go @@ -1,10 +1,5 @@ -//go:build relic -// +build relic - package crypto -// #cgo CFLAGS: -g -Wall -std=c99 -// #cgo LDFLAGS: -L${SRCDIR}/relic/build/lib -l relic_s // #include "dkg_include.h" import "C" @@ -34,7 +29,7 @@ import ( // from the protocol, and the overall key is taking into account // all chunks from qualified dealers. -// Private keys are scalar in Zr, where r is the group order of G1/G2 +// Private keys are scalar in Fr, where r is the group order of G1/G2 // Public keys are in G2. // Joint Feldman protocol, with complaint mechanism, implements DKGState @@ -45,11 +40,11 @@ type JointFeldmanState struct { // feldmanVSSQualState parallel states fvss []feldmanVSSQualState // is the group public key - jointPublicKey pointG2 + jointPublicKey pointE2 // Private share of the current participant jointx scalar // Public keys of the group participants, the vector size is (n) - jointy []pointG2 + jointy []pointE2 } // NewJointFeldman creates a new instance of a Joint Feldman protocol. @@ -194,7 +189,7 @@ func (s *JointFeldmanState) End() (PrivateKey, PublicKey, []PublicKey, error) { if disqualifiedTotal > s.threshold || s.size-disqualifiedTotal <= s.threshold { return nil, nil, nil, dkgFailureErrorf( - "Joint-Feldman failed because the diqualified participants number is high: %d disqualified, threshold is %d, size is %d", + "Joint-Feldman failed because the disqualified participants number is high: %d disqualified, threshold is %d, size is %d", disqualifiedTotal, s.threshold, s.size) } @@ -298,34 +293,33 @@ func (s *JointFeldmanState) ForceDisqualify(participant int) error { } // sum up the 3 type of keys from all qualified dealers to end the protocol -func (s *JointFeldmanState) sumUpQualifiedKeys(qualified int) (*scalar, *pointG2, []pointG2) { +func (s *JointFeldmanState) sumUpQualifiedKeys(qualified int) (*scalar, *pointE2, []pointE2) { qualifiedx, qualifiedPubKey, qualifiedy := s.getQualifiedKeys(qualified) // sum up x var jointx scalar - C.bn_new_wrapper((*C.bn_st)(&jointx)) - C.bn_sum_vector((*C.bn_st)(&jointx), (*C.bn_st)(&qualifiedx[0]), + C.Fr_sum_vector((*C.Fr)(&jointx), (*C.Fr)(&qualifiedx[0]), (C.int)(qualified)) // sum up Y - var jointPublicKey pointG2 - C.ep2_sum_vector((*C.ep2_st)(&jointPublicKey), - (*C.ep2_st)(&qualifiedPubKey[0]), (C.int)(qualified)) + var jointPublicKey pointE2 + C.E2_sum_vector_to_affine((*C.E2)(&jointPublicKey), + (*C.E2)(&qualifiedPubKey[0]), (C.int)(qualified)) // sum up []y - jointy := make([]pointG2, s.size) + jointy := make([]pointE2, s.size) for i := 0; i < s.size; i++ { - C.ep2_sum_vector((*C.ep2_st)(&jointy[i]), - (*C.ep2_st)(&qualifiedy[i][0]), (C.int)(qualified)) + C.E2_sum_vector_to_affine((*C.E2)(&jointy[i]), + (*C.E2)(&qualifiedy[i][0]), (C.int)(qualified)) } return &jointx, &jointPublicKey, jointy } // get the 3 type of keys from all qualified dealers -func (s *JointFeldmanState) getQualifiedKeys(qualified int) ([]scalar, []pointG2, [][]pointG2) { +func (s *JointFeldmanState) getQualifiedKeys(qualified int) ([]scalar, []pointE2, [][]pointE2) { qualifiedx := make([]scalar, 0, qualified) - qualifiedPubKey := make([]pointG2, 0, qualified) - qualifiedy := make([][]pointG2, s.size) + qualifiedPubKey := make([]pointE2, 0, qualified) + qualifiedy := make([][]pointE2, s.size) for i := 0; i < s.size; i++ { - qualifiedy[i] = make([]pointG2, 0, qualified) + qualifiedy[i] = make([]pointE2, 0, qualified) } for i := 0; i < s.size; i++ { diff --git a/crypto/dkg_test.go b/crypto/dkg_test.go index 3cc1d172cca..2bd4dc51fa0 100644 --- a/crypto/dkg_test.go +++ b/crypto/dkg_test.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package crypto import ( @@ -292,7 +289,7 @@ func dkgCommonTest(t *testing.T, dkg int, n int, threshold int, test testCase) { // start DKG in all participants // start listening on the channels - seed := make([]byte, SeedMinLenDKG) + seed := make([]byte, KeyGenSeedMinLen) sync.Add(n) log.Info("DKG protocol starts") @@ -366,7 +363,6 @@ func dkgCommonTest(t *testing.T, dkg int, n int, threshold int, test testCase) { "2 group public keys are mismatching") } } - } // time after which a silent channel causes switching to the next dkg phase @@ -591,13 +587,12 @@ func (proc *testDKGProcessor) invalidShareSend(dest int, data []byte) { } } else { - gt.Logf("turns out to be a honest send\n%x\n", data) + gt.Logf("%d to %d: turns out to be a honest send\n%x\n", data, proc.current, dest) } // honest send case: this is the only message sent // malicious send case: this is a second correct send, to test the second message gets ignored // by the receiver (sender has been tagged malicious after the first send) proc.chans[dest] <- originalMsg - } // This is a testing function @@ -771,7 +766,7 @@ func TestDKGTransitionErrors(t *testing.T) { threshold := 3 myIndex := 0 dealer := 1 - seed := make([]byte, SeedMinLenDKG) + seed := make([]byte, KeyGenSeedMinLen) t.Run("feldman VSS", func(t *testing.T) { state, err := NewFeldmanVSS(n, threshold, myIndex, dummyTestDKGProcessor{}, dealer) diff --git a/crypto/ecdsa.go b/crypto/ecdsa.go index dca3604570a..b09d3d5922f 100644 --- a/crypto/ecdsa.go +++ b/crypto/ecdsa.go @@ -321,7 +321,7 @@ func (a *ecdsaAlgo) decodePublicKeyCompressed(pkBytes []byte) (PublicKey, error) return &pubKeyECDSA{a, goPubKey}, nil } -// prKeyECDSA is the private key of ECDSA, it implements the generic PrivateKey +// prKeyECDSA is the private key of ECDSA, it implements the interface PrivateKey type prKeyECDSA struct { // the signature algo alg *ecdsaAlgo @@ -331,6 +331,8 @@ type prKeyECDSA struct { pubKey *pubKeyECDSA } +var _ PrivateKey = (*prKeyECDSA)(nil) + // Algorithm returns the algo related to the private key func (sk *prKeyECDSA) Algorithm() SigningAlgorithm { return sk.alg.algo @@ -399,6 +401,8 @@ type pubKeyECDSA struct { goPubKey *ecdsa.PublicKey } +var _ PublicKey = (*pubKeyECDSA)(nil) + // Algorithm returns the the algo related to the private key func (pk *pubKeyECDSA) Algorithm() SigningAlgorithm { return pk.alg.algo diff --git a/crypto/ecdsa_test.go b/crypto/ecdsa_test.go index cf9a137e1e7..ed005a11e07 100644 --- a/crypto/ecdsa_test.go +++ b/crypto/ecdsa_test.go @@ -1,6 +1,3 @@ -//go:build !relic -// +build !relic - package crypto import ( @@ -160,7 +157,7 @@ func TestECDSAUtils(t *testing.T) { // TestScalarMult is a unit test of the scalar multiplication // This is only a sanity check meant to make sure the curve implemented // is checked against an independent test vector -func TestScalarMult(t *testing.T) { +func TestScalarMultP256_secp256k1(t *testing.T) { secp256k1 := secp256k1Instance.curve p256 := p256Instance.curve genericMultTests := []struct { diff --git a/crypto/go.mod b/crypto/go.mod index 9895e1c35db..d31f36cf023 100644 --- a/crypto/go.mod +++ b/crypto/go.mod @@ -6,7 +6,6 @@ require ( github.com/btcsuite/btcd/btcec/v2 v2.2.1 github.com/sirupsen/logrus v1.4.2 github.com/stretchr/testify v1.8.0 - github.com/supranational/blst v0.3.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d gonum.org/v1/gonum v0.6.1 pgregory.net/rapid v0.4.7 diff --git a/crypto/go.sum b/crypto/go.sum index 19a05d05d6d..820bb87a41c 100644 --- a/crypto/go.sum +++ b/crypto/go.sum @@ -28,8 +28,6 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/supranational/blst v0.3.10 h1:CMciDZ/h4pXDDXQASe8ZGTNKUiVNxVVA5hpci2Uuhuk= -github.com/supranational/blst v0.3.10/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d h1:sK3txAijHtOK88l68nt020reeT1ZdKLIYetKl95FzVY= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= diff --git a/crypto/internal/blst/blst.go b/crypto/internal/blst/blst.go new file mode 100644 index 00000000000..c890f55e367 --- /dev/null +++ b/crypto/internal/blst/blst.go @@ -0,0 +1,3434 @@ +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// DO NOT EDIT THIS FILE!! +// The file is generated from *.tgo by generate.py +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +package blst + +// #cgo CFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../../build -I${SRCDIR}/../../src -D__BLST_CGO__ -fno-builtin-memcpy -fno-builtin-memset +// #cgo amd64 CFLAGS: -D__ADX__ -mno-avx +// #cgo mips64 mips64le ppc64 ppc64le riscv64 s390x CFLAGS: -D__BLST_NO_ASM__ +// #include "blst.h" +// +// #if defined(__x86_64__) && (defined(__unix__) || defined(__APPLE__)) +// # include +// # include +// static void handler(int signum) +// { ssize_t n = write(2, "Caught SIGILL in blst_cgo_init, " +// "consult /bindings/go/README.md.\n", 70); +// _exit(128+SIGILL); +// (void)n; +// } +// __attribute__((constructor)) static void blst_cgo_init() +// { blst_fp temp = { 0 }; +// struct sigaction act = { handler }, oact; +// sigaction(SIGILL, &act, &oact); +// blst_fp_sqr(&temp, &temp); +// sigaction(SIGILL, &oact, NULL); +// } +// #endif +// +// static size_t go_pairing_sizeof(size_t DST_len) +// { return (blst_pairing_sizeof() + DST_len + sizeof(blst_pairing) - 1) / +// sizeof(blst_pairing); +// } +// static void go_pairing_init(blst_pairing *new_ctx, bool hash_or_encode, +// const byte *DST, size_t DST_len) +// { if (DST != NULL) { +// byte *dst = (byte*)new_ctx + blst_pairing_sizeof(); +// for(size_t i = 0; i < DST_len; i++) dst[i] = DST[i]; +// DST = dst; +// } +// blst_pairing_init(new_ctx, hash_or_encode, DST, DST_len); +// } +// static void go_pairing_as_fp12(blst_fp12 *pt, blst_pairing *ctx) +// { *pt = *blst_pairing_as_fp12(ctx); } +// +// static void go_p1slice_to_affine(blst_p1_affine dst[], +// const blst_p1 points[], size_t npoints) +// { const blst_p1 *ppoints[2] = { points, NULL }; +// blst_p1s_to_affine(dst, ppoints, npoints); +// } +// static void go_p1slice_add(blst_p1 *dst, const blst_p1_affine points[], +// size_t npoints) +// { const blst_p1_affine *ppoints[2] = { points, NULL }; +// blst_p1s_add(dst, ppoints, npoints); +// } +// static void go_p2slice_to_affine(blst_p2_affine dst[], +// const blst_p2 points[], size_t npoints) +// { const blst_p2 *ppoints[2] = { points, NULL }; +// blst_p2s_to_affine(dst, ppoints, npoints); +// } +// static void go_p2slice_add(blst_p2 *dst, const blst_p2_affine points[], +// size_t npoints) +// { const blst_p2_affine *ppoints[2] = { points, NULL }; +// blst_p2s_add(dst, ppoints, npoints); +// } +// +// static void go_p1_mult_n_acc(blst_p1 *acc, const blst_fp *x, bool affine, +// const byte *scalar, size_t nbits) +// { blst_p1 m[1]; +// const void *p = x; +// if (p == NULL) +// p = blst_p1_generator(); +// else if (affine) +// blst_p1_from_affine(m, p), p = m; +// blst_p1_mult(m, p, scalar, nbits); +// blst_p1_add_or_double(acc, acc, m); +// } +// static void go_p2_mult_n_acc(blst_p2 *acc, const blst_fp2 *x, bool affine, +// const byte *scalar, size_t nbits) +// { blst_p2 m[1]; +// const void *p = x; +// if (p == NULL) +// p = blst_p2_generator(); +// else if (affine) +// blst_p2_from_affine(m, p), p = m; +// blst_p2_mult(m, p, scalar, nbits); +// blst_p2_add_or_double(acc, acc, m); +// } +// +// static void go_p1_sub_assign(blst_p1 *a, const blst_fp *x, bool affine) +// { blst_p1 minus_b; +// if (affine) +// blst_p1_from_affine(&minus_b, (const blst_p1_affine*)x); +// else +// minus_b = *(const blst_p1*)x; +// blst_p1_cneg(&minus_b, 1); +// blst_p1_add_or_double(a, a, &minus_b); +// } +// +// static void go_p2_sub_assign(blst_p2 *a, const blst_fp2 *x, bool affine) +// { blst_p2 minus_b; +// if (affine) +// blst_p2_from_affine(&minus_b, (const blst_p2_affine*)x); +// else +// minus_b = *(const blst_p2*)x; +// blst_p2_cneg(&minus_b, 1); +// blst_p2_add_or_double(a, a, &minus_b); +// } +// +// static bool go_scalar_from_bendian(blst_scalar *ret, const byte *in) +// { blst_scalar_from_bendian(ret, in); +// return blst_sk_check(ret); +// } +// static bool go_hash_to_scalar(blst_scalar *ret, +// const byte *msg, size_t msg_len, +// const byte *DST, size_t DST_len) +// { byte elem[48]; +// blst_expand_message_xmd(elem, sizeof(elem), msg, msg_len, DST, DST_len); +// return blst_scalar_from_be_bytes(ret, elem, sizeof(elem)); +// } +// static void go_miller_loop_n(blst_fp12 *dst, const blst_p2_affine Q[], +// const blst_p1_affine P[], +// size_t npoints, bool acc) +// { const blst_p2_affine *Qs[2] = { Q, NULL }; +// const blst_p1_affine *Ps[2] = { P, NULL }; +// if (acc) { +// blst_fp12 tmp; +// blst_miller_loop_n(&tmp, Qs, Ps, npoints); +// blst_fp12_mul(dst, dst, &tmp); +// } else { +// blst_miller_loop_n(dst, Qs, Ps, npoints); +// } +// } +// static void go_fp12slice_mul(blst_fp12 *dst, const blst_fp12 in[], size_t n) +// { size_t i; +// blst_fp12_mul(dst, &in[0], &in[1]); +// for (i = 2; i < n; i++) +// blst_fp12_mul(dst, dst, &in[i]); +// } +import "C" +import ( + "fmt" + "math/bits" + "runtime" + "sync" + "sync/atomic" +) + +const BLST_SCALAR_BYTES = 256 / 8 +const BLST_FP_BYTES = 384 / 8 +const BLST_P1_COMPRESS_BYTES = BLST_FP_BYTES +const BLST_P1_SERIALIZE_BYTES = BLST_FP_BYTES * 2 +const BLST_P2_COMPRESS_BYTES = BLST_FP_BYTES * 2 +const BLST_P2_SERIALIZE_BYTES = BLST_FP_BYTES * 4 + +type Scalar = C.blst_scalar +type Fp = C.blst_fp +type Fp2 = C.blst_fp2 +type Fp6 = C.blst_fp6 +type Fp12 = C.blst_fp12 +type P1 = C.blst_p1 +type P2 = C.blst_p2 +type P1Affine = C.blst_p1_affine +type P2Affine = C.blst_p2_affine +type Message = []byte +type Pairing = []C.blst_pairing +type SecretKey = Scalar +type P1s []P1 +type P2s []P2 +type P1Affines []P1Affine +type P2Affines []P2Affine + +// +// Configuration +// + +var maxProcs = initMaxProcs() + +func initMaxProcs() int { + maxProcs := runtime.GOMAXPROCS(0) + var version float32 + _, err := fmt.Sscanf(runtime.Version(), "go%f", &version) + if err != nil || version < 1.14 { + // be cooperative and leave one processor for the application + maxProcs -= 1 + } + if maxProcs <= 0 { + maxProcs = 1 + } + return maxProcs +} + +func SetMaxProcs(max int) { + if max <= 0 { + max = 1 + } + maxProcs = max +} + +// Secret key +func (sk *SecretKey) Zeroize() { + var zero SecretKey + *sk = zero +} + +func KeyGen(ikm []byte, optional ...[]byte) *SecretKey { + var sk SecretKey + var info []byte + var infoP *C.byte + if len(optional) > 0 { + info = optional[0] + if len(info) > 0 { + infoP = (*C.byte)(&info[0]) + } + } + if len(ikm) < 32 { + return nil + } + C.blst_keygen(&sk, (*C.byte)(&ikm[0]), C.size_t(len(ikm)), + infoP, C.size_t(len(info))) + // Postponing secret key zeroing till garbage collection can be too + // late to be effective, but every little bit helps... + runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() }) + return &sk +} + +func KeyGenV3(ikm []byte, optional ...[]byte) *SecretKey { + if len(ikm) < 32 { + return nil + } + var sk SecretKey + var info []byte + var infoP *C.byte + if len(optional) > 0 { + info = optional[0] + if len(info) > 0 { + infoP = (*C.byte)(&info[0]) + } + } + C.blst_keygen_v3(&sk, (*C.byte)(&ikm[0]), C.size_t(len(ikm)), + infoP, C.size_t(len(info))) + // Postponing secret key zeroing till garbage collection can be too + // late to be effective, but every little bit helps... + runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() }) + return &sk +} + +func KeyGenV45(ikm []byte, salt []byte, optional ...[]byte) *SecretKey { + if len(ikm) < 32 { + return nil + } + var sk SecretKey + var info []byte + var infoP *C.byte + if len(optional) > 0 { + info = optional[0] + if len(info) > 0 { + infoP = (*C.byte)(&info[0]) + } + } + C.blst_keygen_v4_5(&sk, (*C.byte)(&ikm[0]), C.size_t(len(ikm)), + (*C.byte)(&salt[0]), C.size_t(len(salt)), + infoP, C.size_t(len(info))) + // Postponing secret key zeroing till garbage collection can be too + // late to be effective, but every little bit helps... + runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() }) + return &sk +} + +func KeyGenV5(ikm []byte, salt []byte, optional ...[]byte) *SecretKey { + if len(ikm) < 32 { + return nil + } + var sk SecretKey + var info []byte + var infoP *C.byte + if len(optional) > 0 { + info = optional[0] + if len(info) > 0 { + infoP = (*C.byte)(&info[0]) + } + } + C.blst_keygen_v5(&sk, (*C.byte)(&ikm[0]), C.size_t(len(ikm)), + (*C.byte)(&salt[0]), C.size_t(len(salt)), + infoP, C.size_t(len(info))) + // Postponing secret key zeroing till garbage collection can be too + // late to be effective, but every little bit helps... + runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() }) + return &sk +} + +func DeriveMasterEip2333(ikm []byte) *SecretKey { + if len(ikm) < 32 { + return nil + } + var sk SecretKey + C.blst_derive_master_eip2333(&sk, (*C.byte)(&ikm[0]), C.size_t(len(ikm))) + // Postponing secret key zeroing till garbage collection can be too + // late to be effective, but every little bit helps... + runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() }) + return &sk +} + +func (master *SecretKey) DeriveChildEip2333(child_index uint32) *SecretKey { + var sk SecretKey + C.blst_derive_child_eip2333(&sk, master, C.uint(child_index)) + // Postponing secret key zeroing till garbage collection can be too + // late to be effective, but every little bit helps... + runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() }) + return &sk +} + +// Pairing +func PairingCtx(hash_or_encode bool, DST []byte) Pairing { + DST_len := C.size_t(len(DST)) + ctx := make([]C.blst_pairing, int(C.go_pairing_sizeof(DST_len))) + var uDST *C.byte + if DST_len > 0 { + uDST = (*C.byte)(&DST[0]) + } + C.go_pairing_init(&ctx[0], C.bool(hash_or_encode), uDST, DST_len) + return ctx +} + +func PairingCommit(ctx Pairing) { + C.blst_pairing_commit(&ctx[0]) +} + +func PairingMerge(ctx Pairing, ctx1 Pairing) int { + r := C.blst_pairing_merge(&ctx[0], &ctx1[0]) + return int(r) +} + +func PairingFinalVerify(ctx Pairing, optional ...*Fp12) bool { + var gtsig *Fp12 = nil + if len(optional) > 0 { + gtsig = optional[0] + } + return bool(C.blst_pairing_finalverify(&ctx[0], gtsig)) +} + +func PairingRawAggregate(ctx Pairing, q *P2Affine, p *P1Affine) { + C.blst_pairing_raw_aggregate(&ctx[0], q, p) +} + +func PairingAsFp12(ctx Pairing) *Fp12 { + var pt Fp12 + C.go_pairing_as_fp12(&pt, &ctx[0]) + return &pt +} + +func Fp12One() Fp12 { + return *C.blst_fp12_one() +} + +func Fp12FinalVerify(pt1 *Fp12, pt2 *Fp12) bool { + return bool(C.blst_fp12_finalverify(pt1, pt2)) +} + +func Fp12MillerLoop(q *P2Affine, p *P1Affine) *Fp12 { + var pt Fp12 + C.blst_miller_loop(&pt, q, p) + return &pt +} + +func Fp12MillerLoopN(qs []P2Affine, ps []P1Affine) *Fp12 { + if len(qs) != len(ps) || len(qs) == 0 { + panic("inputs' lengths mismatch") + } + + nElems := uint32(len(qs)) + nThreads := uint32(maxProcs) + + if nThreads == 1 || nElems == 1 { + var pt Fp12 + C.go_miller_loop_n(&pt, &qs[0], &ps[0], C.size_t(nElems), false) + return &pt + } + + stride := (nElems + nThreads - 1) / nThreads + if stride > 16 { + stride = 16 + } + + strides := (nElems + stride - 1) / stride + if nThreads > strides { + nThreads = strides + } + + msgsCh := make(chan Fp12, nThreads) + curElem := uint32(0) + + for tid := uint32(0); tid < nThreads; tid++ { + go func() { + acc := Fp12One() + first := true + for { + work := atomic.AddUint32(&curElem, stride) - stride + if work >= nElems { + break + } + n := nElems - work + if n > stride { + n = stride + } + C.go_miller_loop_n(&acc, &qs[work], &ps[work], C.size_t(n), + C.bool(!first)) + first = false + } + msgsCh <- acc + }() + } + + var ret = make([]Fp12, nThreads) + for i := range ret { + ret[i] = <-msgsCh + } + + var pt Fp12 + C.go_fp12slice_mul(&pt, &ret[0], C.size_t(nThreads)) + return &pt +} + +func (pt *Fp12) MulAssign(p *Fp12) { + C.blst_fp12_mul(pt, pt, p) +} + +func (pt *Fp12) FinalExp() { + C.blst_final_exp(pt, pt) +} + +func (pt *Fp12) InGroup() bool { + return bool(C.blst_fp12_in_group(pt)) +} + +func (pt *Fp12) ToBendian() []byte { + var out [BLST_FP_BYTES * 12]byte + C.blst_bendian_from_fp12((*C.byte)(&out[0]), pt) + return out[:] +} + +func (pt1 *Fp12) Equals(pt2 *Fp12) bool { + return *pt1 == *pt2 +} + +// +// MIN-PK +// + +// +// PublicKey +// + +func (pk *P1Affine) From(s *Scalar) *P1Affine { + C.blst_sk_to_pk2_in_g1(nil, pk, s) + return pk +} + +func (pk *P1Affine) KeyValidate() bool { + return !bool(C.blst_p1_affine_is_inf(pk)) && + bool(C.blst_p1_affine_in_g1(pk)) +} + +// sigInfcheck, check for infinity, is a way to avoid going +// into resource-consuming verification. Passing 'false' is +// always cryptographically safe, but application might want +// to guard against obviously bogus individual[!] signatures. +func (sig *P2Affine) SigValidate(sigInfcheck bool) bool { + if sigInfcheck && bool(C.blst_p2_affine_is_inf(sig)) { + return false + } + return bool(C.blst_p2_affine_in_g2(sig)) +} + +// +// Sign +// + +func (sig *P2Affine) Sign(sk *SecretKey, msg []byte, dst []byte, + optional ...interface{}) *P2Affine { + augSingle, aug, useHash, ok := parseOpts(optional...) + if !ok || len(aug) != 0 { + return nil + } + + var q *P2 + if useHash { + q = HashToG2(msg, dst, augSingle) + } else { + q = EncodeToG2(msg, dst, augSingle) + } + C.blst_sign_pk2_in_g1(nil, sig, q, sk) + return sig +} + +// +// Signature +// + +// Functions to return a signature and public key+augmentation tuple. +// This enables point decompression (if needed) to happen in parallel. +type sigGetterP2 func() *P2Affine +type pkGetterP1 func(i uint32, temp *P1Affine) (*P1Affine, []byte) + +// Single verify with decompressed pk +func (sig *P2Affine) Verify(sigGroupcheck bool, pk *P1Affine, pkValidate bool, + msg Message, dst []byte, + optional ...interface{}) bool { // useHash bool, aug []byte + + aug, _, useHash, ok := parseOpts(optional...) + if !ok { + return false + } + return sig.AggregateVerify(sigGroupcheck, []*P1Affine{pk}, pkValidate, + []Message{msg}, dst, useHash, [][]byte{aug}) +} + +// Single verify with compressed pk +// Uses a dummy signature to get the correct type +func (dummy *P2Affine) VerifyCompressed(sig []byte, sigGroupcheck bool, + pk []byte, pkValidate bool, msg Message, dst []byte, + optional ...bool) bool { // useHash bool, usePksAsAugs bool + + return dummy.AggregateVerifyCompressed(sig, sigGroupcheck, + [][]byte{pk}, pkValidate, + []Message{msg}, dst, optional...) +} + +// Aggregate verify with uncompressed signature and public keys +// Note that checking message uniqueness, if required, is left to the user. +// Not all signature schemes require it and this keeps the binding minimal +// and fast. Refer to the Uniq function for one method method of performing +// this check. +func (sig *P2Affine) AggregateVerify(sigGroupcheck bool, + pks []*P1Affine, pksVerify bool, msgs []Message, dst []byte, + optional ...interface{}) bool { // useHash bool, augs [][]byte + + // sanity checks and argument parsing + n := len(pks) + if n == 0 || len(msgs) != n { + return false + } + _, augs, useHash, ok := parseOpts(optional...) + useAugs := len(augs) != 0 + if !ok || (useAugs && len(augs) != n) { + return false + } + + sigFn := func() *P2Affine { + return sig + } + + pkFn := func(i uint32, _ *P1Affine) (*P1Affine, []byte) { + if useAugs { + return pks[i], augs[i] + } else { + return pks[i], nil + } + } + + return coreAggregateVerifyPkInG1(sigFn, sigGroupcheck, pkFn, pksVerify, + msgs, dst, useHash) +} + +// Aggregate verify with compressed signature and public keys +// Uses a dummy signature to get the correct type +func (dummy *P2Affine) AggregateVerifyCompressed(sig []byte, sigGroupcheck bool, + pks [][]byte, pksVerify bool, msgs []Message, dst []byte, + optional ...bool) bool { // useHash bool, usePksAsAugs bool + + // sanity checks and argument parsing + if len(pks) != len(msgs) { + return false + } + useHash := true + if len(optional) > 0 { + useHash = optional[0] + } + usePksAsAugs := false + if len(optional) > 1 { + usePksAsAugs = optional[1] + } + + sigFn := func() *P2Affine { + sigP := new(P2Affine) + if sigP.Uncompress(sig) == nil { + return nil + } + return sigP + } + pkFn := func(i uint32, pk *P1Affine) (*P1Affine, []byte) { + bytes := pks[i] + if len(bytes) == BLST_P1_SERIALIZE_BYTES && (bytes[0]&0x80) == 0 { + // Not compressed + if pk.Deserialize(bytes) == nil { + return nil, nil + } + } else if len(bytes) == BLST_P1_COMPRESS_BYTES && (bytes[0]&0x80) != 0 { + if pk.Uncompress(bytes) == nil { + return nil, nil + } + } else { + return nil, nil + } + if usePksAsAugs { + return pk, bytes + } + return pk, nil + } + return coreAggregateVerifyPkInG1(sigFn, sigGroupcheck, pkFn, pksVerify, + msgs, dst, useHash) +} + +func coreAggregateVerifyPkInG1(sigFn sigGetterP2, sigGroupcheck bool, + pkFn pkGetterP1, pkValidate bool, msgs []Message, dst []byte, + optional ...bool) bool { // useHash + + n := len(msgs) + if n == 0 { + return false + } + + useHash := true + if len(optional) > 0 { + useHash = optional[0] + } + + numCores := runtime.GOMAXPROCS(0) + numThreads := maxProcs + if numThreads > numCores { + numThreads = numCores + } + if numThreads > n { + numThreads = n + } + // Each thread will determine next message to process by atomically + // incrementing curItem, process corresponding pk,msg[,aug] tuple and + // repeat until n is exceeded. The resulting accumulations will be + // fed into the msgsCh channel. + msgsCh := make(chan Pairing, numThreads) + valid := int32(1) + curItem := uint32(0) + mutex := sync.Mutex{} + + mutex.Lock() + for tid := 0; tid < numThreads; tid++ { + go func() { + pairing := PairingCtx(useHash, dst) + var temp P1Affine + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } else if work == 0 && maxProcs == numCores-1 && + numThreads == maxProcs { + // Avoid consuming all cores by waiting until the + // main thread has completed its miller loop before + // proceeding. + mutex.Lock() + mutex.Unlock() + } + + // Pull Public Key and augmentation blob + curPk, aug := pkFn(work, &temp) + if curPk == nil { + atomic.StoreInt32(&valid, 0) + break + } + + // Pairing and accumulate + ret := PairingAggregatePkInG1(pairing, curPk, pkValidate, + nil, false, msgs[work], aug) + if ret != C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + break + } + + // application might have some async work to do + runtime.Gosched() + } + if atomic.LoadInt32(&valid) > 0 { + PairingCommit(pairing) + msgsCh <- pairing + } else { + msgsCh <- nil + } + }() + } + + // Uncompress and check signature + var gtsig Fp12 + sig := sigFn() + if sig == nil { + atomic.StoreInt32(&valid, 0) + } + if atomic.LoadInt32(&valid) > 0 && sigGroupcheck && + !sig.SigValidate(false) { + atomic.StoreInt32(&valid, 0) + } + if atomic.LoadInt32(&valid) > 0 { + C.blst_aggregated_in_g2(>sig, sig) + } + mutex.Unlock() + + // Accumulate the thread results + var pairings Pairing + for i := 0; i < numThreads; i++ { + msg := <-msgsCh + if msg != nil { + if pairings == nil { + pairings = msg + } else { + ret := PairingMerge(pairings, msg) + if ret != C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + } + } + } + } + if atomic.LoadInt32(&valid) == 0 || pairings == nil { + return false + } + + return PairingFinalVerify(pairings, >sig) +} + +func CoreVerifyPkInG1(pk *P1Affine, sig *P2Affine, hash_or_encode bool, + msg Message, dst []byte, optional ...[]byte) int { + + var aug []byte + var uaug *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + uaug = (*C.byte)(&aug[0]) + } + } + + if runtime.NumGoroutine() < maxProcs { + sigFn := func() *P2Affine { + return sig + } + pkFn := func(_ uint32, _ *P1Affine) (*P1Affine, []byte) { + return pk, aug + } + if !coreAggregateVerifyPkInG1(sigFn, true, pkFn, true, []Message{msg}, + dst, hash_or_encode) { + return C.BLST_VERIFY_FAIL + } + return C.BLST_SUCCESS + } + + var udst *C.byte + if len(dst) > 0 { + udst = (*C.byte)(&dst[0]) + } + var umsg *C.byte + if len(msg) > 0 { + umsg = (*C.byte)(&msg[0]) + } + + return int(C.blst_core_verify_pk_in_g1(pk, sig, C.bool(hash_or_encode), + umsg, C.size_t(len(msg)), + udst, C.size_t(len(dst)), + uaug, C.size_t(len(aug)))) +} + +// pks are assumed to be verified for proof of possession, +// which implies that they are already group-checked +func (sig *P2Affine) FastAggregateVerify(sigGroupcheck bool, + pks []*P1Affine, msg Message, dst []byte, + optional ...interface{}) bool { // pass-through to Verify + n := len(pks) + + // TODO: return value for length zero? + if n == 0 { + return false + } + + aggregator := new(P1Aggregate) + if !aggregator.Aggregate(pks, false) { + return false + } + pkAff := aggregator.ToAffine() + + // Verify + return sig.Verify(sigGroupcheck, pkAff, false, msg, dst, optional...) +} + +func (dummy *P2Affine) MultipleAggregateVerify(sigs []*P2Affine, + sigsGroupcheck bool, pks []*P1Affine, pksVerify bool, + msgs []Message, dst []byte, randFn func(*Scalar), randBits int, + optional ...interface{}) bool { // useHash + + // Sanity checks and argument parsing + n := len(pks) + if n == 0 || len(msgs) != n || len(sigs) != n { + return false + } + _, augs, useHash, ok := parseOpts(optional...) + useAugs := len(augs) != 0 + if !ok || (useAugs && len(augs) != n) { + return false + } + + paramsFn := + func(work uint32, sig *P2Affine, pk *P1Affine, rand *Scalar) ( + *P2Affine, *P1Affine, *Scalar, []byte) { + randFn(rand) + var aug []byte + if useAugs { + aug = augs[work] + } + return sigs[work], pks[work], rand, aug + } + + return multipleAggregateVerifyPkInG1(paramsFn, sigsGroupcheck, pksVerify, + msgs, dst, randBits, useHash) +} + +type mulAggGetterPkInG1 func(work uint32, sig *P2Affine, pk *P1Affine, + rand *Scalar) (*P2Affine, *P1Affine, *Scalar, []byte) + +func multipleAggregateVerifyPkInG1(paramsFn mulAggGetterPkInG1, + sigsGroupcheck bool, pksVerify bool, msgs []Message, + dst []byte, randBits int, + optional ...bool) bool { // useHash + n := len(msgs) + if n == 0 { + return false + } + + useHash := true + if len(optional) > 0 { + useHash = optional[0] + } + + numCores := runtime.GOMAXPROCS(0) + numThreads := maxProcs + if numThreads > numCores { + numThreads = numCores + } + if numThreads > n { + numThreads = n + } + // Each thread will determine next message to process by atomically + // incrementing curItem, process corresponding pk,msg[,aug] tuple and + // repeat until n is exceeded. The resulting accumulations will be + // fed into the msgsCh channel. + msgsCh := make(chan Pairing, numThreads) + valid := int32(1) + curItem := uint32(0) + + for tid := 0; tid < numThreads; tid++ { + go func() { + pairing := PairingCtx(useHash, dst) + var tempRand Scalar + var tempPk P1Affine + var tempSig P2Affine + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } + + curSig, curPk, curRand, aug := paramsFn(work, &tempSig, + &tempPk, &tempRand) + + if PairingMulNAggregatePkInG1(pairing, curPk, pksVerify, + curSig, sigsGroupcheck, curRand, + randBits, msgs[work], aug) != + C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + break + } + + // application might have some async work to do + runtime.Gosched() + } + if atomic.LoadInt32(&valid) > 0 { + PairingCommit(pairing) + msgsCh <- pairing + } else { + msgsCh <- nil + } + }() + } + + // Accumulate the thread results + var pairings Pairing + for i := 0; i < numThreads; i++ { + msg := <-msgsCh + if msg != nil { + if pairings == nil { + pairings = msg + } else { + ret := PairingMerge(pairings, msg) + if ret != C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + } + } + } + } + if atomic.LoadInt32(&valid) == 0 || pairings == nil { + return false + } + + return PairingFinalVerify(pairings, nil) +} + +// +// Aggregate P2 +// + +type aggGetterP2 func(i uint32, temp *P2Affine) *P2Affine +type P2Aggregate struct { + v *P2 +} + +// Aggregate uncompressed elements +func (agg *P2Aggregate) Aggregate(elmts []*P2Affine, + groupcheck bool) bool { + if len(elmts) == 0 { + return true + } + getter := func(i uint32, _ *P2Affine) *P2Affine { return elmts[i] } + return agg.aggregate(getter, groupcheck, len(elmts)) +} + +// Aggregate compressed elements +func (agg *P2Aggregate) AggregateCompressed(elmts [][]byte, + groupcheck bool) bool { + if len(elmts) == 0 { + return true + } + getter := func(i uint32, p *P2Affine) *P2Affine { + bytes := elmts[i] + if p.Uncompress(bytes) == nil { + return nil + } + return p + } + return agg.aggregate(getter, groupcheck, len(elmts)) +} + +func (agg *P2Aggregate) AddAggregate(other *P2Aggregate) { + if other.v == nil { + // do nothing + } else if agg.v == nil { + agg.v = other.v + } else { + C.blst_p2_add_or_double(agg.v, agg.v, other.v) + } +} + +func (agg *P2Aggregate) Add(elmt *P2Affine, groupcheck bool) bool { + if groupcheck && !bool(C.blst_p2_affine_in_g2(elmt)) { + return false + } + if agg.v == nil { + agg.v = new(P2) + C.blst_p2_from_affine(agg.v, elmt) + } else { + C.blst_p2_add_or_double_affine(agg.v, agg.v, elmt) + } + return true +} + +func (agg *P2Aggregate) ToAffine() *P2Affine { + if agg.v == nil { + return new(P2Affine) + } + return agg.v.ToAffine() +} + +func (agg *P2Aggregate) aggregate(getter aggGetterP2, groupcheck bool, + n int) bool { + + if n == 0 { + return true + } + // operations are considered short enough for not to care about + // keeping one core free... + numThreads := runtime.GOMAXPROCS(0) + if numThreads > n { + numThreads = n + } + + valid := int32(1) + type result struct { + agg *P2 + empty bool + } + msgs := make(chan result, numThreads) + curItem := uint32(0) + for tid := 0; tid < numThreads; tid++ { + go func() { + first := true + var agg P2 + var temp P2Affine + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } + + // Signature validate + curElmt := getter(work, &temp) + if curElmt == nil { + atomic.StoreInt32(&valid, 0) + break + } + if groupcheck && !bool(C.blst_p2_affine_in_g2(curElmt)) { + atomic.StoreInt32(&valid, 0) + break + } + if first { + C.blst_p2_from_affine(&agg, curElmt) + first = false + } else { + C.blst_p2_add_or_double_affine(&agg, &agg, curElmt) + } + // application might have some async work to do + runtime.Gosched() + } + if first { + msgs <- result{nil, true} + } else if atomic.LoadInt32(&valid) > 0 { + msgs <- result{&agg, false} + } else { + msgs <- result{nil, false} + } + }() + } + + // Accumulate the thread results + first := agg.v == nil + validLocal := true + for i := 0; i < numThreads; i++ { + msg := <-msgs + if !validLocal || msg.empty { + // do nothing + } else if msg.agg == nil { + validLocal = false + // This should be unnecessary but seems safer + atomic.StoreInt32(&valid, 0) + } else { + if first { + agg.v = msg.agg + first = false + } else { + C.blst_p2_add_or_double(agg.v, agg.v, msg.agg) + } + } + } + if atomic.LoadInt32(&valid) == 0 { + agg.v = nil + return false + } + return true +} + +// +// MIN-SIG +// + +// +// PublicKey +// + +func (pk *P2Affine) From(s *Scalar) *P2Affine { + C.blst_sk_to_pk2_in_g2(nil, pk, s) + return pk +} + +func (pk *P2Affine) KeyValidate() bool { + return !bool(C.blst_p2_affine_is_inf(pk)) && + bool(C.blst_p2_affine_in_g2(pk)) +} + +// sigInfcheck, check for infinity, is a way to avoid going +// into resource-consuming verification. Passing 'false' is +// always cryptographically safe, but application might want +// to guard against obviously bogus individual[!] signatures. +func (sig *P1Affine) SigValidate(sigInfcheck bool) bool { + if sigInfcheck && bool(C.blst_p1_affine_is_inf(sig)) { + return false + } + return bool(C.blst_p1_affine_in_g1(sig)) +} + +// +// Sign +// + +func (sig *P1Affine) Sign(sk *SecretKey, msg []byte, dst []byte, + optional ...interface{}) *P1Affine { + augSingle, aug, useHash, ok := parseOpts(optional...) + if !ok || len(aug) != 0 { + return nil + } + + var q *P1 + if useHash { + q = HashToG1(msg, dst, augSingle) + } else { + q = EncodeToG1(msg, dst, augSingle) + } + C.blst_sign_pk2_in_g2(nil, sig, q, sk) + return sig +} + +// +// Signature +// + +// Functions to return a signature and public key+augmentation tuple. +// This enables point decompression (if needed) to happen in parallel. +type sigGetterP1 func() *P1Affine +type pkGetterP2 func(i uint32, temp *P2Affine) (*P2Affine, []byte) + +// Single verify with decompressed pk +func (sig *P1Affine) Verify(sigGroupcheck bool, pk *P2Affine, pkValidate bool, + msg Message, dst []byte, + optional ...interface{}) bool { // useHash bool, aug []byte + + aug, _, useHash, ok := parseOpts(optional...) + if !ok { + return false + } + return sig.AggregateVerify(sigGroupcheck, []*P2Affine{pk}, pkValidate, + []Message{msg}, dst, useHash, [][]byte{aug}) +} + +// Single verify with compressed pk +// Uses a dummy signature to get the correct type +func (dummy *P1Affine) VerifyCompressed(sig []byte, sigGroupcheck bool, + pk []byte, pkValidate bool, msg Message, dst []byte, + optional ...bool) bool { // useHash bool, usePksAsAugs bool + + return dummy.AggregateVerifyCompressed(sig, sigGroupcheck, + [][]byte{pk}, pkValidate, + []Message{msg}, dst, optional...) +} + +// Aggregate verify with uncompressed signature and public keys +// Note that checking message uniqueness, if required, is left to the user. +// Not all signature schemes require it and this keeps the binding minimal +// and fast. Refer to the Uniq function for one method method of performing +// this check. +func (sig *P1Affine) AggregateVerify(sigGroupcheck bool, + pks []*P2Affine, pksVerify bool, msgs []Message, dst []byte, + optional ...interface{}) bool { // useHash bool, augs [][]byte + + // sanity checks and argument parsing + n := len(pks) + if n == 0 || len(msgs) != n { + return false + } + _, augs, useHash, ok := parseOpts(optional...) + useAugs := len(augs) != 0 + if !ok || (useAugs && len(augs) != n) { + return false + } + + sigFn := func() *P1Affine { + return sig + } + + pkFn := func(i uint32, _ *P2Affine) (*P2Affine, []byte) { + if useAugs { + return pks[i], augs[i] + } else { + return pks[i], nil + } + } + + return coreAggregateVerifyPkInG2(sigFn, sigGroupcheck, pkFn, pksVerify, + msgs, dst, useHash) +} + +// Aggregate verify with compressed signature and public keys +// Uses a dummy signature to get the correct type +func (dummy *P1Affine) AggregateVerifyCompressed(sig []byte, sigGroupcheck bool, + pks [][]byte, pksVerify bool, msgs []Message, dst []byte, + optional ...bool) bool { // useHash bool, usePksAsAugs bool + + // sanity checks and argument parsing + if len(pks) != len(msgs) { + return false + } + useHash := true + if len(optional) > 0 { + useHash = optional[0] + } + usePksAsAugs := false + if len(optional) > 1 { + usePksAsAugs = optional[1] + } + + sigFn := func() *P1Affine { + sigP := new(P1Affine) + if sigP.Uncompress(sig) == nil { + return nil + } + return sigP + } + pkFn := func(i uint32, pk *P2Affine) (*P2Affine, []byte) { + bytes := pks[i] + if len(bytes) == BLST_P2_SERIALIZE_BYTES && (bytes[0]&0x80) == 0 { + // Not compressed + if pk.Deserialize(bytes) == nil { + return nil, nil + } + } else if len(bytes) == BLST_P2_COMPRESS_BYTES && (bytes[0]&0x80) != 0 { + if pk.Uncompress(bytes) == nil { + return nil, nil + } + } else { + return nil, nil + } + if usePksAsAugs { + return pk, bytes + } + return pk, nil + } + return coreAggregateVerifyPkInG2(sigFn, sigGroupcheck, pkFn, pksVerify, + msgs, dst, useHash) +} + +func coreAggregateVerifyPkInG2(sigFn sigGetterP1, sigGroupcheck bool, + pkFn pkGetterP2, pkValidate bool, msgs []Message, dst []byte, + optional ...bool) bool { // useHash + + n := len(msgs) + if n == 0 { + return false + } + + useHash := true + if len(optional) > 0 { + useHash = optional[0] + } + + numCores := runtime.GOMAXPROCS(0) + numThreads := maxProcs + if numThreads > numCores { + numThreads = numCores + } + if numThreads > n { + numThreads = n + } + // Each thread will determine next message to process by atomically + // incrementing curItem, process corresponding pk,msg[,aug] tuple and + // repeat until n is exceeded. The resulting accumulations will be + // fed into the msgsCh channel. + msgsCh := make(chan Pairing, numThreads) + valid := int32(1) + curItem := uint32(0) + mutex := sync.Mutex{} + + mutex.Lock() + for tid := 0; tid < numThreads; tid++ { + go func() { + pairing := PairingCtx(useHash, dst) + var temp P2Affine + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } else if work == 0 && maxProcs == numCores-1 && + numThreads == maxProcs { + // Avoid consuming all cores by waiting until the + // main thread has completed its miller loop before + // proceeding. + mutex.Lock() + mutex.Unlock() + } + + // Pull Public Key and augmentation blob + curPk, aug := pkFn(work, &temp) + if curPk == nil { + atomic.StoreInt32(&valid, 0) + break + } + + // Pairing and accumulate + ret := PairingAggregatePkInG2(pairing, curPk, pkValidate, + nil, false, msgs[work], aug) + if ret != C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + break + } + + // application might have some async work to do + runtime.Gosched() + } + if atomic.LoadInt32(&valid) > 0 { + PairingCommit(pairing) + msgsCh <- pairing + } else { + msgsCh <- nil + } + }() + } + + // Uncompress and check signature + var gtsig Fp12 + sig := sigFn() + if sig == nil { + atomic.StoreInt32(&valid, 0) + } + if atomic.LoadInt32(&valid) > 0 && sigGroupcheck && + !sig.SigValidate(false) { + atomic.StoreInt32(&valid, 0) + } + if atomic.LoadInt32(&valid) > 0 { + C.blst_aggregated_in_g1(>sig, sig) + } + mutex.Unlock() + + // Accumulate the thread results + var pairings Pairing + for i := 0; i < numThreads; i++ { + msg := <-msgsCh + if msg != nil { + if pairings == nil { + pairings = msg + } else { + ret := PairingMerge(pairings, msg) + if ret != C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + } + } + } + } + if atomic.LoadInt32(&valid) == 0 || pairings == nil { + return false + } + + return PairingFinalVerify(pairings, >sig) +} + +func CoreVerifyPkInG2(pk *P2Affine, sig *P1Affine, hash_or_encode bool, + msg Message, dst []byte, optional ...[]byte) int { + + var aug []byte + var uaug *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + uaug = (*C.byte)(&aug[0]) + } + } + + if runtime.NumGoroutine() < maxProcs { + sigFn := func() *P1Affine { + return sig + } + pkFn := func(_ uint32, _ *P2Affine) (*P2Affine, []byte) { + return pk, aug + } + if !coreAggregateVerifyPkInG2(sigFn, true, pkFn, true, []Message{msg}, + dst, hash_or_encode) { + return C.BLST_VERIFY_FAIL + } + return C.BLST_SUCCESS + } + + var udst *C.byte + if len(dst) > 0 { + udst = (*C.byte)(&dst[0]) + } + var umsg *C.byte + if len(msg) > 0 { + umsg = (*C.byte)(&msg[0]) + } + + return int(C.blst_core_verify_pk_in_g2(pk, sig, C.bool(hash_or_encode), + umsg, C.size_t(len(msg)), + udst, C.size_t(len(dst)), + uaug, C.size_t(len(aug)))) +} + +// pks are assumed to be verified for proof of possession, +// which implies that they are already group-checked +func (sig *P1Affine) FastAggregateVerify(sigGroupcheck bool, + pks []*P2Affine, msg Message, dst []byte, + optional ...interface{}) bool { // pass-through to Verify + n := len(pks) + + // TODO: return value for length zero? + if n == 0 { + return false + } + + aggregator := new(P2Aggregate) + if !aggregator.Aggregate(pks, false) { + return false + } + pkAff := aggregator.ToAffine() + + // Verify + return sig.Verify(sigGroupcheck, pkAff, false, msg, dst, optional...) +} + +func (dummy *P1Affine) MultipleAggregateVerify(sigs []*P1Affine, + sigsGroupcheck bool, pks []*P2Affine, pksVerify bool, + msgs []Message, dst []byte, randFn func(*Scalar), randBits int, + optional ...interface{}) bool { // useHash + + // Sanity checks and argument parsing + n := len(pks) + if n == 0 || len(msgs) != n || len(sigs) != n { + return false + } + _, augs, useHash, ok := parseOpts(optional...) + useAugs := len(augs) != 0 + if !ok || (useAugs && len(augs) != n) { + return false + } + + paramsFn := + func(work uint32, sig *P1Affine, pk *P2Affine, rand *Scalar) ( + *P1Affine, *P2Affine, *Scalar, []byte) { + randFn(rand) + var aug []byte + if useAugs { + aug = augs[work] + } + return sigs[work], pks[work], rand, aug + } + + return multipleAggregateVerifyPkInG2(paramsFn, sigsGroupcheck, pksVerify, + msgs, dst, randBits, useHash) +} + +type mulAggGetterPkInG2 func(work uint32, sig *P1Affine, pk *P2Affine, + rand *Scalar) (*P1Affine, *P2Affine, *Scalar, []byte) + +func multipleAggregateVerifyPkInG2(paramsFn mulAggGetterPkInG2, + sigsGroupcheck bool, pksVerify bool, msgs []Message, + dst []byte, randBits int, + optional ...bool) bool { // useHash + n := len(msgs) + if n == 0 { + return false + } + + useHash := true + if len(optional) > 0 { + useHash = optional[0] + } + + numCores := runtime.GOMAXPROCS(0) + numThreads := maxProcs + if numThreads > numCores { + numThreads = numCores + } + if numThreads > n { + numThreads = n + } + // Each thread will determine next message to process by atomically + // incrementing curItem, process corresponding pk,msg[,aug] tuple and + // repeat until n is exceeded. The resulting accumulations will be + // fed into the msgsCh channel. + msgsCh := make(chan Pairing, numThreads) + valid := int32(1) + curItem := uint32(0) + + for tid := 0; tid < numThreads; tid++ { + go func() { + pairing := PairingCtx(useHash, dst) + var tempRand Scalar + var tempPk P2Affine + var tempSig P1Affine + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } + + curSig, curPk, curRand, aug := paramsFn(work, &tempSig, + &tempPk, &tempRand) + + if PairingMulNAggregatePkInG2(pairing, curPk, pksVerify, + curSig, sigsGroupcheck, curRand, + randBits, msgs[work], aug) != + C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + break + } + + // application might have some async work to do + runtime.Gosched() + } + if atomic.LoadInt32(&valid) > 0 { + PairingCommit(pairing) + msgsCh <- pairing + } else { + msgsCh <- nil + } + }() + } + + // Accumulate the thread results + var pairings Pairing + for i := 0; i < numThreads; i++ { + msg := <-msgsCh + if msg != nil { + if pairings == nil { + pairings = msg + } else { + ret := PairingMerge(pairings, msg) + if ret != C.BLST_SUCCESS { + atomic.StoreInt32(&valid, 0) + } + } + } + } + if atomic.LoadInt32(&valid) == 0 || pairings == nil { + return false + } + + return PairingFinalVerify(pairings, nil) +} + +// +// Aggregate P1 +// + +type aggGetterP1 func(i uint32, temp *P1Affine) *P1Affine +type P1Aggregate struct { + v *P1 +} + +// Aggregate uncompressed elements +func (agg *P1Aggregate) Aggregate(elmts []*P1Affine, + groupcheck bool) bool { + if len(elmts) == 0 { + return true + } + getter := func(i uint32, _ *P1Affine) *P1Affine { return elmts[i] } + return agg.aggregate(getter, groupcheck, len(elmts)) +} + +// Aggregate compressed elements +func (agg *P1Aggregate) AggregateCompressed(elmts [][]byte, + groupcheck bool) bool { + if len(elmts) == 0 { + return true + } + getter := func(i uint32, p *P1Affine) *P1Affine { + bytes := elmts[i] + if p.Uncompress(bytes) == nil { + return nil + } + return p + } + return agg.aggregate(getter, groupcheck, len(elmts)) +} + +func (agg *P1Aggregate) AddAggregate(other *P1Aggregate) { + if other.v == nil { + // do nothing + } else if agg.v == nil { + agg.v = other.v + } else { + C.blst_p1_add_or_double(agg.v, agg.v, other.v) + } +} + +func (agg *P1Aggregate) Add(elmt *P1Affine, groupcheck bool) bool { + if groupcheck && !bool(C.blst_p1_affine_in_g1(elmt)) { + return false + } + if agg.v == nil { + agg.v = new(P1) + C.blst_p1_from_affine(agg.v, elmt) + } else { + C.blst_p1_add_or_double_affine(agg.v, agg.v, elmt) + } + return true +} + +func (agg *P1Aggregate) ToAffine() *P1Affine { + if agg.v == nil { + return new(P1Affine) + } + return agg.v.ToAffine() +} + +func (agg *P1Aggregate) aggregate(getter aggGetterP1, groupcheck bool, + n int) bool { + + if n == 0 { + return true + } + // operations are considered short enough for not to care about + // keeping one core free... + numThreads := runtime.GOMAXPROCS(0) + if numThreads > n { + numThreads = n + } + + valid := int32(1) + type result struct { + agg *P1 + empty bool + } + msgs := make(chan result, numThreads) + curItem := uint32(0) + for tid := 0; tid < numThreads; tid++ { + go func() { + first := true + var agg P1 + var temp P1Affine + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } + + // Signature validate + curElmt := getter(work, &temp) + if curElmt == nil { + atomic.StoreInt32(&valid, 0) + break + } + if groupcheck && !bool(C.blst_p1_affine_in_g1(curElmt)) { + atomic.StoreInt32(&valid, 0) + break + } + if first { + C.blst_p1_from_affine(&agg, curElmt) + first = false + } else { + C.blst_p1_add_or_double_affine(&agg, &agg, curElmt) + } + // application might have some async work to do + runtime.Gosched() + } + if first { + msgs <- result{nil, true} + } else if atomic.LoadInt32(&valid) > 0 { + msgs <- result{&agg, false} + } else { + msgs <- result{nil, false} + } + }() + } + + // Accumulate the thread results + first := agg.v == nil + validLocal := true + for i := 0; i < numThreads; i++ { + msg := <-msgs + if !validLocal || msg.empty { + // do nothing + } else if msg.agg == nil { + validLocal = false + // This should be unnecessary but seems safer + atomic.StoreInt32(&valid, 0) + } else { + if first { + agg.v = msg.agg + first = false + } else { + C.blst_p1_add_or_double(agg.v, agg.v, msg.agg) + } + } + } + if atomic.LoadInt32(&valid) == 0 { + agg.v = nil + return false + } + return true +} +func PairingAggregatePkInG1(ctx Pairing, PK *P1Affine, pkValidate bool, + sig *P2Affine, sigGroupcheck bool, msg []byte, + optional ...[]byte) int { // aug + var aug []byte + var uaug *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + uaug = (*C.byte)(&aug[0]) + } + } + var umsg *C.byte + if len(msg) > 0 { + umsg = (*C.byte)(&msg[0]) + } + + r := C.blst_pairing_chk_n_aggr_pk_in_g1(&ctx[0], + PK, C.bool(pkValidate), + sig, C.bool(sigGroupcheck), + umsg, C.size_t(len(msg)), + uaug, C.size_t(len(aug))) + + return int(r) +} + +func PairingMulNAggregatePkInG1(ctx Pairing, PK *P1Affine, pkValidate bool, + sig *P2Affine, sigGroupcheck bool, + rand *Scalar, randBits int, msg []byte, + optional ...[]byte) int { // aug + var aug []byte + var uaug *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + uaug = (*C.byte)(&aug[0]) + } + } + var umsg *C.byte + if len(msg) > 0 { + umsg = (*C.byte)(&msg[0]) + } + + r := C.blst_pairing_chk_n_mul_n_aggr_pk_in_g1(&ctx[0], + PK, C.bool(pkValidate), + sig, C.bool(sigGroupcheck), + &rand.b[0], C.size_t(randBits), + umsg, C.size_t(len(msg)), + uaug, C.size_t(len(aug))) + + return int(r) +} + +// +// Serialization/Deserialization. +// + +// P1 Serdes +func (p1 *P1Affine) Serialize() []byte { + var out [BLST_P1_SERIALIZE_BYTES]byte + C.blst_p1_affine_serialize((*C.byte)(&out[0]), p1) + return out[:] +} + +func (p1 *P1Affine) Deserialize(in []byte) *P1Affine { + if len(in) != BLST_P1_SERIALIZE_BYTES { + return nil + } + if C.blst_p1_deserialize(p1, (*C.byte)(&in[0])) != C.BLST_SUCCESS { + return nil + } + return p1 +} +func (p1 *P1Affine) Compress() []byte { + var out [BLST_P1_COMPRESS_BYTES]byte + C.blst_p1_affine_compress((*C.byte)(&out[0]), p1) + return out[:] +} + +func (p1 *P1Affine) Uncompress(in []byte) *P1Affine { + if len(in) != BLST_P1_COMPRESS_BYTES { + return nil + } + if C.blst_p1_uncompress(p1, (*C.byte)(&in[0])) != C.BLST_SUCCESS { + return nil + } + return p1 +} + +func (p1 *P1Affine) InG1() bool { + return bool(C.blst_p1_affine_in_g1(p1)) +} + +func (dummy *P1Affine) BatchUncompress(in [][]byte) []*P1Affine { + // Allocate space for all of the resulting points. Later we'll save pointers + // and return those so that the result could be used in other functions, + // such as MultipleAggregateVerify. + n := len(in) + points := make([]P1Affine, n) + pointsPtrs := make([]*P1Affine, n) + + numCores := runtime.GOMAXPROCS(0) + numThreads := maxProcs + if numThreads > numCores { + numThreads = numCores + } + if numThreads > n { + numThreads = n + } + // Each thread will determine next message to process by atomically + // incrementing curItem, process corresponding point, and + // repeat until n is exceeded. Each thread will send a result (true for + // success, false for failure) into the channel when complete. + resCh := make(chan bool, numThreads) + valid := int32(1) + curItem := uint32(0) + for tid := 0; tid < numThreads; tid++ { + go func() { + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } + if points[work].Uncompress(in[work]) == nil { + atomic.StoreInt32(&valid, 0) + break + } + pointsPtrs[work] = &points[work] + } + if atomic.LoadInt32(&valid) > 0 { + resCh <- true + } else { + resCh <- false + } + }() + } + + // Collect the threads + result := true + for i := 0; i < numThreads; i++ { + if !<-resCh { + result = false + } + } + if atomic.LoadInt32(&valid) == 0 || !result { + return nil + } + return pointsPtrs +} + +func (p1 *P1) Serialize() []byte { + var out [BLST_P1_SERIALIZE_BYTES]byte + C.blst_p1_serialize((*C.byte)(&out[0]), p1) + return out[:] +} +func (p1 *P1) Compress() []byte { + var out [BLST_P1_COMPRESS_BYTES]byte + C.blst_p1_compress((*C.byte)(&out[0]), p1) + return out[:] +} + +func (p1 *P1) MultAssign(scalarIf interface{}, optional ...int) *P1 { + var nbits int + var scalar *C.byte + switch val := scalarIf.(type) { + case []byte: + scalar = (*C.byte)(&val[0]) + nbits = len(val) * 8 + case *Scalar: + scalar = &val.b[0] + nbits = 255 + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + if len(optional) > 0 { + nbits = optional[0] + } + C.blst_p1_mult(p1, p1, scalar, C.size_t(nbits)) + return p1 +} + +func (p1 *P1) Mult(scalarIf interface{}, optional ...int) *P1 { + ret := *p1 + return ret.MultAssign(scalarIf, optional...) +} + +func (p1 *P1) AddAssign(pointIf interface{}) *P1 { + switch val := pointIf.(type) { + case *P1: + C.blst_p1_add_or_double(p1, p1, val) + case *P1Affine: + C.blst_p1_add_or_double_affine(p1, p1, val) + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + return p1 +} + +func (p1 *P1) Add(pointIf interface{}) *P1 { + ret := *p1 + return ret.AddAssign(pointIf) +} + +func (p1 *P1) SubAssign(pointIf interface{}) *P1 { + var x *Fp + var affine C.bool + switch val := pointIf.(type) { + case *P1: + x = &val.x + affine = false + case *P1Affine: + x = &val.x + affine = true + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + C.go_p1_sub_assign(p1, x, affine) + return p1 +} + +func (p1 *P1) Sub(pointIf interface{}) *P1 { + ret := *p1 + return ret.SubAssign(pointIf) +} + +func P1Generator() *P1 { + return C.blst_p1_generator() +} + +// 'acc += point * scalar', passing 'nil' for 'point' means "use the +// +// group generator point" +func (acc *P1) MultNAccumulate(pointIf interface{}, scalarIf interface{}, + optional ...int) *P1 { + var x *Fp + var affine C.bool + if pointIf != nil { + switch val := pointIf.(type) { + case *P1: + x = &val.x + affine = false + case *P1Affine: + x = &val.x + affine = true + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + } + var nbits int + var scalar *C.byte + switch val := scalarIf.(type) { + case []byte: + scalar = (*C.byte)(&val[0]) + nbits = len(val) * 8 + case *Scalar: + scalar = &val.b[0] + nbits = 255 + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + if len(optional) > 0 { + nbits = optional[0] + } + C.go_p1_mult_n_acc(acc, x, affine, scalar, C.size_t(nbits)) + return acc +} + +// +// Affine +// + +func (p *P1) ToAffine() *P1Affine { + var pa P1Affine + C.blst_p1_to_affine(&pa, p) + return &pa +} + +func (p *P1) FromAffine(pa *P1Affine) { + C.blst_p1_from_affine(p, pa) +} + +// Hash +func HashToG1(msg []byte, dst []byte, + optional ...[]byte) *P1 { // aug + var q P1 + + // Handle zero length message + var msgC *C.byte + if len(msg) > 0 { + msgC = (*C.byte)(&msg[0]) + } + + var dstC *C.byte + if len(dst) > 0 { + dstC = (*C.byte)(&dst[0]) + } + + var aug []byte + var augC *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + augC = (*C.byte)(&aug[0]) + } + } + + C.blst_hash_to_g1(&q, msgC, C.size_t(len(msg)), + dstC, C.size_t(len(dst)), + augC, C.size_t(len(aug))) + return &q +} + +func EncodeToG1(msg []byte, dst []byte, + optional ...[]byte) *P1 { // aug + var q P1 + + // Handle zero length message + var msgC *C.byte + if len(msg) > 0 { + msgC = (*C.byte)(&msg[0]) + } + + var dstC *C.byte + if len(dst) > 0 { + dstC = (*C.byte)(&dst[0]) + } + + var aug []byte + var augC *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + augC = (*C.byte)(&aug[0]) + } + } + + C.blst_encode_to_g1(&q, msgC, C.size_t(len(msg)), + dstC, C.size_t(len(dst)), + augC, C.size_t(len(aug))) + return &q +} + +// +// Multi-point/scalar operations +// + +func P1sToAffine(points []*P1, optional ...int) P1Affines { + var npoints int + if len(optional) > 0 { + npoints = optional[0] + } else { + npoints = len(points) + } + ret := make([]P1Affine, npoints) + _cgoCheckPointer := func(...interface{}) {} + C.blst_p1s_to_affine(&ret[0], &points[0], C.size_t(npoints)) + return ret +} + +func (points P1s) ToAffine(optional ...P1Affines) P1Affines { + npoints := len(points) + var ret P1Affines + + if len(optional) > 0 { // used in benchmark + ret = optional[0] + if len(ret) < npoints { + panic("npoints mismatch") + } + } else { + ret = make([]P1Affine, npoints) + } + + if maxProcs < 2 || npoints < 768 { + C.go_p1slice_to_affine(&ret[0], &points[0], C.size_t(npoints)) + return ret + } + + nslices := (npoints + 511) / 512 + if nslices > maxProcs { + nslices = maxProcs + } + delta, rem := npoints/nslices+1, npoints%nslices + + var wg sync.WaitGroup + wg.Add(nslices) + for x := 0; x < npoints; x += delta { + if rem == 0 { + delta -= 1 + } + rem -= 1 + go func(out *P1Affine, inp *P1, delta int) { + C.go_p1slice_to_affine(out, inp, C.size_t(delta)) + wg.Done() + }(&ret[x], &points[x], delta) + } + wg.Wait() + + return ret +} + +// +// Batch addition +// + +func P1AffinesAdd(points []*P1Affine, optional ...int) *P1 { + var npoints int + if len(optional) > 0 { + npoints = optional[0] + } else { + npoints = len(points) + } + var ret P1 + _cgoCheckPointer := func(...interface{}) {} + C.blst_p1s_add(&ret, &points[0], C.size_t(npoints)) + return &ret +} + +func (points P1Affines) Add() *P1 { + npoints := len(points) + if maxProcs < 2 || npoints < 768 { + var ret P1 + C.go_p1slice_add(&ret, &points[0], C.size_t(npoints)) + return &ret + } + + nslices := (npoints + 511) / 512 + if nslices > maxProcs { + nslices = maxProcs + } + delta, rem := npoints/nslices+1, npoints%nslices + + msgs := make(chan P1, nslices) + for x := 0; x < npoints; x += delta { + if rem == 0 { + delta -= 1 + } + rem -= 1 + go func(points *P1Affine, delta int) { + var ret P1 + C.go_p1slice_add(&ret, points, C.size_t(delta)) + msgs <- ret + }(&points[x], delta) + } + + ret := <-msgs + for i := 1; i < nslices; i++ { + msg := <-msgs + C.blst_p1_add_or_double(&ret, &ret, &msg) + } + return &ret +} + +func (points P1s) Add() *P1 { + return points.ToAffine().Add() +} + +// +// Multi-scalar multiplication +// + +func P1AffinesMult(pointsIf interface{}, scalarsIf interface{}, nbits int) *P1 { + var npoints int + switch val := pointsIf.(type) { + case []*P1Affine: + npoints = len(val) + case []P1Affine: + npoints = len(val) + case P1Affines: + npoints = len(val) + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + + nbytes := (nbits + 7) / 8 + var scalars []*C.byte + switch val := scalarsIf.(type) { + case []byte: + if len(val) < npoints*nbytes { + return nil + } + case [][]byte: + if len(val) < npoints { + return nil + } + scalars = make([]*C.byte, npoints) + for i := range scalars { + scalars[i] = (*C.byte)(&val[i][0]) + } + case []Scalar: + if len(val) < npoints { + return nil + } + if nbits <= 248 { + scalars = make([]*C.byte, npoints) + for i := range scalars { + scalars[i] = &val[i].b[0] + } + } + case []*Scalar: + if len(val) < npoints { + return nil + } + scalars = make([]*C.byte, npoints) + for i := range scalars { + scalars[i] = &val[i].b[0] + } + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + + numThreads := maxProcs + numCores := runtime.GOMAXPROCS(0) + if numCores < maxProcs { + numThreads = numCores + } + + if numThreads < 2 || npoints < 32 { + sz := int(C.blst_p1s_mult_pippenger_scratch_sizeof(C.size_t(npoints))) / 8 + scratch := make([]uint64, sz) + + pointsBySlice := [2]*P1Affine{nil, nil} + var p_points **P1Affine + switch val := pointsIf.(type) { + case []*P1Affine: + p_points = &val[0] + case []P1Affine: + pointsBySlice[0] = &val[0] + p_points = &pointsBySlice[0] + case P1Affines: + pointsBySlice[0] = &val[0] + p_points = &pointsBySlice[0] + } + + scalarsBySlice := [2]*C.byte{nil, nil} + var p_scalars **C.byte + switch val := scalarsIf.(type) { + case []byte: + scalarsBySlice[0] = (*C.byte)(&val[0]) + p_scalars = &scalarsBySlice[0] + case [][]byte: + p_scalars = &scalars[0] + case []Scalar: + if nbits > 248 { + scalarsBySlice[0] = (*C.byte)(&val[0].b[0]) + p_scalars = &scalarsBySlice[0] + } else { + p_scalars = &scalars[0] + } + case []*Scalar: + p_scalars = &scalars[0] + } + + var ret P1 + _cgoCheckPointer := func(...interface{}) {} + C.blst_p1s_mult_pippenger(&ret, p_points, C.size_t(npoints), + p_scalars, C.size_t(nbits), + (*C.limb_t)(&scratch[0])) + + for i := range scalars { + scalars[i] = nil + } + + return &ret + } + + // this is sizeof(scratch[0]) + sz := int(C.blst_p1s_mult_pippenger_scratch_sizeof(0)) / 8 + + nx, ny, window := breakdown(nbits, pippenger_window_size(npoints), + numThreads) + + // |grid[]| holds "coordinates" and place for result + grid := make([]struct { + x, dx, y, dy int + point P1 + }, nx*ny) + + dx := npoints / nx + y := window * (ny - 1) + total := 0 + for ; total < nx; total++ { + grid[total].x = total * dx + grid[total].dx = dx + grid[total].y = y + grid[total].dy = nbits - y + } + grid[total-1].dx = npoints - grid[total-1].x + + for y > 0 { + y -= window + for i := 0; i < nx; i++ { + grid[total].x = grid[i].x + grid[total].dx = grid[i].dx + grid[total].y = y + grid[total].dy = window + total++ + } + } + + if numThreads > total { + numThreads = total + } + + msgsCh := make(chan int, ny) + rowSync := make([]int32, ny) // count up to |nx| + curItem := int32(0) + for tid := 0; tid < numThreads; tid++ { + go func() { + scratch := make([]uint64, sz<= total { + break + } + + x := grid[workItem].x + y := grid[workItem].y + + var p_points **P1Affine + switch val := pointsIf.(type) { + case []*P1Affine: + p_points = &val[x] + case []P1Affine: + pointsBySlice[0] = &val[x] + p_points = &pointsBySlice[0] + case P1Affines: + pointsBySlice[0] = &val[x] + p_points = &pointsBySlice[0] + } + + var p_scalars **C.byte + switch val := scalarsIf.(type) { + case []byte: + scalarsBySlice[0] = (*C.byte)(&val[x*nbytes]) + p_scalars = &scalarsBySlice[0] + case [][]byte: + p_scalars = &scalars[x] + case []Scalar: + if nbits > 248 { + scalarsBySlice[0] = (*C.byte)(&val[x].b[0]) + p_scalars = &scalarsBySlice[0] + } else { + p_scalars = &scalars[x] + } + case []*Scalar: + p_scalars = &scalars[x] + } + + C.blst_p1s_tile_pippenger(&grid[workItem].point, + p_points, C.size_t(grid[workItem].dx), + p_scalars, C.size_t(nbits), + (*C.limb_t)(&scratch[0]), + C.size_t(y), C.size_t(window)) + + if atomic.AddInt32(&rowSync[y/window], 1) == int32(nx) { + msgsCh <- y // "row" is done + } else { + runtime.Gosched() // be nice to the application + } + } + + pointsBySlice[0] = nil + scalarsBySlice[0] = nil + }() + } + + var ret P1 + rows := make([]bool, ny) + row := 0 // actually index in |grid[]| + for i := 0; i < ny; i++ { // we expect |ny| messages, one per "row" + y := <-msgsCh + rows[y/window] = true // mark the "row" + for grid[row].y == y { // if it's current "row", process it + for row < total && grid[row].y == y { + C.blst_p1_add_or_double(&ret, &ret, &grid[row].point) + row++ + } + if y == 0 { + break // one can as well 'return &ret' here + } + for j := 0; j < window; j++ { + C.blst_p1_double(&ret, &ret) + } + y -= window + if !rows[y/window] { // see if next "row" was marked already + break + } + } + } + + for i := range scalars { + scalars[i] = nil + } + + return &ret +} + +func (points P1Affines) Mult(scalarsIf interface{}, nbits int) *P1 { + return P1AffinesMult(points, scalarsIf, nbits) +} + +func (points P1s) Mult(scalarsIf interface{}, nbits int) *P1 { + return points.ToAffine().Mult(scalarsIf, nbits) +} +func PairingAggregatePkInG2(ctx Pairing, PK *P2Affine, pkValidate bool, + sig *P1Affine, sigGroupcheck bool, msg []byte, + optional ...[]byte) int { // aug + var aug []byte + var uaug *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + uaug = (*C.byte)(&aug[0]) + } + } + var umsg *C.byte + if len(msg) > 0 { + umsg = (*C.byte)(&msg[0]) + } + + r := C.blst_pairing_chk_n_aggr_pk_in_g2(&ctx[0], + PK, C.bool(pkValidate), + sig, C.bool(sigGroupcheck), + umsg, C.size_t(len(msg)), + uaug, C.size_t(len(aug))) + + return int(r) +} + +func PairingMulNAggregatePkInG2(ctx Pairing, PK *P2Affine, pkValidate bool, + sig *P1Affine, sigGroupcheck bool, + rand *Scalar, randBits int, msg []byte, + optional ...[]byte) int { // aug + var aug []byte + var uaug *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + uaug = (*C.byte)(&aug[0]) + } + } + var umsg *C.byte + if len(msg) > 0 { + umsg = (*C.byte)(&msg[0]) + } + + r := C.blst_pairing_chk_n_mul_n_aggr_pk_in_g2(&ctx[0], + PK, C.bool(pkValidate), + sig, C.bool(sigGroupcheck), + &rand.b[0], C.size_t(randBits), + umsg, C.size_t(len(msg)), + uaug, C.size_t(len(aug))) + + return int(r) +} + +// +// Serialization/Deserialization. +// + +// P2 Serdes +func (p2 *P2Affine) Serialize() []byte { + var out [BLST_P2_SERIALIZE_BYTES]byte + C.blst_p2_affine_serialize((*C.byte)(&out[0]), p2) + return out[:] +} + +func (p2 *P2Affine) Deserialize(in []byte) *P2Affine { + if len(in) != BLST_P2_SERIALIZE_BYTES { + return nil + } + if C.blst_p2_deserialize(p2, (*C.byte)(&in[0])) != C.BLST_SUCCESS { + return nil + } + return p2 +} +func (p2 *P2Affine) Compress() []byte { + var out [BLST_P2_COMPRESS_BYTES]byte + C.blst_p2_affine_compress((*C.byte)(&out[0]), p2) + return out[:] +} + +func (p2 *P2Affine) Uncompress(in []byte) *P2Affine { + if len(in) != BLST_P2_COMPRESS_BYTES { + return nil + } + if C.blst_p2_uncompress(p2, (*C.byte)(&in[0])) != C.BLST_SUCCESS { + return nil + } + return p2 +} + +func (p2 *P2Affine) InG2() bool { + return bool(C.blst_p2_affine_in_g2(p2)) +} + +func (dummy *P2Affine) BatchUncompress(in [][]byte) []*P2Affine { + // Allocate space for all of the resulting points. Later we'll save pointers + // and return those so that the result could be used in other functions, + // such as MultipleAggregateVerify. + n := len(in) + points := make([]P2Affine, n) + pointsPtrs := make([]*P2Affine, n) + + numCores := runtime.GOMAXPROCS(0) + numThreads := maxProcs + if numThreads > numCores { + numThreads = numCores + } + if numThreads > n { + numThreads = n + } + // Each thread will determine next message to process by atomically + // incrementing curItem, process corresponding point, and + // repeat until n is exceeded. Each thread will send a result (true for + // success, false for failure) into the channel when complete. + resCh := make(chan bool, numThreads) + valid := int32(1) + curItem := uint32(0) + for tid := 0; tid < numThreads; tid++ { + go func() { + for atomic.LoadInt32(&valid) > 0 { + // Get a work item + work := atomic.AddUint32(&curItem, 1) - 1 + if work >= uint32(n) { + break + } + if points[work].Uncompress(in[work]) == nil { + atomic.StoreInt32(&valid, 0) + break + } + pointsPtrs[work] = &points[work] + } + if atomic.LoadInt32(&valid) > 0 { + resCh <- true + } else { + resCh <- false + } + }() + } + + // Collect the threads + result := true + for i := 0; i < numThreads; i++ { + if !<-resCh { + result = false + } + } + if atomic.LoadInt32(&valid) == 0 || !result { + return nil + } + return pointsPtrs +} + +func (p2 *P2) Serialize() []byte { + var out [BLST_P2_SERIALIZE_BYTES]byte + C.blst_p2_serialize((*C.byte)(&out[0]), p2) + return out[:] +} +func (p2 *P2) Compress() []byte { + var out [BLST_P2_COMPRESS_BYTES]byte + C.blst_p2_compress((*C.byte)(&out[0]), p2) + return out[:] +} + +func (p2 *P2) MultAssign(scalarIf interface{}, optional ...int) *P2 { + var nbits int + var scalar *C.byte + switch val := scalarIf.(type) { + case []byte: + scalar = (*C.byte)(&val[0]) + nbits = len(val) * 8 + case *Scalar: + scalar = &val.b[0] + nbits = 255 + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + if len(optional) > 0 { + nbits = optional[0] + } + C.blst_p2_mult(p2, p2, scalar, C.size_t(nbits)) + return p2 +} + +func (p2 *P2) Mult(scalarIf interface{}, optional ...int) *P2 { + ret := *p2 + return ret.MultAssign(scalarIf, optional...) +} + +func (p2 *P2) AddAssign(pointIf interface{}) *P2 { + switch val := pointIf.(type) { + case *P2: + C.blst_p2_add_or_double(p2, p2, val) + case *P2Affine: + C.blst_p2_add_or_double_affine(p2, p2, val) + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + return p2 +} + +func (p2 *P2) Add(pointIf interface{}) *P2 { + ret := *p2 + return ret.AddAssign(pointIf) +} + +func (p2 *P2) SubAssign(pointIf interface{}) *P2 { + var x *Fp2 + var affine C.bool + switch val := pointIf.(type) { + case *P2: + x = &val.x + affine = false + case *P2Affine: + x = &val.x + affine = true + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + C.go_p2_sub_assign(p2, x, affine) + return p2 +} + +func (p2 *P2) Sub(pointIf interface{}) *P2 { + ret := *p2 + return ret.SubAssign(pointIf) +} + +func P2Generator() *P2 { + return C.blst_p2_generator() +} + +// 'acc += point * scalar', passing 'nil' for 'point' means "use the +// +// group generator point" +func (acc *P2) MultNAccumulate(pointIf interface{}, scalarIf interface{}, + optional ...int) *P2 { + var x *Fp2 + var affine C.bool + if pointIf != nil { + switch val := pointIf.(type) { + case *P2: + x = &val.x + affine = false + case *P2Affine: + x = &val.x + affine = true + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + } + var nbits int + var scalar *C.byte + switch val := scalarIf.(type) { + case []byte: + scalar = (*C.byte)(&val[0]) + nbits = len(val) * 8 + case *Scalar: + scalar = &val.b[0] + nbits = 255 + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + if len(optional) > 0 { + nbits = optional[0] + } + C.go_p2_mult_n_acc(acc, x, affine, scalar, C.size_t(nbits)) + return acc +} + +// +// Affine +// + +func (p *P2) ToAffine() *P2Affine { + var pa P2Affine + C.blst_p2_to_affine(&pa, p) + return &pa +} + +func (p *P2) FromAffine(pa *P2Affine) { + C.blst_p2_from_affine(p, pa) +} + +// Hash +func HashToG2(msg []byte, dst []byte, + optional ...[]byte) *P2 { // aug + var q P2 + + // Handle zero length message + var msgC *C.byte + if len(msg) > 0 { + msgC = (*C.byte)(&msg[0]) + } + + var dstC *C.byte + if len(dst) > 0 { + dstC = (*C.byte)(&dst[0]) + } + + var aug []byte + var augC *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + augC = (*C.byte)(&aug[0]) + } + } + + C.blst_hash_to_g2(&q, msgC, C.size_t(len(msg)), + dstC, C.size_t(len(dst)), + augC, C.size_t(len(aug))) + return &q +} + +func EncodeToG2(msg []byte, dst []byte, + optional ...[]byte) *P2 { // aug + var q P2 + + // Handle zero length message + var msgC *C.byte + if len(msg) > 0 { + msgC = (*C.byte)(&msg[0]) + } + + var dstC *C.byte + if len(dst) > 0 { + dstC = (*C.byte)(&dst[0]) + } + + var aug []byte + var augC *C.byte + if len(optional) > 0 { + aug = optional[0] + if len(aug) > 0 { + augC = (*C.byte)(&aug[0]) + } + } + + C.blst_encode_to_g2(&q, msgC, C.size_t(len(msg)), + dstC, C.size_t(len(dst)), + augC, C.size_t(len(aug))) + return &q +} + +// +// Multi-point/scalar operations +// + +func P2sToAffine(points []*P2, optional ...int) P2Affines { + var npoints int + if len(optional) > 0 { + npoints = optional[0] + } else { + npoints = len(points) + } + ret := make([]P2Affine, npoints) + _cgoCheckPointer := func(...interface{}) {} + C.blst_p2s_to_affine(&ret[0], &points[0], C.size_t(npoints)) + return ret +} + +func (points P2s) ToAffine(optional ...P2Affines) P2Affines { + npoints := len(points) + var ret P2Affines + + if len(optional) > 0 { // used in benchmark + ret = optional[0] + if len(ret) < npoints { + panic("npoints mismatch") + } + } else { + ret = make([]P2Affine, npoints) + } + + if maxProcs < 2 || npoints < 768 { + C.go_p2slice_to_affine(&ret[0], &points[0], C.size_t(npoints)) + return ret + } + + nslices := (npoints + 511) / 512 + if nslices > maxProcs { + nslices = maxProcs + } + delta, rem := npoints/nslices+1, npoints%nslices + + var wg sync.WaitGroup + wg.Add(nslices) + for x := 0; x < npoints; x += delta { + if rem == 0 { + delta -= 1 + } + rem -= 1 + go func(out *P2Affine, inp *P2, delta int) { + C.go_p2slice_to_affine(out, inp, C.size_t(delta)) + wg.Done() + }(&ret[x], &points[x], delta) + } + wg.Wait() + + return ret +} + +// +// Batch addition +// + +func P2AffinesAdd(points []*P2Affine, optional ...int) *P2 { + var npoints int + if len(optional) > 0 { + npoints = optional[0] + } else { + npoints = len(points) + } + var ret P2 + _cgoCheckPointer := func(...interface{}) {} + C.blst_p2s_add(&ret, &points[0], C.size_t(npoints)) + return &ret +} + +func (points P2Affines) Add() *P2 { + npoints := len(points) + if maxProcs < 2 || npoints < 768 { + var ret P2 + C.go_p2slice_add(&ret, &points[0], C.size_t(npoints)) + return &ret + } + + nslices := (npoints + 511) / 512 + if nslices > maxProcs { + nslices = maxProcs + } + delta, rem := npoints/nslices+1, npoints%nslices + + msgs := make(chan P2, nslices) + for x := 0; x < npoints; x += delta { + if rem == 0 { + delta -= 1 + } + rem -= 1 + go func(points *P2Affine, delta int) { + var ret P2 + C.go_p2slice_add(&ret, points, C.size_t(delta)) + msgs <- ret + }(&points[x], delta) + } + + ret := <-msgs + for i := 1; i < nslices; i++ { + msg := <-msgs + C.blst_p2_add_or_double(&ret, &ret, &msg) + } + return &ret +} + +func (points P2s) Add() *P2 { + return points.ToAffine().Add() +} + +// +// Multi-scalar multiplication +// + +func P2AffinesMult(pointsIf interface{}, scalarsIf interface{}, nbits int) *P2 { + var npoints int + switch val := pointsIf.(type) { + case []*P2Affine: + npoints = len(val) + case []P2Affine: + npoints = len(val) + case P2Affines: + npoints = len(val) + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + + nbytes := (nbits + 7) / 8 + var scalars []*C.byte + switch val := scalarsIf.(type) { + case []byte: + if len(val) < npoints*nbytes { + return nil + } + case [][]byte: + if len(val) < npoints { + return nil + } + scalars = make([]*C.byte, npoints) + for i := range scalars { + scalars[i] = (*C.byte)(&val[i][0]) + } + case []Scalar: + if len(val) < npoints { + return nil + } + if nbits <= 248 { + scalars = make([]*C.byte, npoints) + for i := range scalars { + scalars[i] = &val[i].b[0] + } + } + case []*Scalar: + if len(val) < npoints { + return nil + } + scalars = make([]*C.byte, npoints) + for i := range scalars { + scalars[i] = &val[i].b[0] + } + default: + panic(fmt.Sprintf("unsupported type %T", val)) + } + + numThreads := maxProcs + numCores := runtime.GOMAXPROCS(0) + if numCores < maxProcs { + numThreads = numCores + } + + if numThreads < 2 || npoints < 32 { + sz := int(C.blst_p2s_mult_pippenger_scratch_sizeof(C.size_t(npoints))) / 8 + scratch := make([]uint64, sz) + + pointsBySlice := [2]*P2Affine{nil, nil} + var p_points **P2Affine + switch val := pointsIf.(type) { + case []*P2Affine: + p_points = &val[0] + case []P2Affine: + pointsBySlice[0] = &val[0] + p_points = &pointsBySlice[0] + case P2Affines: + pointsBySlice[0] = &val[0] + p_points = &pointsBySlice[0] + } + + scalarsBySlice := [2]*C.byte{nil, nil} + var p_scalars **C.byte + switch val := scalarsIf.(type) { + case []byte: + scalarsBySlice[0] = (*C.byte)(&val[0]) + p_scalars = &scalarsBySlice[0] + case [][]byte: + p_scalars = &scalars[0] + case []Scalar: + if nbits > 248 { + scalarsBySlice[0] = (*C.byte)(&val[0].b[0]) + p_scalars = &scalarsBySlice[0] + } else { + p_scalars = &scalars[0] + } + case []*Scalar: + p_scalars = &scalars[0] + } + + var ret P2 + _cgoCheckPointer := func(...interface{}) {} + C.blst_p2s_mult_pippenger(&ret, p_points, C.size_t(npoints), + p_scalars, C.size_t(nbits), + (*C.limb_t)(&scratch[0])) + + for i := range scalars { + scalars[i] = nil + } + + return &ret + } + + // this is sizeof(scratch[0]) + sz := int(C.blst_p2s_mult_pippenger_scratch_sizeof(0)) / 8 + + nx, ny, window := breakdown(nbits, pippenger_window_size(npoints), + numThreads) + + // |grid[]| holds "coordinates" and place for result + grid := make([]struct { + x, dx, y, dy int + point P2 + }, nx*ny) + + dx := npoints / nx + y := window * (ny - 1) + total := 0 + for ; total < nx; total++ { + grid[total].x = total * dx + grid[total].dx = dx + grid[total].y = y + grid[total].dy = nbits - y + } + grid[total-1].dx = npoints - grid[total-1].x + + for y > 0 { + y -= window + for i := 0; i < nx; i++ { + grid[total].x = grid[i].x + grid[total].dx = grid[i].dx + grid[total].y = y + grid[total].dy = window + total++ + } + } + + if numThreads > total { + numThreads = total + } + + msgsCh := make(chan int, ny) + rowSync := make([]int32, ny) // count up to |nx| + curItem := int32(0) + for tid := 0; tid < numThreads; tid++ { + go func() { + scratch := make([]uint64, sz<= total { + break + } + + x := grid[workItem].x + y := grid[workItem].y + + var p_points **P2Affine + switch val := pointsIf.(type) { + case []*P2Affine: + p_points = &val[x] + case []P2Affine: + pointsBySlice[0] = &val[x] + p_points = &pointsBySlice[0] + case P2Affines: + pointsBySlice[0] = &val[x] + p_points = &pointsBySlice[0] + } + + var p_scalars **C.byte + switch val := scalarsIf.(type) { + case []byte: + scalarsBySlice[0] = (*C.byte)(&val[x*nbytes]) + p_scalars = &scalarsBySlice[0] + case [][]byte: + p_scalars = &scalars[x] + case []Scalar: + if nbits > 248 { + scalarsBySlice[0] = (*C.byte)(&val[x].b[0]) + p_scalars = &scalarsBySlice[0] + } else { + p_scalars = &scalars[x] + } + case []*Scalar: + p_scalars = &scalars[x] + } + + C.blst_p2s_tile_pippenger(&grid[workItem].point, + p_points, C.size_t(grid[workItem].dx), + p_scalars, C.size_t(nbits), + (*C.limb_t)(&scratch[0]), + C.size_t(y), C.size_t(window)) + + if atomic.AddInt32(&rowSync[y/window], 1) == int32(nx) { + msgsCh <- y // "row" is done + } else { + runtime.Gosched() // be nice to the application + } + } + + pointsBySlice[0] = nil + scalarsBySlice[0] = nil + }() + } + + var ret P2 + rows := make([]bool, ny) + row := 0 // actually index in |grid[]| + for i := 0; i < ny; i++ { // we expect |ny| messages, one per "row" + y := <-msgsCh + rows[y/window] = true // mark the "row" + for grid[row].y == y { // if it's current "row", process it + for row < total && grid[row].y == y { + C.blst_p2_add_or_double(&ret, &ret, &grid[row].point) + row++ + } + if y == 0 { + break // one can as well 'return &ret' here + } + for j := 0; j < window; j++ { + C.blst_p2_double(&ret, &ret) + } + y -= window + if !rows[y/window] { // see if next "row" was marked already + break + } + } + } + + for i := range scalars { + scalars[i] = nil + } + + return &ret +} + +func (points P2Affines) Mult(scalarsIf interface{}, nbits int) *P2 { + return P2AffinesMult(points, scalarsIf, nbits) +} + +func (points P2s) Mult(scalarsIf interface{}, nbits int) *P2 { + return points.ToAffine().Mult(scalarsIf, nbits) +} + +func parseOpts(optional ...interface{}) ([]byte, [][]byte, bool, bool) { + var aug [][]byte // For aggregate verify + var augSingle []byte // For signing + useHash := true // hash (true), encode (false) + + for _, arg := range optional { + switch v := arg.(type) { + case []byte: + augSingle = v + case [][]byte: + aug = v + case bool: + useHash = v + default: + return nil, nil, useHash, false + } + } + return augSingle, aug, useHash, true +} + +func bytesAllZero(s []byte) bool { + for _, v := range s { + if v != 0 { + return false + } + } + return true +} + +// These methods are inefficient because of cgo call overhead. For this +// reason they should be used primarily for prototyping with a goal to +// formulate interfaces that would process multiple scalars per cgo call. +func (a *Scalar) MulAssign(b *Scalar) (*Scalar, bool) { + return a, bool(C.blst_sk_mul_n_check(a, a, b)) +} + +func (a *Scalar) Mul(b *Scalar) (*Scalar, bool) { + var ret Scalar + return &ret, bool(C.blst_sk_mul_n_check(&ret, a, b)) +} + +func (a *Scalar) AddAssign(b *Scalar) (*Scalar, bool) { + return a, bool(C.blst_sk_add_n_check(a, a, b)) +} + +func (a *Scalar) Add(b *Scalar) (*Scalar, bool) { + var ret Scalar + return &ret, bool(C.blst_sk_add_n_check(&ret, a, b)) +} + +func (a *Scalar) SubAssign(b *Scalar) (*Scalar, bool) { + return a, bool(C.blst_sk_sub_n_check(a, a, b)) +} + +func (a *Scalar) Sub(b *Scalar) (*Scalar, bool) { + var ret Scalar + return &ret, bool(C.blst_sk_sub_n_check(&ret, a, b)) +} + +func (a *Scalar) Inverse() *Scalar { + var ret Scalar + C.blst_sk_inverse(&ret, a) + return &ret +} + +// +// Serialization/Deserialization. +// + +// Scalar serdes +func (s *Scalar) Serialize() []byte { + var out [BLST_SCALAR_BYTES]byte + C.blst_bendian_from_scalar((*C.byte)(&out[0]), s) + return out[:] +} + +func (s *Scalar) Deserialize(in []byte) *Scalar { + if len(in) != BLST_SCALAR_BYTES || + !C.go_scalar_from_bendian(s, (*C.byte)(&in[0])) { + return nil + } + return s +} + +func (s *Scalar) Valid() bool { + return bool(C.blst_sk_check(s)) +} + +func (s *Scalar) HashTo(msg []byte, dst []byte) bool { + ret := HashToScalar(msg, dst) + if ret != nil { + *s = *ret + return true + } + return false +} + +func HashToScalar(msg []byte, dst []byte) *Scalar { + var ret Scalar + + var msgC *C.byte + if len(msg) > 0 { + msgC = (*C.byte)(&msg[0]) + } + + var dstC *C.byte + if len(dst) > 0 { + dstC = (*C.byte)(&dst[0]) + } + + if C.go_hash_to_scalar(&ret, msgC, C.size_t(len(msg)), + dstC, C.size_t(len(dst))) { + return &ret + } + + return nil +} + +// +// LEndian +// + +func (fr *Scalar) ToLEndian() []byte { + var arr [BLST_SCALAR_BYTES]byte + C.blst_lendian_from_scalar((*C.byte)(&arr[0]), fr) + return arr[:] +} + +func (fp *Fp) ToLEndian() []byte { + var arr [BLST_FP_BYTES]byte + C.blst_lendian_from_fp((*C.byte)(&arr[0]), fp) + return arr[:] +} + +func (fr *Scalar) FromLEndian(arr []byte) *Scalar { + nbytes := len(arr) + if nbytes < BLST_SCALAR_BYTES || + !C.blst_scalar_from_le_bytes(fr, (*C.byte)(&arr[0]), C.size_t(nbytes)) { + return nil + } + return fr +} + +func (fp *Fp) FromLEndian(arr []byte) *Fp { + if len(arr) != BLST_FP_BYTES { + return nil + } + C.blst_fp_from_lendian(fp, (*C.byte)(&arr[0])) + return fp +} + +// +// BEndian +// + +func (fr *Scalar) ToBEndian() []byte { + var arr [BLST_SCALAR_BYTES]byte + C.blst_bendian_from_scalar((*C.byte)(&arr[0]), fr) + return arr[:] +} + +func (fp *Fp) ToBEndian() []byte { + var arr [BLST_FP_BYTES]byte + C.blst_bendian_from_fp((*C.byte)(&arr[0]), fp) + return arr[:] +} + +func (fr *Scalar) FromBEndian(arr []byte) *Scalar { + nbytes := len(arr) + if nbytes < BLST_SCALAR_BYTES || + !C.blst_scalar_from_be_bytes(fr, (*C.byte)(&arr[0]), C.size_t(nbytes)) { + return nil + } + return fr +} + +func (fp *Fp) FromBEndian(arr []byte) *Fp { + if len(arr) != BLST_FP_BYTES { + return nil + } + C.blst_fp_from_bendian(fp, (*C.byte)(&arr[0])) + return fp +} + +// +// Printing +// + +func PrintBytes(val []byte, name string) { + fmt.Printf("%s = %02x\n", name, val) +} + +func (s *Scalar) Print(name string) { + arr := s.ToBEndian() + PrintBytes(arr[:], name) +} + +func (p *P1Affine) Print(name string) { + fmt.Printf("%s:\n", name) + arr := p.x.ToBEndian() + PrintBytes(arr, " x") + arr = p.y.ToBEndian() + PrintBytes(arr, " y") +} + +func (p *P1) Print(name string) { + fmt.Printf("%s:\n", name) + aff := p.ToAffine() + aff.Print(name) +} + +func (f *Fp2) Print(name string) { + fmt.Printf("%s:\n", name) + arr := f.fp[0].ToBEndian() + PrintBytes(arr, " 0") + arr = f.fp[1].ToBEndian() + PrintBytes(arr, " 1") +} + +func (p *P2Affine) Print(name string) { + fmt.Printf("%s:\n", name) + p.x.Print(" x") + p.y.Print(" y") +} + +func (p *P2) Print(name string) { + fmt.Printf("%s:\n", name) + aff := p.ToAffine() + aff.Print(name) +} + +// +// Equality +// + +func (s1 *Scalar) Equals(s2 *Scalar) bool { + return *s1 == *s2 +} + +func (e1 *Fp) Equals(e2 *Fp) bool { + return *e1 == *e2 +} + +func (e1 *Fp2) Equals(e2 *Fp2) bool { + return *e1 == *e2 +} + +func (e1 *P1Affine) Equals(e2 *P1Affine) bool { + return bool(C.blst_p1_affine_is_equal(e1, e2)) +} + +func (e1 *P1) Equals(e2 *P1) bool { + return bool(C.blst_p1_is_equal(e1, e2)) +} + +func (e1 *P2Affine) Equals(e2 *P2Affine) bool { + return bool(C.blst_p2_affine_is_equal(e1, e2)) +} + +func (e1 *P2) Equals(e2 *P2) bool { + return bool(C.blst_p2_is_equal(e1, e2)) +} + +// private thunk for testing + +func expandMessageXmd(msg []byte, dst []byte, len_in_bytes int) []byte { + ret := make([]byte, len_in_bytes) + + var msgC *C.byte + if len(msg) > 0 { + msgC = (*C.byte)(&msg[0]) + } + + var dstC *C.byte + if len(dst) > 0 { + dstC = (*C.byte)(&dst[0]) + } + + C.blst_expand_message_xmd((*C.byte)(&ret[0]), C.size_t(len(ret)), + msgC, C.size_t(len(msg)), + dstC, C.size_t(len(dst))) + return ret +} + +func breakdown(nbits, window, ncpus int) (int, int, int) { + var nx, ny, wnd int + + if nbits > window*ncpus { + nx = 1 + wnd = bits.Len(uint(ncpus) / 4) + if (window + wnd) > 18 { + wnd = window - wnd + } else { + wnd = (nbits/window + ncpus - 1) / ncpus + if (nbits/(window+1)+ncpus-1)/ncpus < wnd { + wnd = window + 1 + } else { + wnd = window + } + } + } else { + nx = 2 + wnd = window - 2 + for (nbits/wnd+1)*nx < ncpus { + nx += 1 + wnd = window - bits.Len(3*uint(nx)/2) + } + nx -= 1 + wnd = window - bits.Len(3*uint(nx)/2) + } + ny = nbits/wnd + 1 + wnd = nbits/ny + 1 + + return nx, ny, wnd +} + +func pippenger_window_size(npoints int) int { + wbits := bits.Len(uint(npoints)) + + if wbits > 13 { + return wbits - 4 + } + if wbits > 5 { + return wbits - 3 + } + return 2 +} diff --git a/crypto/internal/blst/blst.h b/crypto/internal/blst/blst.h new file mode 100644 index 00000000000..1349896a3f8 --- /dev/null +++ b/crypto/internal/blst/blst.h @@ -0,0 +1,482 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLST_H__ +#define __BLST_H__ + +#ifdef __SIZE_TYPE__ +typedef __SIZE_TYPE__ size_t; +#else +#include +#endif + +#if defined(__UINT8_TYPE__) && defined(__UINT32_TYPE__) \ + && defined(__UINT64_TYPE__) +typedef __UINT8_TYPE__ uint8_t; +typedef __UINT32_TYPE__ uint32_t; +typedef __UINT64_TYPE__ uint64_t; +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#elif defined(__BLST_CGO__) +typedef _Bool bool; /* it's assumed that cgo calls modern enough compiler */ +#elif defined(__STDC_VERSION__) && __STDC_VERSION__>=199901 +# define bool _Bool +#else +# define bool int +#endif + +#ifdef SWIG +# define DEFNULL =NULL +#elif defined __cplusplus +# define DEFNULL =0 +#else +# define DEFNULL +#endif + +typedef enum { + BLST_SUCCESS = 0, + BLST_BAD_ENCODING, + BLST_POINT_NOT_ON_CURVE, + BLST_POINT_NOT_IN_GROUP, + BLST_AGGR_TYPE_MISMATCH, + BLST_VERIFY_FAIL, + BLST_PK_IS_INFINITY, + BLST_BAD_SCALAR, +} BLST_ERROR; + +typedef uint8_t byte; +typedef uint64_t limb_t; + +typedef struct { byte b[256/8]; } blst_scalar; +typedef struct { limb_t l[256/8/sizeof(limb_t)]; } blst_fr; +typedef struct { limb_t l[384/8/sizeof(limb_t)]; } blst_fp; +/* 0 is "real" part, 1 is "imaginary" */ +typedef struct { blst_fp fp[2]; } blst_fp2; +typedef struct { blst_fp2 fp2[3]; } blst_fp6; +typedef struct { blst_fp6 fp6[2]; } blst_fp12; + +void blst_scalar_from_uint32(blst_scalar *out, const uint32_t a[8]); +void blst_uint32_from_scalar(uint32_t out[8], const blst_scalar *a); +void blst_scalar_from_uint64(blst_scalar *out, const uint64_t a[4]); +void blst_uint64_from_scalar(uint64_t out[4], const blst_scalar *a); +void blst_scalar_from_bendian(blst_scalar *out, const byte a[32]); +void blst_bendian_from_scalar(byte out[32], const blst_scalar *a); +void blst_scalar_from_lendian(blst_scalar *out, const byte a[32]); +void blst_lendian_from_scalar(byte out[32], const blst_scalar *a); +bool blst_scalar_fr_check(const blst_scalar *a); +bool blst_sk_check(const blst_scalar *a); +bool blst_sk_add_n_check(blst_scalar *out, const blst_scalar *a, + const blst_scalar *b); +bool blst_sk_sub_n_check(blst_scalar *out, const blst_scalar *a, + const blst_scalar *b); +bool blst_sk_mul_n_check(blst_scalar *out, const blst_scalar *a, + const blst_scalar *b); +void blst_sk_inverse(blst_scalar *out, const blst_scalar *a); +bool blst_scalar_from_le_bytes(blst_scalar *out, const byte *in, size_t len); +bool blst_scalar_from_be_bytes(blst_scalar *out, const byte *in, size_t len); + +#ifndef SWIG +/* + * BLS12-381-specific Fr operations. + */ +void blst_fr_add(blst_fr *ret, const blst_fr *a, const blst_fr *b); +void blst_fr_sub(blst_fr *ret, const blst_fr *a, const blst_fr *b); +void blst_fr_mul_by_3(blst_fr *ret, const blst_fr *a); +void blst_fr_lshift(blst_fr *ret, const blst_fr *a, size_t count); +void blst_fr_rshift(blst_fr *ret, const blst_fr *a, size_t count); +void blst_fr_mul(blst_fr *ret, const blst_fr *a, const blst_fr *b); +void blst_fr_sqr(blst_fr *ret, const blst_fr *a); +void blst_fr_cneg(blst_fr *ret, const blst_fr *a, bool flag); +void blst_fr_eucl_inverse(blst_fr *ret, const blst_fr *a); +void blst_fr_inverse(blst_fr *ret, const blst_fr *a); + +void blst_fr_from_uint64(blst_fr *ret, const uint64_t a[4]); +void blst_uint64_from_fr(uint64_t ret[4], const blst_fr *a); +void blst_fr_from_scalar(blst_fr *ret, const blst_scalar *a); +void blst_scalar_from_fr(blst_scalar *ret, const blst_fr *a); + +/* + * BLS12-381-specific Fp operations. + */ +void blst_fp_add(blst_fp *ret, const blst_fp *a, const blst_fp *b); +void blst_fp_sub(blst_fp *ret, const blst_fp *a, const blst_fp *b); +void blst_fp_mul_by_3(blst_fp *ret, const blst_fp *a); +void blst_fp_mul_by_8(blst_fp *ret, const blst_fp *a); +void blst_fp_lshift(blst_fp *ret, const blst_fp *a, size_t count); +void blst_fp_mul(blst_fp *ret, const blst_fp *a, const blst_fp *b); +void blst_fp_sqr(blst_fp *ret, const blst_fp *a); +void blst_fp_cneg(blst_fp *ret, const blst_fp *a, bool flag); +void blst_fp_eucl_inverse(blst_fp *ret, const blst_fp *a); +void blst_fp_inverse(blst_fp *ret, const blst_fp *a); +bool blst_fp_sqrt(blst_fp *ret, const blst_fp *a); + +void blst_fp_from_uint32(blst_fp *ret, const uint32_t a[12]); +void blst_uint32_from_fp(uint32_t ret[12], const blst_fp *a); +void blst_fp_from_uint64(blst_fp *ret, const uint64_t a[6]); +void blst_uint64_from_fp(uint64_t ret[6], const blst_fp *a); +void blst_fp_from_bendian(blst_fp *ret, const byte a[48]); +void blst_bendian_from_fp(byte ret[48], const blst_fp *a); +void blst_fp_from_lendian(blst_fp *ret, const byte a[48]); +void blst_lendian_from_fp(byte ret[48], const blst_fp *a); + +/* + * BLS12-381-specific Fp2 operations. + */ +void blst_fp2_add(blst_fp2 *ret, const blst_fp2 *a, const blst_fp2 *b); +void blst_fp2_sub(blst_fp2 *ret, const blst_fp2 *a, const blst_fp2 *b); +void blst_fp2_mul_by_3(blst_fp2 *ret, const blst_fp2 *a); +void blst_fp2_mul_by_8(blst_fp2 *ret, const blst_fp2 *a); +void blst_fp2_lshift(blst_fp2 *ret, const blst_fp2 *a, size_t count); +void blst_fp2_mul(blst_fp2 *ret, const blst_fp2 *a, const blst_fp2 *b); +void blst_fp2_sqr(blst_fp2 *ret, const blst_fp2 *a); +void blst_fp2_cneg(blst_fp2 *ret, const blst_fp2 *a, bool flag); +void blst_fp2_eucl_inverse(blst_fp2 *ret, const blst_fp2 *a); +void blst_fp2_inverse(blst_fp2 *ret, const blst_fp2 *a); +bool blst_fp2_sqrt(blst_fp2 *ret, const blst_fp2 *a); + +/* + * BLS12-381-specific Fp12 operations. + */ +void blst_fp12_sqr(blst_fp12 *ret, const blst_fp12 *a); +void blst_fp12_cyclotomic_sqr(blst_fp12 *ret, const blst_fp12 *a); +void blst_fp12_mul(blst_fp12 *ret, const blst_fp12 *a, const blst_fp12 *b); +void blst_fp12_mul_by_xy00z0(blst_fp12 *ret, const blst_fp12 *a, + const blst_fp6 *xy00z0); +void blst_fp12_conjugate(blst_fp12 *a); +void blst_fp12_inverse(blst_fp12 *ret, const blst_fp12 *a); +/* caveat lector! |n| has to be non-zero and not more than 3! */ +void blst_fp12_frobenius_map(blst_fp12 *ret, const blst_fp12 *a, size_t n); +bool blst_fp12_is_equal(const blst_fp12 *a, const blst_fp12 *b); +bool blst_fp12_is_one(const blst_fp12 *a); +bool blst_fp12_in_group(const blst_fp12 *a); +const blst_fp12 *blst_fp12_one(void); +#endif // SWIG + +/* + * BLS12-381-specific point operations. + */ +typedef struct { blst_fp x, y, z; } blst_p1; +typedef struct { blst_fp x, y; } blst_p1_affine; + +void blst_p1_add(blst_p1 *out, const blst_p1 *a, const blst_p1 *b); +void blst_p1_add_or_double(blst_p1 *out, const blst_p1 *a, const blst_p1 *b); +void blst_p1_add_affine(blst_p1 *out, const blst_p1 *a, + const blst_p1_affine *b); +void blst_p1_add_or_double_affine(blst_p1 *out, const blst_p1 *a, + const blst_p1_affine *b); +void blst_p1_double(blst_p1 *out, const blst_p1 *a); +void blst_p1_mult(blst_p1 *out, const blst_p1 *p, const byte *scalar, + size_t nbits); +void blst_p1_cneg(blst_p1 *p, bool cbit); +void blst_p1_to_affine(blst_p1_affine *out, const blst_p1 *in); +void blst_p1_from_affine(blst_p1 *out, const blst_p1_affine *in); +bool blst_p1_on_curve(const blst_p1 *p); +bool blst_p1_in_g1(const blst_p1 *p); +bool blst_p1_is_equal(const blst_p1 *a, const blst_p1 *b); +bool blst_p1_is_inf(const blst_p1 *a); +const blst_p1 *blst_p1_generator(void); + +bool blst_p1_affine_on_curve(const blst_p1_affine *p); +bool blst_p1_affine_in_g1(const blst_p1_affine *p); +bool blst_p1_affine_is_equal(const blst_p1_affine *a, const blst_p1_affine *b); +bool blst_p1_affine_is_inf(const blst_p1_affine *a); +const blst_p1_affine *blst_p1_affine_generator(void); + +typedef struct { blst_fp2 x, y, z; } blst_p2; +typedef struct { blst_fp2 x, y; } blst_p2_affine; + +void blst_p2_add(blst_p2 *out, const blst_p2 *a, const blst_p2 *b); +void blst_p2_add_or_double(blst_p2 *out, const blst_p2 *a, const blst_p2 *b); +void blst_p2_add_affine(blst_p2 *out, const blst_p2 *a, + const blst_p2_affine *b); +void blst_p2_add_or_double_affine(blst_p2 *out, const blst_p2 *a, + const blst_p2_affine *b); +void blst_p2_double(blst_p2 *out, const blst_p2 *a); +void blst_p2_mult(blst_p2 *out, const blst_p2 *p, const byte *scalar, + size_t nbits); +void blst_p2_cneg(blst_p2 *p, bool cbit); +void blst_p2_to_affine(blst_p2_affine *out, const blst_p2 *in); +void blst_p2_from_affine(blst_p2 *out, const blst_p2_affine *in); +bool blst_p2_on_curve(const blst_p2 *p); +bool blst_p2_in_g2(const blst_p2 *p); +bool blst_p2_is_equal(const blst_p2 *a, const blst_p2 *b); +bool blst_p2_is_inf(const blst_p2 *a); +const blst_p2 *blst_p2_generator(void); + +bool blst_p2_affine_on_curve(const blst_p2_affine *p); +bool blst_p2_affine_in_g2(const blst_p2_affine *p); +bool blst_p2_affine_is_equal(const blst_p2_affine *a, const blst_p2_affine *b); +bool blst_p2_affine_is_inf(const blst_p2_affine *a); +const blst_p2_affine *blst_p2_affine_generator(void); + +/* + * Multi-scalar multiplications and other multi-point operations. + */ + +void blst_p1s_to_affine(blst_p1_affine dst[], const blst_p1 *const points[], + size_t npoints); +void blst_p1s_add(blst_p1 *ret, const blst_p1_affine *const points[], + size_t npoints); + +size_t blst_p1s_mult_wbits_precompute_sizeof(size_t wbits, size_t npoints); +void blst_p1s_mult_wbits_precompute(blst_p1_affine table[], size_t wbits, + const blst_p1_affine *const points[], + size_t npoints); +size_t blst_p1s_mult_wbits_scratch_sizeof(size_t npoints); +void blst_p1s_mult_wbits(blst_p1 *ret, const blst_p1_affine table[], + size_t wbits, size_t npoints, + const byte *const scalars[], size_t nbits, + limb_t *scratch); + +size_t blst_p1s_mult_pippenger_scratch_sizeof(size_t npoints); +void blst_p1s_mult_pippenger(blst_p1 *ret, const blst_p1_affine *const points[], + size_t npoints, const byte *const scalars[], + size_t nbits, limb_t *scratch); +void blst_p1s_tile_pippenger(blst_p1 *ret, const blst_p1_affine *const points[], + size_t npoints, const byte *const scalars[], + size_t nbits, limb_t *scratch, + size_t bit0, size_t window); + +void blst_p2s_to_affine(blst_p2_affine dst[], const blst_p2 *const points[], + size_t npoints); +void blst_p2s_add(blst_p2 *ret, const blst_p2_affine *const points[], + size_t npoints); + +size_t blst_p2s_mult_wbits_precompute_sizeof(size_t wbits, size_t npoints); +void blst_p2s_mult_wbits_precompute(blst_p2_affine table[], size_t wbits, + const blst_p2_affine *const points[], + size_t npoints); +size_t blst_p2s_mult_wbits_scratch_sizeof(size_t npoints); +void blst_p2s_mult_wbits(blst_p2 *ret, const blst_p2_affine table[], + size_t wbits, size_t npoints, + const byte *const scalars[], size_t nbits, + limb_t *scratch); + +size_t blst_p2s_mult_pippenger_scratch_sizeof(size_t npoints); +void blst_p2s_mult_pippenger(blst_p2 *ret, const blst_p2_affine *const points[], + size_t npoints, const byte *const scalars[], + size_t nbits, limb_t *scratch); +void blst_p2s_tile_pippenger(blst_p2 *ret, const blst_p2_affine *const points[], + size_t npoints, const byte *const scalars[], + size_t nbits, limb_t *scratch, + size_t bit0, size_t window); + +/* + * Hash-to-curve operations. + */ +#ifndef SWIG +void blst_map_to_g1(blst_p1 *out, const blst_fp *u, const blst_fp *v DEFNULL); +void blst_map_to_g2(blst_p2 *out, const blst_fp2 *u, const blst_fp2 *v DEFNULL); +#endif + +void blst_encode_to_g1(blst_p1 *out, + const byte *msg, size_t msg_len, + const byte *DST DEFNULL, size_t DST_len DEFNULL, + const byte *aug DEFNULL, size_t aug_len DEFNULL); +void blst_hash_to_g1(blst_p1 *out, + const byte *msg, size_t msg_len, + const byte *DST DEFNULL, size_t DST_len DEFNULL, + const byte *aug DEFNULL, size_t aug_len DEFNULL); + +void blst_encode_to_g2(blst_p2 *out, + const byte *msg, size_t msg_len, + const byte *DST DEFNULL, size_t DST_len DEFNULL, + const byte *aug DEFNULL, size_t aug_len DEFNULL); +void blst_hash_to_g2(blst_p2 *out, + const byte *msg, size_t msg_len, + const byte *DST DEFNULL, size_t DST_len DEFNULL, + const byte *aug DEFNULL, size_t aug_len DEFNULL); + +/* + * Zcash-compatible serialization/deserialization. + */ +void blst_p1_serialize(byte out[96], const blst_p1 *in); +void blst_p1_compress(byte out[48], const blst_p1 *in); +void blst_p1_affine_serialize(byte out[96], const blst_p1_affine *in); +void blst_p1_affine_compress(byte out[48], const blst_p1_affine *in); +BLST_ERROR blst_p1_uncompress(blst_p1_affine *out, const byte in[48]); +BLST_ERROR blst_p1_deserialize(blst_p1_affine *out, const byte in[96]); + +void blst_p2_serialize(byte out[192], const blst_p2 *in); +void blst_p2_compress(byte out[96], const blst_p2 *in); +void blst_p2_affine_serialize(byte out[192], const blst_p2_affine *in); +void blst_p2_affine_compress(byte out[96], const blst_p2_affine *in); +BLST_ERROR blst_p2_uncompress(blst_p2_affine *out, const byte in[96]); +BLST_ERROR blst_p2_deserialize(blst_p2_affine *out, const byte in[192]); + +/* + * Specification defines two variants, 'minimal-signature-size' and + * 'minimal-pubkey-size'. To unify appearance we choose to distinguish + * them by suffix referring to the public key type, more specifically + * _pk_in_g1 corresponds to 'minimal-pubkey-size' and _pk_in_g2 - to + * 'minimal-signature-size'. It might appear a bit counterintuitive + * in sign call, but no matter how you twist it, something is bound to + * turn a little odd. + */ +/* + * Secret-key operations. + */ +void blst_keygen(blst_scalar *out_SK, const byte *IKM, size_t IKM_len, + const byte *info DEFNULL, size_t info_len DEFNULL); +void blst_sk_to_pk_in_g1(blst_p1 *out_pk, const blst_scalar *SK); +void blst_sign_pk_in_g1(blst_p2 *out_sig, const blst_p2 *hash, + const blst_scalar *SK); +void blst_sk_to_pk_in_g2(blst_p2 *out_pk, const blst_scalar *SK); +void blst_sign_pk_in_g2(blst_p1 *out_sig, const blst_p1 *hash, + const blst_scalar *SK); + +/* + * Pairing interface. + */ +#ifndef SWIG +void blst_miller_loop(blst_fp12 *ret, const blst_p2_affine *Q, + const blst_p1_affine *P); +void blst_miller_loop_n(blst_fp12 *ret, const blst_p2_affine *const Qs[], + const blst_p1_affine *const Ps[], + size_t n); +void blst_final_exp(blst_fp12 *ret, const blst_fp12 *f); +void blst_precompute_lines(blst_fp6 Qlines[68], const blst_p2_affine *Q); +void blst_miller_loop_lines(blst_fp12 *ret, const blst_fp6 Qlines[68], + const blst_p1_affine *P); +bool blst_fp12_finalverify(const blst_fp12 *gt1, const blst_fp12 *gt2); +#endif + +#ifdef __BLST_CGO__ +typedef limb_t blst_pairing; +#elif defined(__BLST_RUST_BINDGEN__) +typedef struct {} blst_pairing; +#else +typedef struct blst_opaque blst_pairing; +#endif + +size_t blst_pairing_sizeof(void); +void blst_pairing_init(blst_pairing *new_ctx, bool hash_or_encode, + const byte *DST DEFNULL, size_t DST_len DEFNULL); +const byte *blst_pairing_get_dst(const blst_pairing *ctx); +void blst_pairing_commit(blst_pairing *ctx); +BLST_ERROR blst_pairing_aggregate_pk_in_g2(blst_pairing *ctx, + const blst_p2_affine *PK, + const blst_p1_affine *signature, + const byte *msg, size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_chk_n_aggr_pk_in_g2(blst_pairing *ctx, + const blst_p2_affine *PK, + bool pk_grpchk, + const blst_p1_affine *signature, + bool sig_grpchk, + const byte *msg, size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g2(blst_pairing *ctx, + const blst_p2_affine *PK, + const blst_p1_affine *sig, + const byte *scalar, + size_t nbits, + const byte *msg, + size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g2(blst_pairing *ctx, + const blst_p2_affine *PK, + bool pk_grpchk, + const blst_p1_affine *sig, + bool sig_grpchk, + const byte *scalar, + size_t nbits, + const byte *msg, + size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_aggregate_pk_in_g1(blst_pairing *ctx, + const blst_p1_affine *PK, + const blst_p2_affine *signature, + const byte *msg, size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_chk_n_aggr_pk_in_g1(blst_pairing *ctx, + const blst_p1_affine *PK, + bool pk_grpchk, + const blst_p2_affine *signature, + bool sig_grpchk, + const byte *msg, size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g1(blst_pairing *ctx, + const blst_p1_affine *PK, + const blst_p2_affine *sig, + const byte *scalar, + size_t nbits, + const byte *msg, + size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g1(blst_pairing *ctx, + const blst_p1_affine *PK, + bool pk_grpchk, + const blst_p2_affine *sig, + bool sig_grpchk, + const byte *scalar, + size_t nbits, + const byte *msg, + size_t msg_len, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_pairing_merge(blst_pairing *ctx, const blst_pairing *ctx1); +bool blst_pairing_finalverify(const blst_pairing *ctx, + const blst_fp12 *gtsig DEFNULL); + + +/* + * Customarily applications aggregate signatures separately. + * In which case application would have to pass NULLs for |signature| + * to blst_pairing_aggregate calls and pass aggregated signature + * collected with these calls to blst_pairing_finalverify. Inputs are + * Zcash-compatible "straight-from-wire" byte vectors, compressed or + * not. + */ +BLST_ERROR blst_aggregate_in_g1(blst_p1 *out, const blst_p1 *in, + const byte *zwire); +BLST_ERROR blst_aggregate_in_g2(blst_p2 *out, const blst_p2 *in, + const byte *zwire); + +void blst_aggregated_in_g1(blst_fp12 *out, const blst_p1_affine *signature); +void blst_aggregated_in_g2(blst_fp12 *out, const blst_p2_affine *signature); + +/* + * "One-shot" CoreVerify entry points. + */ +BLST_ERROR blst_core_verify_pk_in_g1(const blst_p1_affine *pk, + const blst_p2_affine *signature, + bool hash_or_encode, + const byte *msg, size_t msg_len, + const byte *DST DEFNULL, + size_t DST_len DEFNULL, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); +BLST_ERROR blst_core_verify_pk_in_g2(const blst_p2_affine *pk, + const blst_p1_affine *signature, + bool hash_or_encode, + const byte *msg, size_t msg_len, + const byte *DST DEFNULL, + size_t DST_len DEFNULL, + const byte *aug DEFNULL, + size_t aug_len DEFNULL); + +extern const blst_p1_affine BLS12_381_G1; +extern const blst_p1_affine BLS12_381_NEG_G1; +extern const blst_p2_affine BLS12_381_G2; +extern const blst_p2_affine BLS12_381_NEG_G2; + +#include "blst_aux.h" + +#ifdef __cplusplus +} +#endif +#endif diff --git a/crypto/internal/blst/blst_aux.h b/crypto/internal/blst/blst_aux.h new file mode 100644 index 00000000000..3de0850e330 --- /dev/null +++ b/crypto/internal/blst/blst_aux.h @@ -0,0 +1,117 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __BLST_AUX_H__ +#define __BLST_AUX_H__ +/* + * This file lists interfaces that might be promoted to blst.h or removed, + * depending on their proven/unproven worthiness. + */ + +void blst_fr_ct_bfly(blst_fr *x0, blst_fr *x1, const blst_fr *twiddle); +void blst_fr_gs_bfly(blst_fr *x0, blst_fr *x1, const blst_fr *twiddle); +void blst_fr_to(blst_fr *ret, const blst_fr *a); +void blst_fr_from(blst_fr *ret, const blst_fr *a); +#ifdef BLST_FR_PENTAROOT +void blst_fr_pentaroot(blst_fr *ret, const blst_fr *a); +void blst_fr_pentapow(blst_fr *ret, const blst_fr *a); +#endif + +void blst_fp_to(blst_fp *ret, const blst_fp *a); +void blst_fp_from(blst_fp *ret, const blst_fp *a); + +bool blst_fp_is_square(const blst_fp *a); +bool blst_fp2_is_square(const blst_fp2 *a); + +void blst_p1_from_jacobian(blst_p1 *out, const blst_p1 *in); +void blst_p2_from_jacobian(blst_p2 *out, const blst_p2 *in); + +/* + * Below functions produce both point and deserialized outcome of + * SkToPk and Sign. However, deserialized outputs are pre-decorated + * with sign and infinity bits. This means that you have to bring the + * output into compliance prior returning to application. If you want + * compressed point value, then do [equivalent of] + * + * byte temp[96]; + * blst_sk_to_pk2_in_g1(temp, out_pk, SK); + * temp[0] |= 0x80; + * memcpy(out, temp, 48); + * + * Otherwise do + * + * blst_sk_to_pk2_in_g1(out, out_pk, SK); + * out[0] &= ~0x20; + * + * Either |out| or |out_| can be NULL. + */ +void blst_sk_to_pk2_in_g1(byte out[96], blst_p1_affine *out_pk, + const blst_scalar *SK); +void blst_sign_pk2_in_g1(byte out[192], blst_p2_affine *out_sig, + const blst_p2 *hash, const blst_scalar *SK); +void blst_sk_to_pk2_in_g2(byte out[192], blst_p2_affine *out_pk, + const blst_scalar *SK); +void blst_sign_pk2_in_g2(byte out[96], blst_p1_affine *out_sig, + const blst_p1 *hash, const blst_scalar *SK); + +#ifdef __BLST_RUST_BINDGEN__ +typedef struct {} blst_uniq; +#else +typedef struct blst_opaque blst_uniq; +#endif + +size_t blst_uniq_sizeof(size_t n_nodes); +void blst_uniq_init(blst_uniq *tree); +bool blst_uniq_test(blst_uniq *tree, const byte *msg, size_t len); + +#ifdef expand_message_xmd +void expand_message_xmd(unsigned char *bytes, size_t len_in_bytes, + const unsigned char *aug, size_t aug_len, + const unsigned char *msg, size_t msg_len, + const unsigned char *DST, size_t DST_len); +#else +void blst_expand_message_xmd(byte *out, size_t out_len, + const byte *msg, size_t msg_len, + const byte *DST, size_t DST_len); +#endif + +void blst_p1_unchecked_mult(blst_p1 *out, const blst_p1 *p, const byte *scalar, + size_t nbits); +void blst_p2_unchecked_mult(blst_p2 *out, const blst_p2 *p, const byte *scalar, + size_t nbits); + +void blst_pairing_raw_aggregate(blst_pairing *ctx, const blst_p2_affine *q, + const blst_p1_affine *p); +blst_fp12 *blst_pairing_as_fp12(blst_pairing *ctx); +void blst_bendian_from_fp12(byte out[48*12], const blst_fp12 *a); + +void blst_keygen_v3(blst_scalar *out_SK, const byte *IKM, size_t IKM_len, + const byte *info DEFNULL, size_t info_len DEFNULL); +void blst_keygen_v4_5(blst_scalar *out_SK, const byte *IKM, size_t IKM_len, + const byte *salt, size_t salt_len, + const byte *info DEFNULL, size_t info_len DEFNULL); +void blst_keygen_v5(blst_scalar *out_SK, const byte *IKM, size_t IKM_len, + const byte *salt, size_t salt_len, + const byte *info DEFNULL, size_t info_len DEFNULL); +void blst_derive_master_eip2333(blst_scalar *out_SK, + const byte *IKM, size_t IKM_len); +void blst_derive_child_eip2333(blst_scalar *out_SK, const blst_scalar *SK, + uint32_t child_index); + +void blst_scalar_from_hexascii(blst_scalar *out, const byte *hex); +void blst_fr_from_hexascii(blst_fr *ret, const byte *hex); +void blst_fp_from_hexascii(blst_fp *ret, const byte *hex); + +size_t blst_p1_sizeof(void); +size_t blst_p1_affine_sizeof(void); +size_t blst_p2_sizeof(void); +size_t blst_p2_affine_sizeof(void); +size_t blst_fp12_sizeof(void); + +/* + * Single-shot SHA-256 hash function. + */ +void blst_sha256(byte out[32], const byte *msg, size_t msg_len); +#endif diff --git a/crypto/relic_build.sh b/crypto/relic_build.sh deleted file mode 100755 index 6cff3a6b478..00000000000 --- a/crypto/relic_build.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -pushd "$DIR" - -# Ensure the directory is writeable -chmod -R +w "$(pwd)" - -mkdir -p "$DIR/relic/build" -pushd "$DIR/relic/build" - - -# make cmake print its CC interpretation -CMAKE_FILE="${DIR}/relic/CMakeLists.txt" -# parameter expansion is not suitable here -# shellcheck disable=SC2089 -CMAKE_PRINT_CC="message ( STATUS \"CC=\$ENV{CC}\" )" -# Make the cmake run print its interpretation of CC -echo "$CMAKE_PRINT_CC" >> "${CMAKE_FILE}" - -# Probe cmake's MakeFile generation and extract the CC version -CMAKE_TEMP=$(mktemp) -cmake .. > "$CMAKE_TEMP" -CC_VAL="$(tail -n 5 "$CMAKE_TEMP" | grep -oE -m 1 'CC=.*$')" -CC_VAL="${CC_VAL:3}" - -# de-mangle the CMakeLists file, using a temporary file for BSD compatibility -sed '$d' ../CMakeLists.txt > "$CMAKE_TEMP" -mv "$CMAKE_TEMP" ../CMakeLists.txt - -# default to which -CC_VAL=${CC_VAL:-"$(which cc)"} -CC_VERSION_STR="$($CC_VAL --version)" - -# we use uname to record which arch we are running on -ARCH=$(uname -m 2>/dev/null || true) - -if [[ "$ARCH" =~ "x86_64" ]]; then - # Compile as westmere arch to avoid cross-compilation issues on machines not supporting AVX extensions. - # Relic performance as used in flow crypto library is not impacted by whether it is compiled with "native" or "westmere", as proven by benchmark results. - MARCH="-march=westmere" -elif [[ "$ARCH" =~ ^(arm64|armv7|armv7s)$ && "${CC_VERSION_STR[0]}" =~ (clang) ]]; then - # the "-march=native" option is not supported with clang on ARM - MARCH="" -else - MARCH="-march=native" -fi - -# Set RELIC config for Flow -COMP=(-DCFLAGS="-O3 -funroll-loops -fomit-frame-pointer ${MARCH} -mtune=native") -GENERAL=(-DTIMER=CYCLE -DCHECK=OFF -DVERBS=OFF) -LIBS=(-DSHLIB=OFF -DSTLIB=ON) -RAND=(-DRAND=HASHD -DSEED=) - -# -BN_REP=(-DALLOC=AUTO -DALIGN=1 -DWSIZE=64 -DBN_PRECI=1024 -DBN_MAGNI=DOUBLE) -ARITH=(-DARITH=EASY) -PRIME=(-DFP_PRIME=381) - -# -BN_METH=(-DBN_KARAT=0 -DBN_METHD="COMBA;COMBA;MONTY;SLIDE;BINAR;BASIC") -FP_METH=(-DFP_KARAT=0 -DFP_METHD="INTEG;INTEG;INTEG;MONTY;MONTY;JMPDS;SLIDE") -PRIMES=(-DFP_PMERS=OFF -DFP_QNRES=ON) -FPX_METH=(-DFPX_METHD="INTEG;INTEG;LAZYR") -EP_METH=(-DEP_MIXED=ON -DEP_PLAIN=OFF -DEP_ENDOM=ON -DEP_SUPER=OFF\ - -DEP_CTMAP=ON -DEP_METHD="JACOB;LWNAF;COMBS;INTER") -PP_METH=(-DPP_METHD="LAZYR;OATEP") - -# run cmake -cmake "${COMP[@]}" "${GENERAL[@]}" \ - "${LIBS[@]}" "${RAND[@]}" \ - "${BN_REP[@]}" "${ARITH[@]}" \ - "${PRIME[@]}" "${PRIMES[@]}" \ - "${EP_METH[@]}" \ - "${BN_METH[@]}" \ - "${FP_METH[@]}" \ - "${FPX_METH[@]}" \ - "${PP_METH[@]}" .. - - -# Compile the static library -make clean -make relic_s -j8 -rm -f CMakeCache.txt - -popd -popd diff --git a/crypto/sign.go b/crypto/sign.go index 68196acba2d..d400898d97d 100644 --- a/crypto/sign.go +++ b/crypto/sign.go @@ -49,44 +49,36 @@ type signer interface { decodePublicKeyCompressed([]byte) (PublicKey, error) } -// newNonRelicSigner returns a signer that does not depend on Relic library. -func newNonRelicSigner(algo SigningAlgorithm) (signer, error) { +// newSigner returns a signer instance +func newSigner(algo SigningAlgorithm) (signer, error) { switch algo { case ECDSAP256: return p256Instance, nil case ECDSASecp256k1: return secp256k1Instance, nil + case BLSBLS12381: + return blsInstance, nil default: return nil, invalidInputsErrorf("the signature scheme %s is not supported", algo) } } -// Initialize the context of all algos not requiring Relic -func initNonRelic() { - // P-256 +// Initialize the context of all algos +func init() { + // ECDSA p256Instance = &(ecdsaAlgo{ curve: elliptic.P256(), algo: ECDSAP256, }) - - // secp256k1 secp256k1Instance = &(ecdsaAlgo{ curve: btcec.S256(), algo: ECDSASecp256k1, }) -} -// Signature format Check for non-relic algos (ECDSA) -func signatureFormatCheckNonRelic(algo SigningAlgorithm, s Signature) (bool, error) { - switch algo { - case ECDSAP256: - return p256Instance.signatureFormatCheck(s), nil - case ECDSASecp256k1: - return secp256k1Instance.signatureFormatCheck(s), nil - default: - return false, invalidInputsErrorf( - "the signature scheme %s is not supported", - algo) + // BLS + initBLS12381() + blsInstance = &blsBLS12381Algo{ + algo: BLSBLS12381, } } @@ -98,8 +90,16 @@ func signatureFormatCheckNonRelic(algo SigningAlgorithm, s Signature) (bool, err // If SignatureFormatCheck returns false then the input is not a valid // signature and will fail a verification against any message and public key. func SignatureFormatCheck(algo SigningAlgorithm, s Signature) (bool, error) { - // For now, signatureFormatCheckNonRelic is only defined for non-Relic algos. - return signatureFormatCheckNonRelic(algo, s) + switch algo { + case ECDSAP256: + return p256Instance.signatureFormatCheck(s), nil + case ECDSASecp256k1: + return secp256k1Instance.signatureFormatCheck(s), nil + default: + return false, invalidInputsErrorf( + "the signature scheme %s is not supported", + algo) + } } // GeneratePrivateKey generates a private key of the algorithm using the entropy of the given seed. diff --git a/crypto/sign_norelic.go b/crypto/sign_norelic.go deleted file mode 100644 index 7e6dd4c0d10..00000000000 --- a/crypto/sign_norelic.go +++ /dev/null @@ -1,13 +0,0 @@ -//go:build !relic -// +build !relic - -package crypto - -// newSigner chooses and initializes a signature scheme -func newSigner(algo SigningAlgorithm) (signer, error) { - return newNonRelicSigner(algo) -} - -func init() { - initNonRelic() -} diff --git a/crypto/sign_relic.go b/crypto/sign_relic.go deleted file mode 100644 index 980fca20c51..00000000000 --- a/crypto/sign_relic.go +++ /dev/null @@ -1,42 +0,0 @@ -//go:build relic -// +build relic - -package crypto - -import ( - "fmt" -) - -// newSigner chooses and initializes a signature scheme -func newSigner(algo SigningAlgorithm) (signer, error) { - // try Relic algos - if signer := relicSigner(algo); signer != nil { - return signer, nil - } - // return a non-Relic algo - return newNonRelicSigner(algo) -} - -// relicSigner returns a signer that depends on Relic library. -func relicSigner(algo SigningAlgorithm) signer { - if algo == BLSBLS12381 { - return blsInstance - } - return nil -} - -// Initialize Relic with the BLS context on BLS 12-381 -func init() { - initRelic() - initNonRelic() -} - -// Initialize the context of all algos requiring Relic -func initRelic() { - blsInstance = &blsBLS12381Algo{ - algo: BLSBLS12381, - } - if err := blsInstance.init(); err != nil { - panic(fmt.Sprintf("initialization of BLS failed: %s", err.Error())) - } -} diff --git a/crypto/sign_test_utils.go b/crypto/sign_test_utils.go index a98f7d0713b..9ecc684a4be 100644 --- a/crypto/sign_test_utils.go +++ b/crypto/sign_test_utils.go @@ -55,60 +55,55 @@ func TestHasherErrors(t *testing.T) { // tests sign and verify are consistent for multiple generated keys and messages func testGenSignVerify(t *testing.T, salg SigningAlgorithm, halg hash.Hasher) { - t.Logf("Testing Generation/Signature/Verification for %s", salg) - // make sure the length is larger than minimum lengths of all the signaure algos - seedMinLength := 48 - seed := make([]byte, seedMinLength) - input := make([]byte, 100) - rand := getPRG(t) - - loops := 50 - for j := 0; j < loops; j++ { - n, err := rand.Read(seed) - require.Equal(t, n, seedMinLength) - require.NoError(t, err) - sk, err := GeneratePrivateKey(salg, seed) - require.NoError(t, err) - _, err = rand.Read(input) - require.NoError(t, err) - s, err := sk.Sign(input, halg) - require.NoError(t, err) - pk := sk.PublicKey() + t.Run(fmt.Sprintf("Generation/Signature/Verification for %s", salg), func(t *testing.T) { + seed := make([]byte, KeyGenSeedMinLen) + input := make([]byte, 100) + rand := getPRG(t) - // test a valid signature - result, err := pk.Verify(s, input, halg) - require.NoError(t, err) - assert.True(t, result, fmt.Sprintf( - "Verification should succeed:\n signature:%s\n message:%x\n private key:%s", s, input, sk)) + loops := 50 + for j := 0; j < loops; j++ { + n, err := rand.Read(seed) + require.Equal(t, n, KeyGenSeedMinLen) + require.NoError(t, err) + sk, err := GeneratePrivateKey(salg, seed) + require.NoError(t, err) + _, err = rand.Read(input) + require.NoError(t, err) + s, err := sk.Sign(input, halg) + require.NoError(t, err) + pk := sk.PublicKey() - // test with a different message - input[0] ^= 1 - result, err = pk.Verify(s, input, halg) - require.NoError(t, err) - assert.False(t, result, fmt.Sprintf( - "Verification should fail:\n signature:%s\n message:%x\n private key:%s", s, input, sk)) - input[0] ^= 1 + // test a valid signature + result, err := pk.Verify(s, input, halg) + require.NoError(t, err) + assert.True(t, result) - // test with a valid but different key - seed[0] ^= 1 - wrongSk, err := GeneratePrivateKey(salg, seed) - require.NoError(t, err) - result, err = wrongSk.PublicKey().Verify(s, input, halg) - require.NoError(t, err) - assert.False(t, result, fmt.Sprintf( - "Verification should fail:\n signature:%s\n message:%x\n private key:%s", s, input, sk)) + // test with a different message + input[0] ^= 1 + result, err = pk.Verify(s, input, halg) + require.NoError(t, err) + assert.False(t, result) + input[0] ^= 1 - // test a wrong signature length - invalidLen := rand.Intn(2 * len(s)) // try random invalid lengths - if invalidLen == len(s) { // map to an invalid length - invalidLen = 0 + // test with a valid but different key + seed[0] ^= 1 + wrongSk, err := GeneratePrivateKey(salg, seed) + require.NoError(t, err) + result, err = wrongSk.PublicKey().Verify(s, input, halg) + require.NoError(t, err) + assert.False(t, result) + + // test a wrong signature length + invalidLen := rand.Intn(2 * len(s)) // try random invalid lengths + if invalidLen == len(s) { // map to an invalid length + invalidLen = 0 + } + invalidSig := make([]byte, invalidLen) + result, err = pk.Verify(invalidSig, input, halg) + require.NoError(t, err) + assert.False(t, result) } - invalidSig := make([]byte, invalidLen) - result, err = pk.Verify(invalidSig, input, halg) - require.NoError(t, err) - assert.False(t, result, fmt.Sprintf( - "Verification should fail:\n signature:%s\n with invalid length %d", invalidSig, invalidLen)) - } + }) } // tests the key generation constraints with regards to the input seed, mainly @@ -138,7 +133,6 @@ func testKeyGenSeed(t *testing.T, salg SigningAlgorithm, minLen int, maxLen int) }) t.Run("deterministic generation", func(t *testing.T) { - // same seed results in the same key seed := make([]byte, minLen) read, err := crand.Read(seed) @@ -162,161 +156,164 @@ var BLS12381Order = []byte{0x73, 0xED, 0xA7, 0x53, 0x29, 0x9D, 0x7D, 0x48, 0x33, 0x5B, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01} func testEncodeDecode(t *testing.T, salg SigningAlgorithm) { - t.Logf("Testing encode/decode for %s", salg) - rand := getPRG(t) - // make sure the length is larger than minimum lengths of all the signaure algos - seedMinLength := 48 + t.Run(fmt.Sprintf("generic encode/decode for %s", salg), func(t *testing.T) { + rand := getPRG(t) + + t.Run("happy path tests", func(t *testing.T) { + loops := 50 + for j := 0; j < loops; j++ { + // generate a private key + seed := make([]byte, KeyGenSeedMinLen) + read, err := rand.Read(seed) + require.Equal(t, read, KeyGenSeedMinLen) + require.NoError(t, err) + sk, err := GeneratePrivateKey(salg, seed) + assert.Nil(t, err) + seed[0] ^= 1 // alter the seed to get a new private key + distinctSk, err := GeneratePrivateKey(salg, seed) + require.NoError(t, err) + + // check private key encoding + skBytes := sk.Encode() + skCheck, err := DecodePrivateKey(salg, skBytes) + require.Nil(t, err) + assert.True(t, sk.Equals(skCheck)) + skCheckBytes := skCheck.Encode() + assert.Equal(t, skBytes, skCheckBytes) + distinctSkBytes := distinctSk.Encode() + assert.NotEqual(t, skBytes, distinctSkBytes) + + // check public key encoding + pk := sk.PublicKey() + pkBytes := pk.Encode() + pkCheck, err := DecodePublicKey(salg, pkBytes) + require.Nil(t, err) + assert.True(t, pk.Equals(pkCheck)) + pkCheckBytes := pkCheck.Encode() + assert.Equal(t, pkBytes, pkCheckBytes) + distinctPkBytes := distinctSk.PublicKey().Encode() + assert.NotEqual(t, pkBytes, distinctPkBytes) + + // same for the compressed encoding + // skip is BLS is used and compression isn't supported + if !(salg == BLSBLS12381 && !isG2Compressed()) { + pkComprBytes := pk.EncodeCompressed() + pkComprCheck, err := DecodePublicKeyCompressed(salg, pkComprBytes) + require.Nil(t, err) + assert.True(t, pk.Equals(pkComprCheck)) + pkCheckComprBytes := pkComprCheck.EncodeCompressed() + assert.Equal(t, pkComprBytes, pkCheckComprBytes) + distinctPkComprBytes := distinctSk.PublicKey().EncodeCompressed() + assert.NotEqual(t, pkComprBytes, distinctPkComprBytes) + } + } + }) + + // test invalid private keys (equal to the curve group order) + + t.Run("private keys equal to the group order", func(t *testing.T) { + groupOrder := make(map[SigningAlgorithm][]byte) + groupOrder[ECDSAP256] = []byte{255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, + 255, 255, 255, 255, 255, 188, 230, 250, 173, 167, + 23, 158, 132, 243, 185, 202, 194, 252, 99, 37, 81} + + groupOrder[ECDSASecp256k1] = []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 254, 186, 174, 220, 230, + 175, 72, 160, 59, 191, 210, 94, 140, 208, 54, 65, 65} + + groupOrder[BLSBLS12381] = BLS12381Order + + sk, err := DecodePrivateKey(salg, groupOrder[salg]) + require.Error(t, err) + assert.True(t, IsInvalidInputsError(err)) + assert.Nil(t, sk) + }) + + // test invalid private and public keys (invalid length) + t.Run("invalid key length", func(t *testing.T) { + // private key + skLens := make(map[SigningAlgorithm]int) + skLens[ECDSAP256] = PrKeyLenECDSAP256 + skLens[ECDSASecp256k1] = PrKeyLenECDSASecp256k1 + skLens[BLSBLS12381] = 32 + + bytes := make([]byte, skLens[salg]+1) + sk, err := DecodePrivateKey(salg, bytes) + require.Error(t, err) + assert.True(t, IsInvalidInputsError(err)) + assert.Nil(t, sk) - t.Run("happy path tests", func(t *testing.T) { - loops := 50 - for j := 0; j < loops; j++ { - // generate a private key - seed := make([]byte, seedMinLength) - read, err := rand.Read(seed) - require.Equal(t, read, seedMinLength) - require.NoError(t, err) - sk, err := GeneratePrivateKey(salg, seed) - assert.Nil(t, err, "the key generation failed") - seed[0] ^= 1 // alter the seed to get a new private key - distinctSk, err := GeneratePrivateKey(salg, seed) - require.NoError(t, err) + // public key + pkLens := make(map[SigningAlgorithm]int) + pkLens[ECDSAP256] = PubKeyLenECDSAP256 + pkLens[ECDSASecp256k1] = PubKeyLenECDSASecp256k1 + pkLens[BLSBLS12381] = 96 - // check private key encoding - skBytes := sk.Encode() - skCheck, err := DecodePrivateKey(salg, skBytes) - require.Nil(t, err, "the key decoding failed") - assert.True(t, sk.Equals(skCheck), "key equality check failed") - skCheckBytes := skCheck.Encode() - assert.Equal(t, skBytes, skCheckBytes, "keys should be equal") - distinctSkBytes := distinctSk.Encode() - assert.NotEqual(t, skBytes, distinctSkBytes, "keys should be different") - - // check public key encoding - pk := sk.PublicKey() - pkBytes := pk.Encode() - pkCheck, err := DecodePublicKey(salg, pkBytes) - require.Nil(t, err, "the key decoding failed") - assert.True(t, pk.Equals(pkCheck), "key equality check failed") - pkCheckBytes := pkCheck.Encode() - assert.Equal(t, pkBytes, pkCheckBytes, "keys should be equal") - distinctPkBytes := distinctSk.PublicKey().Encode() - assert.NotEqual(t, pkBytes, distinctPkBytes, "keys should be different") - - // same for the compressed encoding - pkComprBytes := pk.EncodeCompressed() - pkComprCheck, err := DecodePublicKeyCompressed(salg, pkComprBytes) - require.Nil(t, err, "the key decoding failed") - assert.True(t, pk.Equals(pkComprCheck), "key equality check failed") - pkCheckComprBytes := pkComprCheck.EncodeCompressed() - assert.Equal(t, pkComprBytes, pkCheckComprBytes, "keys should be equal") - distinctPkComprBytes := distinctSk.PublicKey().EncodeCompressed() - assert.NotEqual(t, pkComprBytes, distinctPkComprBytes, "keys should be different") - } + bytes = make([]byte, pkLens[salg]+1) + pk, err := DecodePublicKey(salg, bytes) + require.Error(t, err) + assert.True(t, IsInvalidInputsError(err)) + assert.Nil(t, pk) + }) }) +} - // test invalid private keys (equal to the curve group order) - t.Run("private keys equal to the group order", func(t *testing.T) { - groupOrder := make(map[SigningAlgorithm][]byte) - groupOrder[ECDSAP256] = []byte{255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, - 255, 255, 255, 255, 255, 188, 230, 250, 173, 167, - 23, 158, 132, 243, 185, 202, 194, 252, 99, 37, 81} - - groupOrder[ECDSASecp256k1] = []byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 254, 186, 174, 220, 230, - 175, 72, 160, 59, 191, 210, 94, 140, 208, 54, 65, 65} +func testEquals(t *testing.T, salg SigningAlgorithm, otherSigAlgo SigningAlgorithm) { + t.Run(fmt.Sprintf("equals for %s", salg), func(t *testing.T) { + rand := getPRG(t) + // generate a key pair + seed := make([]byte, KeyGenSeedMinLen) + n, err := rand.Read(seed) + require.Equal(t, n, KeyGenSeedMinLen) + require.NoError(t, err) - groupOrder[BLSBLS12381] = BLS12381Order + // first pair + sk1, err := GeneratePrivateKey(salg, seed) + require.NoError(t, err) + pk1 := sk1.PublicKey() - sk, err := DecodePrivateKey(salg, groupOrder[salg]) - require.Error(t, err, "the key decoding should fail - private key value is too large") - assert.True(t, IsInvalidInputsError(err)) - assert.Nil(t, sk) - }) + // second pair without changing the seed + sk2, err := GeneratePrivateKey(salg, seed) + require.NoError(t, err) + pk2 := sk2.PublicKey() - // test invalid private and public keys (invalid length) - t.Run("invalid key length", func(t *testing.T) { - // private key - skLens := make(map[SigningAlgorithm]int) - skLens[ECDSAP256] = PrKeyLenECDSAP256 - skLens[ECDSASecp256k1] = PrKeyLenECDSASecp256k1 - skLens[BLSBLS12381] = 32 - - bytes := make([]byte, skLens[salg]+1) - sk, err := DecodePrivateKey(salg, bytes) - require.Error(t, err) - assert.True(t, IsInvalidInputsError(err)) - assert.Nil(t, sk) + // unrelated algo pair + sk3, err := GeneratePrivateKey(otherSigAlgo, seed) + require.NoError(t, err) + pk3 := sk3.PublicKey() - // public key - pkLens := make(map[SigningAlgorithm]int) - pkLens[ECDSAP256] = PubKeyLenECDSAP256 - pkLens[ECDSASecp256k1] = PubKeyLenECDSASecp256k1 - pkLens[BLSBLS12381] = 96 + // fourth pair with same algo but a different seed + seed[0] ^= 1 + sk4, err := GeneratePrivateKey(salg, seed) + require.NoError(t, err) + pk4 := sk4.PublicKey() - bytes = make([]byte, pkLens[salg]+1) - pk, err := DecodePublicKey(salg, bytes) - require.Error(t, err) - assert.True(t, IsInvalidInputsError(err)) - assert.Nil(t, pk) + // tests + assert.True(t, sk1.Equals(sk2)) + assert.True(t, pk1.Equals(pk2)) + assert.False(t, sk1.Equals(sk3)) + assert.False(t, pk1.Equals(pk3)) + assert.False(t, sk1.Equals(sk4)) + assert.False(t, pk1.Equals(pk4)) }) } -func testEquals(t *testing.T, salg SigningAlgorithm, otherSigAlgo SigningAlgorithm) { - t.Logf("Testing Equals for %s", salg) - rand := getPRG(t) - // make sure the length is larger than minimum lengths of all the signaure algos - seedMinLength := 48 - - // generate a key pair - seed := make([]byte, seedMinLength) - n, err := rand.Read(seed) - require.Equal(t, n, seedMinLength) - require.NoError(t, err) - - // first pair - sk1, err := GeneratePrivateKey(salg, seed) - require.NoError(t, err) - pk1 := sk1.PublicKey() - - // second pair without changing the seed - sk2, err := GeneratePrivateKey(salg, seed) - require.NoError(t, err) - pk2 := sk2.PublicKey() - - // unrelated algo pair - sk3, err := GeneratePrivateKey(otherSigAlgo, seed) - require.NoError(t, err) - pk3 := sk3.PublicKey() - - // fourth pair with same algo but a different seed - seed[0] ^= 1 - sk4, err := GeneratePrivateKey(salg, seed) - require.NoError(t, err) - pk4 := sk4.PublicKey() - - // tests - assert.True(t, sk1.Equals(sk2), "key equality should return true") - assert.True(t, pk1.Equals(pk2), "key equality should return true") - assert.False(t, sk1.Equals(sk3), "key equality should return false") - assert.False(t, pk1.Equals(pk3), "key equality should return false") - assert.False(t, sk1.Equals(sk4), "key equality should return false") - assert.False(t, pk1.Equals(pk4), "key equality should return false") -} - func testKeysAlgorithm(t *testing.T, sk PrivateKey, salg SigningAlgorithm) { - t.Logf("Testing key.Algorithm for %s", salg) - alg := sk.Algorithm() - assert.Equal(t, alg, salg) - alg = sk.PublicKey().Algorithm() - assert.Equal(t, alg, salg) + t.Run(fmt.Sprintf("key.Algorithm for %s", salg), func(t *testing.T) { + alg := sk.Algorithm() + assert.Equal(t, alg, salg) + alg = sk.PublicKey().Algorithm() + assert.Equal(t, alg, salg) + }) } func testKeySize(t *testing.T, sk PrivateKey, skLen int, pkLen int) { - t.Logf("Testing key.Size for %s", sk.Algorithm()) - size := sk.Size() - assert.Equal(t, size, skLen) - size = sk.PublicKey().Size() - assert.Equal(t, size, pkLen) + t.Run(fmt.Sprintf("key.Size for %s", sk.Algorithm()), func(t *testing.T) { + size := sk.Size() + assert.Equal(t, size, skLen) + size = sk.PublicKey().Size() + assert.Equal(t, size, pkLen) + }) } func benchVerify(b *testing.B, algo SigningAlgorithm, halg hash.Hasher) { diff --git a/crypto/spock.go b/crypto/spock.go index 2487f39ce1b..da269c23ac1 100644 --- a/crypto/spock.go +++ b/crypto/spock.go @@ -1,13 +1,8 @@ -//go:build relic -// +build relic - package crypto // SPoCK design based on the BLS signature scheme. // BLS is using BLS12-381 curve and the same settings in bls.go. -// #cgo CFLAGS: -g -Wall -std=c99 -// #cgo LDFLAGS: -L${SRCDIR}/relic/build/lib -l relic_s // #include "bls_include.h" import "C" import ( @@ -78,7 +73,7 @@ func SPOCKVerify(pk1 PublicKey, proof1 Signature, pk2 PublicKey, proof2 Signatur return false, notBLSKeyError } - if len(proof1) != signatureLengthBLSBLS12381 || len(proof2) != signatureLengthBLSBLS12381 { + if len(proof1) != g1BytesLen || len(proof2) != g1BytesLen { return false, nil } @@ -90,9 +85,9 @@ func SPOCKVerify(pk1 PublicKey, proof1 Signature, pk2 PublicKey, proof2 Signatur } // verify the spock proof using the secret data - verif := C.bls_spock_verify((*C.ep2_st)(&blsPk1.point), + verif := C.bls_spock_verify((*C.E2)(&blsPk1.point), (*C.uchar)(&proof1[0]), - (*C.ep2_st)(&blsPk2.point), + (*C.E2)(&blsPk2.point), (*C.uchar)(&proof2[0])) switch verif { diff --git a/crypto/spock_test.go b/crypto/spock_test.go index 596968234e4..59498a42f6f 100644 --- a/crypto/spock_test.go +++ b/crypto/spock_test.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package crypto import ( @@ -72,7 +69,7 @@ func TestSPOCKProveVerifyAgainstData(t *testing.T) { t.Run("identity proof", func(t *testing.T) { // verifying with a pair of (proof, publicKey) equal to (identity_signature, identity_key) should // return false - identityProof := identityBLSSignature + identityProof := g1Serialization result, err := SPOCKVerifyAgainstData(IdentityBLSPublicKey(), identityProof, data, kmac) assert.NoError(t, err) assert.False(t, result) @@ -169,7 +166,7 @@ func TestSPOCKProveVerify(t *testing.T) { t.Run("identity proof", func(t *testing.T) { // verifying with either pair of (proof, publicKey) equal to (identity_signature, identity_key) should // return falsen with any other (proof, key) pair. - identityProof := identityBLSSignature + identityProof := g1Serialization result, err := SPOCKVerify(IdentityBLSPublicKey(), identityProof, sk2.PublicKey(), pr2) assert.NoError(t, err) assert.False(t, result) diff --git a/crypto_adx_flag.mk b/crypto_adx_flag.mk new file mode 100644 index 00000000000..667c8d493d3 --- /dev/null +++ b/crypto_adx_flag.mk @@ -0,0 +1,17 @@ +# `ADX_SUPPORT` is 1 if ADX instructions are supported and 0 otherwise. +ifeq ($(shell uname -s),Linux) +# detect ADX support on the CURRENT linux machine. + ADX_SUPPORT := $(shell if ([ -f "/proc/cpuinfo" ] && grep -q -e '^flags.*\badx\b' /proc/cpuinfo); then echo 1; else echo 0; fi) +else +# on non-linux machines, set the flag to 1 by default + ADX_SUPPORT := 1 +endif + +# the crypto package uses BLST source files underneath which may use ADX instructions. +ifeq ($(ADX_SUPPORT), 1) +# if ADX instructions are supported, default is to use a fast ADX BLST implementation + CRYPTO_FLAG := "" +else +# if ADX instructions aren't supported, this CGO flags uses a slower non-ADX BLST implementation + CRYPTO_FLAG := "-O -D__BLST_PORTABLE__" +endif \ No newline at end of file diff --git a/crypto_setup.sh b/crypto_setup.sh deleted file mode 100644 index e9789c74a23..00000000000 --- a/crypto_setup.sh +++ /dev/null @@ -1,32 +0,0 @@ - -#!/bin/bash - -# crypto package -PKG_NAME="github.com/onflow/flow-go/crypto" - -# go.mod -MOD_FILE="./go.mod" - -# the version of onflow/flow-go/crypto used in the project is read from the go.mod file -if [ -f "${MOD_FILE}" ] -then - # extract the imported version - VERSION="$(go list -f '{{.Version}}' -m ${PKG_NAME})" - # go get the package - go get "${PKG_NAME}@${VERSION}" || { echo "go get the package failed"; exit 1; } - # using the right version, get the package directory path - PKG_DIR="$(go env GOPATH)/pkg/mod/${PKG_NAME}@${VERSION}" -else - { echo "couldn't find go.mod file - make sure the script is in the project root directory"; exit 1; } -fi - -# grant permissions if not existant -if [[ ! -r ${PKG_DIR} || ! -w ${PKG_DIR} || ! -x ${PKG_DIR} ]]; then - chmod -R 755 "${PKG_DIR}" -fi - -# get into the package directory and set up the external dependencies -( - cd "${PKG_DIR}" || { echo "cd into the GOPATH package folder failed"; exit 1; } - go generate -) diff --git a/engine/access/access_test.go b/engine/access/access_test.go index 28b366d3e14..011709e360e 100644 --- a/engine/access/access_test.go +++ b/engine/access/access_test.go @@ -319,19 +319,20 @@ func (suite *Suite) TestSendTransactionToRandomCollectionNode() { // create a mock connection factory connFactory := connectionmock.NewConnectionFactory(suite.T()) - connFactory.On("GetAccessAPIClient", collNode1.Address).Return(col1ApiClient, &mockCloser{}, nil) - connFactory.On("GetAccessAPIClient", collNode2.Address).Return(col2ApiClient, &mockCloser{}, nil) + connFactory.On("GetAccessAPIClient", collNode1.Address, nil).Return(col1ApiClient, &mockCloser{}, nil) + connFactory.On("GetAccessAPIClient", collNode2.Address, nil).Return(col2ApiClient, &mockCloser{}, nil) bnd, err := backend.New(backend.Params{State: suite.state, - Collections: collections, - Transactions: transactions, - ChainID: suite.chainID, - AccessMetrics: metrics, - ConnFactory: connFactory, - MaxHeightRange: backend.DefaultMaxHeightRange, - Log: suite.log, - SnapshotHistoryLimit: backend.DefaultSnapshotHistoryLimit, - Communicator: backend.NewNodeCommunicator(false), + Collections: collections, + Transactions: transactions, + ChainID: suite.chainID, + AccessMetrics: metrics, + ConnFactory: connFactory, + MaxHeightRange: backend.DefaultMaxHeightRange, + Log: suite.log, + SnapshotHistoryLimit: backend.DefaultSnapshotHistoryLimit, + Communicator: backend.NewNodeCommunicator(false), + TxErrorMessagesCacheSize: 1000, }) require.NoError(suite.T(), err) @@ -551,15 +552,12 @@ func (suite *Suite) TestGetExecutionResultByBlockID() { for i, serviceEvent := range executionResult.ServiceEvents { assert.Equal(suite.T(), serviceEvent.Type.String(), er.ServiceEvents[i].Type) event := serviceEvent.Event - marshalledEvent, err := json.Marshal(event) require.NoError(suite.T(), err) - assert.Equal(suite.T(), marshalledEvent, er.ServiceEvents[i].Payload) } parsedExecResult, err := convert.MessageToExecutionResult(resp.ExecutionResult) require.NoError(suite.T(), err) - assert.Equal(suite.T(), parsedExecResult, executionResult) assert.Equal(suite.T(), parsedExecResult.ID(), executionResult.ID()) } @@ -656,6 +654,7 @@ func (suite *Suite) TestGetSealedTransaction() { Log: suite.log, SnapshotHistoryLimit: backend.DefaultSnapshotHistoryLimit, Communicator: backend.NewNodeCommunicator(false), + TxErrorMessagesCacheSize: 1000, }) require.NoError(suite.T(), err) @@ -793,6 +792,7 @@ func (suite *Suite) TestGetTransactionResult() { Log: suite.log, SnapshotHistoryLimit: backend.DefaultSnapshotHistoryLimit, Communicator: backend.NewNodeCommunicator(false), + TxErrorMessagesCacheSize: 1000, }) require.NoError(suite.T(), err) @@ -968,23 +968,24 @@ func (suite *Suite) TestExecuteScript() { var err error suite.backend, err = backend.New(backend.Params{ - State: suite.state, - CollectionRPC: suite.collClient, - Blocks: all.Blocks, - Headers: all.Headers, - Collections: collections, - Transactions: transactions, - ExecutionReceipts: receipts, - ExecutionResults: results, - ChainID: suite.chainID, - AccessMetrics: suite.metrics, - ConnFactory: connFactory, - MaxHeightRange: backend.DefaultMaxHeightRange, - FixedExecutionNodeIDs: (identities.NodeIDs()).Strings(), - Log: suite.log, - SnapshotHistoryLimit: backend.DefaultSnapshotHistoryLimit, - Communicator: backend.NewNodeCommunicator(false), - ScriptExecutionMode: backend.ScriptExecutionModeExecutionNodesOnly, + State: suite.state, + CollectionRPC: suite.collClient, + Blocks: all.Blocks, + Headers: all.Headers, + Collections: collections, + Transactions: transactions, + ExecutionReceipts: receipts, + ExecutionResults: results, + ChainID: suite.chainID, + AccessMetrics: suite.metrics, + ConnFactory: connFactory, + MaxHeightRange: backend.DefaultMaxHeightRange, + FixedExecutionNodeIDs: (identities.NodeIDs()).Strings(), + Log: suite.log, + SnapshotHistoryLimit: backend.DefaultSnapshotHistoryLimit, + Communicator: backend.NewNodeCommunicator(false), + ScriptExecutionMode: backend.ScriptExecutionModeExecutionNodesOnly, + TxErrorMessagesCacheSize: 1000, }) require.NoError(suite.T(), err) diff --git a/engine/access/apiproxy/access_api_proxy.go b/engine/access/apiproxy/access_api_proxy.go index 1fc5545f6f0..761cadda878 100644 --- a/engine/access/apiproxy/access_api_proxy.go +++ b/engine/access/apiproxy/access_api_proxy.go @@ -2,13 +2,14 @@ package apiproxy import ( "context" - "time" "google.golang.org/grpc/status" - "github.com/onflow/flow/protobuf/go/flow/access" "github.com/rs/zerolog" + "github.com/onflow/flow/protobuf/go/flow/access" + + "github.com/onflow/flow-go/engine/access/rpc/connection" "github.com/onflow/flow-go/engine/common/grpc/forwarder" "github.com/onflow/flow-go/engine/protocol" "github.com/onflow/flow-go/model/flow" @@ -42,6 +43,16 @@ func (h *FlowAccessAPIRouter) log(handler, rpc string, err error) { logger.Info().Msg("request succeeded") } +// TODO: this is implemented in https://github.com/onflow/flow-go/pull/4957, remove when merged +func (h *FlowAccessAPIRouter) GetProtocolStateSnapshotByBlockID(ctx context.Context, request *access.GetProtocolStateSnapshotByBlockIDRequest) (*access.ProtocolStateSnapshotResponse, error) { + panic("implement me") +} + +// TODO: this is implemented in https://github.com/onflow/flow-go/pull/4957, remove when merged +func (h *FlowAccessAPIRouter) GetProtocolStateSnapshotByHeight(ctx context.Context, request *access.GetProtocolStateSnapshotByHeightRequest) (*access.ProtocolStateSnapshotResponse, error) { + panic("implement me") +} + // Ping pings the service. It is special in the sense that it responds successful, // only if all underlying services are ready. func (h *FlowAccessAPIRouter) Ping(context context.Context, req *access.PingRequest) (*access.PingResponse, error) { @@ -133,6 +144,18 @@ func (h *FlowAccessAPIRouter) GetTransactionResultByIndex(context context.Contex return res, err } +func (h *FlowAccessAPIRouter) GetSystemTransaction(context context.Context, req *access.GetSystemTransactionRequest) (*access.TransactionResponse, error) { + res, err := h.Upstream.GetSystemTransaction(context, req) + h.log("upstream", "GetSystemTransaction", err) + return res, err +} + +func (h *FlowAccessAPIRouter) GetSystemTransactionResult(context context.Context, req *access.GetSystemTransactionResultRequest) (*access.TransactionResultResponse, error) { + res, err := h.Upstream.GetSystemTransactionResult(context, req) + h.log("upstream", "GetSystemTransactionResult", err) + return res, err +} + func (h *FlowAccessAPIRouter) GetAccount(context context.Context, req *access.GetAccountRequest) (*access.GetAccountResponse, error) { res, err := h.Upstream.GetAccount(context, req) h.log("upstream", "GetAccount", err) @@ -210,8 +233,8 @@ type FlowAccessAPIForwarder struct { *forwarder.Forwarder } -func NewFlowAccessAPIForwarder(identities flow.IdentitySkeletonList, timeout time.Duration, maxMsgSize uint) (*FlowAccessAPIForwarder, error) { - forwarder, err := forwarder.NewForwarder(identities, timeout, maxMsgSize) +func NewFlowAccessAPIForwarder(identities flow.IdentitySkeletonList, connectionFactory connection.ConnectionFactory) (*FlowAccessAPIForwarder, error) { + forwarder, err := forwarder.NewForwarder(identities, connectionFactory) if err != nil { return nil, err } @@ -225,242 +248,289 @@ func NewFlowAccessAPIForwarder(identities flow.IdentitySkeletonList, timeout tim // only if all underlying services are ready. func (h *FlowAccessAPIForwarder) Ping(context context.Context, req *access.PingRequest) (*access.PingResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.Ping(context, req) } func (h *FlowAccessAPIForwarder) GetNodeVersionInfo(context context.Context, req *access.GetNodeVersionInfoRequest) (*access.GetNodeVersionInfoResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetNodeVersionInfo(context, req) } func (h *FlowAccessAPIForwarder) GetLatestBlockHeader(context context.Context, req *access.GetLatestBlockHeaderRequest) (*access.BlockHeaderResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetLatestBlockHeader(context, req) } func (h *FlowAccessAPIForwarder) GetBlockHeaderByID(context context.Context, req *access.GetBlockHeaderByIDRequest) (*access.BlockHeaderResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetBlockHeaderByID(context, req) } func (h *FlowAccessAPIForwarder) GetBlockHeaderByHeight(context context.Context, req *access.GetBlockHeaderByHeightRequest) (*access.BlockHeaderResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetBlockHeaderByHeight(context, req) } func (h *FlowAccessAPIForwarder) GetLatestBlock(context context.Context, req *access.GetLatestBlockRequest) (*access.BlockResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetLatestBlock(context, req) } func (h *FlowAccessAPIForwarder) GetBlockByID(context context.Context, req *access.GetBlockByIDRequest) (*access.BlockResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetBlockByID(context, req) } func (h *FlowAccessAPIForwarder) GetBlockByHeight(context context.Context, req *access.GetBlockByHeightRequest) (*access.BlockResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetBlockByHeight(context, req) } func (h *FlowAccessAPIForwarder) GetCollectionByID(context context.Context, req *access.GetCollectionByIDRequest) (*access.CollectionResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetCollectionByID(context, req) } func (h *FlowAccessAPIForwarder) SendTransaction(context context.Context, req *access.SendTransactionRequest) (*access.SendTransactionResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.SendTransaction(context, req) } func (h *FlowAccessAPIForwarder) GetTransaction(context context.Context, req *access.GetTransactionRequest) (*access.TransactionResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetTransaction(context, req) } func (h *FlowAccessAPIForwarder) GetTransactionResult(context context.Context, req *access.GetTransactionRequest) (*access.TransactionResultResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetTransactionResult(context, req) } +func (h *FlowAccessAPIForwarder) GetSystemTransaction(context context.Context, req *access.GetSystemTransactionRequest) (*access.TransactionResponse, error) { + // This is a passthrough request + upstream, closer, err := h.FaultTolerantClient() + if err != nil { + return nil, err + } + defer closer.Close() + return upstream.GetSystemTransaction(context, req) +} + +func (h *FlowAccessAPIForwarder) GetSystemTransactionResult(context context.Context, req *access.GetSystemTransactionResultRequest) (*access.TransactionResultResponse, error) { + // This is a passthrough request + upstream, closer, err := h.FaultTolerantClient() + if err != nil { + return nil, err + } + defer closer.Close() + return upstream.GetSystemTransactionResult(context, req) +} + func (h *FlowAccessAPIForwarder) GetTransactionResultByIndex(context context.Context, req *access.GetTransactionByIndexRequest) (*access.TransactionResultResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetTransactionResultByIndex(context, req) } func (h *FlowAccessAPIForwarder) GetTransactionResultsByBlockID(context context.Context, req *access.GetTransactionsByBlockIDRequest) (*access.TransactionResultsResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetTransactionResultsByBlockID(context, req) } func (h *FlowAccessAPIForwarder) GetTransactionsByBlockID(context context.Context, req *access.GetTransactionsByBlockIDRequest) (*access.TransactionsResponse, error) { - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetTransactionsByBlockID(context, req) } func (h *FlowAccessAPIForwarder) GetAccount(context context.Context, req *access.GetAccountRequest) (*access.GetAccountResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetAccount(context, req) } func (h *FlowAccessAPIForwarder) GetAccountAtLatestBlock(context context.Context, req *access.GetAccountAtLatestBlockRequest) (*access.AccountResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetAccountAtLatestBlock(context, req) } func (h *FlowAccessAPIForwarder) GetAccountAtBlockHeight(context context.Context, req *access.GetAccountAtBlockHeightRequest) (*access.AccountResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetAccountAtBlockHeight(context, req) } func (h *FlowAccessAPIForwarder) ExecuteScriptAtLatestBlock(context context.Context, req *access.ExecuteScriptAtLatestBlockRequest) (*access.ExecuteScriptResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.ExecuteScriptAtLatestBlock(context, req) } func (h *FlowAccessAPIForwarder) ExecuteScriptAtBlockID(context context.Context, req *access.ExecuteScriptAtBlockIDRequest) (*access.ExecuteScriptResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.ExecuteScriptAtBlockID(context, req) } func (h *FlowAccessAPIForwarder) ExecuteScriptAtBlockHeight(context context.Context, req *access.ExecuteScriptAtBlockHeightRequest) (*access.ExecuteScriptResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.ExecuteScriptAtBlockHeight(context, req) } func (h *FlowAccessAPIForwarder) GetEventsForHeightRange(context context.Context, req *access.GetEventsForHeightRangeRequest) (*access.EventsResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetEventsForHeightRange(context, req) } func (h *FlowAccessAPIForwarder) GetEventsForBlockIDs(context context.Context, req *access.GetEventsForBlockIDsRequest) (*access.EventsResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetEventsForBlockIDs(context, req) } func (h *FlowAccessAPIForwarder) GetNetworkParameters(context context.Context, req *access.GetNetworkParametersRequest) (*access.GetNetworkParametersResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetNetworkParameters(context, req) } func (h *FlowAccessAPIForwarder) GetLatestProtocolStateSnapshot(context context.Context, req *access.GetLatestProtocolStateSnapshotRequest) (*access.ProtocolStateSnapshotResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetLatestProtocolStateSnapshot(context, req) } func (h *FlowAccessAPIForwarder) GetExecutionResultForBlockID(context context.Context, req *access.GetExecutionResultForBlockIDRequest) (*access.ExecutionResultForBlockIDResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetExecutionResultForBlockID(context, req) } func (h *FlowAccessAPIForwarder) GetExecutionResultByID(context context.Context, req *access.GetExecutionResultByIDRequest) (*access.ExecutionResultByIDResponse, error) { // This is a passthrough request - upstream, err := h.FaultTolerantClient() + upstream, closer, err := h.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() return upstream.GetExecutionResultByID(context, req) } diff --git a/engine/access/apiproxy/access_api_proxy_test.go b/engine/access/apiproxy/access_api_proxy_test.go index 96fcd465ecb..27f96413c52 100644 --- a/engine/access/apiproxy/access_api_proxy_test.go +++ b/engine/access/apiproxy/access_api_proxy_test.go @@ -8,11 +8,14 @@ import ( "time" "github.com/onflow/flow/protobuf/go/flow/access" + "github.com/stretchr/testify/assert" "google.golang.org/grpc" grpcinsecure "google.golang.org/grpc/credentials/insecure" + "github.com/onflow/flow-go/engine/access/rpc/connection" "github.com/onflow/flow-go/engine/common/grpc/forwarder" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/utils/grpcutils" "github.com/onflow/flow-go/utils/unittest" ) @@ -135,51 +138,60 @@ func TestNewFlowCachedAccessAPIProxy(t *testing.T) { t.Fatal(err) } - // Prepare a proxy that fails due to the second connection being idle - l := flow.IdentitySkeletonList{ - {Address: unittest.IPPort("11634")}, - {Address: unittest.IPPort("11635")}, - } - c := FlowAccessAPIForwarder{} - c.Forwarder, err = forwarder.NewForwarder(l, time.Second, grpcutils.DefaultMaxMsgSize) - - if err == nil { - t.Fatal(fmt.Errorf("should not start with one connection ready")) - } - // Bring up 2nd upstream server charlie2, _, err := newFlowLite("tcp", unittest.IPPort("11635"), done) if err != nil { t.Fatal(err) } - background := context.Background() + metrics := metrics.NewNoopCollector() - // Prepare a proxy - l = flow.IdentitySkeletonList{ - {Address: unittest.IPPort("11634")}, - {Address: unittest.IPPort("11635")}, + // create the factory + connectionFactory := &connection.ConnectionFactoryImpl{ + // set metrics reporting + AccessMetrics: metrics, + CollectionNodeGRPCTimeout: time.Second, + Manager: connection.NewManager( + nil, + unittest.Logger(), + metrics, + grpcutils.DefaultMaxMsgSize, + connection.CircuitBreakerConfig{}, + grpcutils.NoCompressor, + ), } - c = FlowAccessAPIForwarder{} - c.Forwarder, err = forwarder.NewForwarder(l, time.Second, grpcutils.DefaultMaxMsgSize) + + // Prepare a proxy that fails due to the second connection being idle + l := flow.IdentitySkeletonList{{Address: unittest.IPPort("11634")}, {Address: unittest.IPPort("11635")}} + c := FlowAccessAPIForwarder{} + c.Forwarder, err = forwarder.NewForwarder(l, connectionFactory) if err != nil { t.Fatal(err) } + ctx := context.Background() + // Wait until proxy call passes - _, err = c.Ping(background, &access.PingRequest{}) + _, err = c.Ping(ctx, &access.PingRequest{}) if err != nil { t.Fatal(err) } + // get and close first connection + _, closer, err := c.Forwarder.FaultTolerantClient() + assert.NoError(t, err) + closer.Close() + + // connection factory created a new gRPC connection which was closed before + // if creation fails should use second connection // Wait until proxy call passes - _, err = c.Ping(background, &access.PingRequest{}) + _, err = c.Ping(ctx, &access.PingRequest{}) if err != nil { t.Fatal(err) } // Wait until proxy call passes - _, err = c.Ping(background, &access.PingRequest{}) + _, err = c.Ping(ctx, &access.PingRequest{}) if err != nil { t.Fatal(err) } @@ -188,7 +200,7 @@ func TestNewFlowCachedAccessAPIProxy(t *testing.T) { charlie2.Stop() // Wait until proxy call fails - _, err = c.Ping(background, &access.PingRequest{}) + _, err = c.Ping(ctx, &access.PingRequest{}) if err == nil { t.Fatal(fmt.Errorf("should fail on no connections")) } diff --git a/engine/access/handle_irrecoverable_state_test.go b/engine/access/handle_irrecoverable_state_test.go new file mode 100644 index 00000000000..d57750dc86a --- /dev/null +++ b/engine/access/handle_irrecoverable_state_test.go @@ -0,0 +1,258 @@ +package access + +import ( + "context" + "fmt" + "io" + "os" + "testing" + "time" + + "github.com/antihax/optional" + accessproto "github.com/onflow/flow/protobuf/go/flow/access" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + + restclient "github.com/onflow/flow/openapi/go-client-generated" + + "github.com/onflow/flow-go/crypto" + accessmock "github.com/onflow/flow-go/engine/access/mock" + "github.com/onflow/flow-go/engine/access/rest" + "github.com/onflow/flow-go/engine/access/rest/routes" + "github.com/onflow/flow-go/engine/access/rpc" + "github.com/onflow/flow-go/engine/access/rpc/backend" + statestreambackend "github.com/onflow/flow-go/engine/access/state_stream/backend" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/grpcserver" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/metrics" + module "github.com/onflow/flow-go/module/mock" + "github.com/onflow/flow-go/network/mocknetwork" + protocol "github.com/onflow/flow-go/state/protocol/mock" + storagemock "github.com/onflow/flow-go/storage/mock" + "github.com/onflow/flow-go/utils/grpcutils" + "github.com/onflow/flow-go/utils/unittest" +) + +// IrrecoverableStateTestSuite tests that Access node indicate an inconsistent or corrupted node state +type IrrecoverableStateTestSuite struct { + suite.Suite + state *protocol.State + snapshot *protocol.Snapshot + epochQuery *protocol.EpochQuery + log zerolog.Logger + net *mocknetwork.EngineRegistry + request *module.Requester + collClient *accessmock.AccessAPIClient + execClient *accessmock.ExecutionAPIClient + me *module.Local + chainID flow.ChainID + metrics *metrics.NoopCollector + rpcEng *rpc.Engine + publicKey crypto.PublicKey + + // storage + blocks *storagemock.Blocks + headers *storagemock.Headers + collections *storagemock.Collections + transactions *storagemock.Transactions + receipts *storagemock.ExecutionReceipts + + ctx irrecoverable.SignalerContext + + // grpc servers + secureGrpcServer *grpcserver.GrpcServer + unsecureGrpcServer *grpcserver.GrpcServer +} + +func (suite *IrrecoverableStateTestSuite) SetupTest() { + suite.log = zerolog.New(os.Stdout) + suite.net = mocknetwork.NewEngineRegistry(suite.T()) + suite.state = protocol.NewState(suite.T()) + suite.snapshot = protocol.NewSnapshot(suite.T()) + + rootHeader := unittest.BlockHeaderFixture() + params := protocol.NewParams(suite.T()) + params.On("SporkID").Return(unittest.IdentifierFixture(), nil) + params.On("ProtocolVersion").Return(uint(unittest.Uint64InRange(10, 30)), nil) + params.On("SporkRootBlockHeight").Return(rootHeader.Height, nil) + params.On("SealedRoot").Return(rootHeader, nil) + + suite.epochQuery = protocol.NewEpochQuery(suite.T()) + suite.state.On("Sealed").Return(suite.snapshot, nil).Maybe() + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.state.On("Params").Return(params, nil).Maybe() + suite.snapshot.On("Epochs").Return(suite.epochQuery).Maybe() + suite.blocks = storagemock.NewBlocks(suite.T()) + suite.headers = storagemock.NewHeaders(suite.T()) + suite.transactions = storagemock.NewTransactions(suite.T()) + suite.collections = storagemock.NewCollections(suite.T()) + suite.receipts = storagemock.NewExecutionReceipts(suite.T()) + + suite.collClient = accessmock.NewAccessAPIClient(suite.T()) + suite.execClient = accessmock.NewExecutionAPIClient(suite.T()) + + suite.request = module.NewRequester(suite.T()) + suite.me = module.NewLocal(suite.T()) + + accessIdentity := unittest.IdentityFixture(unittest.WithRole(flow.RoleAccess)) + suite.me. + On("NodeID"). + Return(accessIdentity.NodeID).Maybe() + + suite.chainID = flow.Testnet + suite.metrics = metrics.NewNoopCollector() + + config := rpc.Config{ + UnsecureGRPCListenAddr: unittest.DefaultAddress, + SecureGRPCListenAddr: unittest.DefaultAddress, + HTTPListenAddr: unittest.DefaultAddress, + RestConfig: rest.Config{ + ListenAddress: unittest.DefaultAddress, + }, + } + + // generate a server certificate that will be served by the GRPC server + networkingKey := unittest.NetworkingPrivKeyFixture() + x509Certificate, err := grpcutils.X509Certificate(networkingKey) + assert.NoError(suite.T(), err) + tlsConfig := grpcutils.DefaultServerTLSConfig(x509Certificate) + // set the transport credentials for the server to use + config.TransportCredentials = credentials.NewTLS(tlsConfig) + // save the public key to use later in tests later + suite.publicKey = networkingKey.PublicKey() + + suite.secureGrpcServer = grpcserver.NewGrpcServerBuilder(suite.log, + config.SecureGRPCListenAddr, + grpcutils.DefaultMaxMsgSize, + false, + nil, + nil, + grpcserver.WithTransportCredentials(config.TransportCredentials)).Build() + + suite.unsecureGrpcServer = grpcserver.NewGrpcServerBuilder(suite.log, + config.UnsecureGRPCListenAddr, + grpcutils.DefaultMaxMsgSize, + false, + nil, + nil).Build() + + blockHeader := unittest.BlockHeaderFixture() + suite.snapshot.On("Head").Return(blockHeader, nil).Once() + + bnd, err := backend.New(backend.Params{ + State: suite.state, + CollectionRPC: suite.collClient, + Blocks: suite.blocks, + Headers: suite.headers, + Collections: suite.collections, + Transactions: suite.transactions, + ChainID: suite.chainID, + AccessMetrics: suite.metrics, + MaxHeightRange: 0, + Log: suite.log, + SnapshotHistoryLimit: 0, + Communicator: backend.NewNodeCommunicator(false), + }) + suite.Require().NoError(err) + + stateStreamConfig := statestreambackend.Config{} + rpcEngBuilder, err := rpc.NewBuilder( + suite.log, + suite.state, + config, + suite.chainID, + suite.metrics, + false, + suite.me, + bnd, + bnd, + suite.secureGrpcServer, + suite.unsecureGrpcServer, + nil, + stateStreamConfig, + ) + assert.NoError(suite.T(), err) + suite.rpcEng, err = rpcEngBuilder.WithLegacy().Build() + assert.NoError(suite.T(), err) + + err = fmt.Errorf("inconsistent node's state") + signCtxErr := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + ctx := irrecoverable.NewMockSignalerContextExpectError(suite.T(), context.Background(), signCtxErr) + + suite.rpcEng.Start(ctx) + + suite.secureGrpcServer.Start(ctx) + suite.unsecureGrpcServer.Start(ctx) + + // wait for the servers to startup + unittest.AssertClosesBefore(suite.T(), suite.secureGrpcServer.Ready(), 2*time.Second) + unittest.AssertClosesBefore(suite.T(), suite.unsecureGrpcServer.Ready(), 2*time.Second) + + // wait for the engine to startup + unittest.AssertClosesBefore(suite.T(), suite.rpcEng.Ready(), 2*time.Second) +} + +func TestIrrecoverableState(t *testing.T) { + suite.Run(t, new(IrrecoverableStateTestSuite)) +} + +// TestGRPCInconsistentNodeState tests the behavior when gRPC encounters an inconsistent node state. +func (suite *IrrecoverableStateTestSuite) TestGRPCInconsistentNodeState() { + err := fmt.Errorf("inconsistent node's state") + suite.snapshot.On("Head").Return(nil, err) + + conn, err := grpc.Dial( + suite.unsecureGrpcServer.GRPCAddress().String(), + grpc.WithTransportCredentials(insecure.NewCredentials())) + assert.NoError(suite.T(), err) + defer io.Closer(conn).Close() + + client := accessproto.NewAccessAPIClient(conn) + + req := &accessproto.GetAccountAtLatestBlockRequest{ + Address: unittest.AddressFixture().Bytes(), + } + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + actual, err := client.GetAccountAtLatestBlock(ctx, req) + suite.Require().Error(err) + suite.Require().Nil(actual) +} + +// TestRestInconsistentNodeState tests the behavior when the REST API encounters an inconsistent node state. +func (suite *IrrecoverableStateTestSuite) TestRestInconsistentNodeState() { + collections := unittest.CollectionListFixture(1) + blockHeader := unittest.BlockWithGuaranteesFixture( + unittest.CollectionGuaranteesWithCollectionIDFixture(collections), + ) + suite.blocks.On("ByID", blockHeader.ID()).Return(blockHeader, nil) + + err := fmt.Errorf("inconsistent node's state") + suite.snapshot.On("Head").Return(nil, err) + + config := restclient.NewConfiguration() + config.BasePath = fmt.Sprintf("http://%s/v1", suite.rpcEng.RestApiAddress().String()) + client := restclient.NewAPIClient(config) + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + actual, _, err := client.BlocksApi.BlocksIdGet(ctx, []string{blockHeader.ID().String()}, optionsForBlocksIdGetOpts()) + suite.Require().Error(err) + suite.Require().Nil(actual) +} + +// optionsForBlocksIdGetOpts returns options for the BlocksApi.BlocksIdGet function. +func optionsForBlocksIdGetOpts() *restclient.BlocksApiBlocksIdGetOpts { + return &restclient.BlocksApiBlocksIdGetOpts{ + Expand: optional.NewInterface([]string{routes.ExpandableFieldPayload}), + Select_: optional.NewInterface([]string{"header.id"}), + } +} diff --git a/engine/access/integration_unsecure_grpc_server_test.go b/engine/access/integration_unsecure_grpc_server_test.go index 5de8b073239..810868e36fe 100644 --- a/engine/access/integration_unsecure_grpc_server_test.go +++ b/engine/access/integration_unsecure_grpc_server_test.go @@ -26,6 +26,7 @@ import ( statestreambackend "github.com/onflow/flow-go/engine/access/state_stream/backend" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/blobs" + "github.com/onflow/flow-go/module/execution" "github.com/onflow/flow-go/module/executiondatasync/execution_data" "github.com/onflow/flow-go/module/executiondatasync/execution_data/cache" "github.com/onflow/flow-go/module/grpcserver" @@ -67,6 +68,7 @@ type SameGRPCPortTestSuite struct { receipts *storagemock.ExecutionReceipts seals *storagemock.Seals results *storagemock.ExecutionResults + registers *execution.RegistersAsyncStore ctx irrecoverable.SignalerContext cancel context.CancelFunc @@ -90,6 +92,7 @@ func (suite *SameGRPCPortTestSuite) SetupTest() { suite.state = new(protocol.State) suite.snapshot = new(protocol.Snapshot) params := new(protocol.Params) + suite.registers = execution.NewRegistersAsyncStore() suite.epochQuery = new(protocol.EpochQuery) suite.state.On("Sealed").Return(suite.snapshot, nil).Maybe() @@ -245,6 +248,7 @@ func (suite *SameGRPCPortTestSuite) SetupTest() { nil, rootBlock.Header.Height, rootBlock.Header.Height, + suite.registers, ) assert.NoError(suite.T(), err) diff --git a/engine/access/mock/access_api_client.go b/engine/access/mock/access_api_client.go index 4e2b1d065c7..496ee06b58c 100644 --- a/engine/access/mock/access_api_client.go +++ b/engine/access/mock/access_api_client.go @@ -677,6 +677,138 @@ func (_m *AccessAPIClient) GetNodeVersionInfo(ctx context.Context, in *access.Ge return r0, r1 } +// GetProtocolStateSnapshotByBlockID provides a mock function with given fields: ctx, in, opts +func (_m *AccessAPIClient) GetProtocolStateSnapshotByBlockID(ctx context.Context, in *access.GetProtocolStateSnapshotByBlockIDRequest, opts ...grpc.CallOption) (*access.ProtocolStateSnapshotResponse, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 *access.ProtocolStateSnapshotResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByBlockIDRequest, ...grpc.CallOption) (*access.ProtocolStateSnapshotResponse, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByBlockIDRequest, ...grpc.CallOption) *access.ProtocolStateSnapshotResponse); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.ProtocolStateSnapshotResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetProtocolStateSnapshotByBlockIDRequest, ...grpc.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetProtocolStateSnapshotByHeight provides a mock function with given fields: ctx, in, opts +func (_m *AccessAPIClient) GetProtocolStateSnapshotByHeight(ctx context.Context, in *access.GetProtocolStateSnapshotByHeightRequest, opts ...grpc.CallOption) (*access.ProtocolStateSnapshotResponse, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 *access.ProtocolStateSnapshotResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByHeightRequest, ...grpc.CallOption) (*access.ProtocolStateSnapshotResponse, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByHeightRequest, ...grpc.CallOption) *access.ProtocolStateSnapshotResponse); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.ProtocolStateSnapshotResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetProtocolStateSnapshotByHeightRequest, ...grpc.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetSystemTransaction provides a mock function with given fields: ctx, in, opts +func (_m *AccessAPIClient) GetSystemTransaction(ctx context.Context, in *access.GetSystemTransactionRequest, opts ...grpc.CallOption) (*access.TransactionResponse, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 *access.TransactionResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionRequest, ...grpc.CallOption) (*access.TransactionResponse, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionRequest, ...grpc.CallOption) *access.TransactionResponse); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.TransactionResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetSystemTransactionRequest, ...grpc.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetSystemTransactionResult provides a mock function with given fields: ctx, in, opts +func (_m *AccessAPIClient) GetSystemTransactionResult(ctx context.Context, in *access.GetSystemTransactionResultRequest, opts ...grpc.CallOption) (*access.TransactionResultResponse, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 *access.TransactionResultResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionResultRequest, ...grpc.CallOption) (*access.TransactionResultResponse, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionResultRequest, ...grpc.CallOption) *access.TransactionResultResponse); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.TransactionResultResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetSystemTransactionResultRequest, ...grpc.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // GetTransaction provides a mock function with given fields: ctx, in, opts func (_m *AccessAPIClient) GetTransaction(ctx context.Context, in *access.GetTransactionRequest, opts ...grpc.CallOption) (*access.TransactionResponse, error) { _va := make([]interface{}, len(opts)) diff --git a/engine/access/mock/access_api_server.go b/engine/access/mock/access_api_server.go index 1a2c3772e44..c9545b26450 100644 --- a/engine/access/mock/access_api_server.go +++ b/engine/access/mock/access_api_server.go @@ -535,6 +535,110 @@ func (_m *AccessAPIServer) GetNodeVersionInfo(_a0 context.Context, _a1 *access.G return r0, r1 } +// GetProtocolStateSnapshotByBlockID provides a mock function with given fields: _a0, _a1 +func (_m *AccessAPIServer) GetProtocolStateSnapshotByBlockID(_a0 context.Context, _a1 *access.GetProtocolStateSnapshotByBlockIDRequest) (*access.ProtocolStateSnapshotResponse, error) { + ret := _m.Called(_a0, _a1) + + var r0 *access.ProtocolStateSnapshotResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByBlockIDRequest) (*access.ProtocolStateSnapshotResponse, error)); ok { + return rf(_a0, _a1) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByBlockIDRequest) *access.ProtocolStateSnapshotResponse); ok { + r0 = rf(_a0, _a1) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.ProtocolStateSnapshotResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetProtocolStateSnapshotByBlockIDRequest) error); ok { + r1 = rf(_a0, _a1) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetProtocolStateSnapshotByHeight provides a mock function with given fields: _a0, _a1 +func (_m *AccessAPIServer) GetProtocolStateSnapshotByHeight(_a0 context.Context, _a1 *access.GetProtocolStateSnapshotByHeightRequest) (*access.ProtocolStateSnapshotResponse, error) { + ret := _m.Called(_a0, _a1) + + var r0 *access.ProtocolStateSnapshotResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByHeightRequest) (*access.ProtocolStateSnapshotResponse, error)); ok { + return rf(_a0, _a1) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetProtocolStateSnapshotByHeightRequest) *access.ProtocolStateSnapshotResponse); ok { + r0 = rf(_a0, _a1) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.ProtocolStateSnapshotResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetProtocolStateSnapshotByHeightRequest) error); ok { + r1 = rf(_a0, _a1) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetSystemTransaction provides a mock function with given fields: _a0, _a1 +func (_m *AccessAPIServer) GetSystemTransaction(_a0 context.Context, _a1 *access.GetSystemTransactionRequest) (*access.TransactionResponse, error) { + ret := _m.Called(_a0, _a1) + + var r0 *access.TransactionResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionRequest) (*access.TransactionResponse, error)); ok { + return rf(_a0, _a1) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionRequest) *access.TransactionResponse); ok { + r0 = rf(_a0, _a1) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.TransactionResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetSystemTransactionRequest) error); ok { + r1 = rf(_a0, _a1) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetSystemTransactionResult provides a mock function with given fields: _a0, _a1 +func (_m *AccessAPIServer) GetSystemTransactionResult(_a0 context.Context, _a1 *access.GetSystemTransactionResultRequest) (*access.TransactionResultResponse, error) { + ret := _m.Called(_a0, _a1) + + var r0 *access.TransactionResultResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionResultRequest) (*access.TransactionResultResponse, error)); ok { + return rf(_a0, _a1) + } + if rf, ok := ret.Get(0).(func(context.Context, *access.GetSystemTransactionResultRequest) *access.TransactionResultResponse); ok { + r0 = rf(_a0, _a1) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*access.TransactionResultResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *access.GetSystemTransactionResultRequest) error); ok { + r1 = rf(_a0, _a1) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // GetTransaction provides a mock function with given fields: _a0, _a1 func (_m *AccessAPIServer) GetTransaction(_a0 context.Context, _a1 *access.GetTransactionRequest) (*access.TransactionResponse, error) { ret := _m.Called(_a0, _a1) diff --git a/engine/access/mock/execution_api_client.go b/engine/access/mock/execution_api_client.go index 759ca90c81f..597eae4f253 100644 --- a/engine/access/mock/execution_api_client.go +++ b/engine/access/mock/execution_api_client.go @@ -214,6 +214,105 @@ func (_m *ExecutionAPIClient) GetRegisterAtBlockID(ctx context.Context, in *exec return r0, r1 } +// GetTransactionErrorMessage provides a mock function with given fields: ctx, in, opts +func (_m *ExecutionAPIClient) GetTransactionErrorMessage(ctx context.Context, in *execution.GetTransactionErrorMessageRequest, opts ...grpc.CallOption) (*execution.GetTransactionErrorMessageResponse, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 *execution.GetTransactionErrorMessageResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageRequest, ...grpc.CallOption) (*execution.GetTransactionErrorMessageResponse, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageRequest, ...grpc.CallOption) *execution.GetTransactionErrorMessageResponse); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*execution.GetTransactionErrorMessageResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *execution.GetTransactionErrorMessageRequest, ...grpc.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetTransactionErrorMessageByIndex provides a mock function with given fields: ctx, in, opts +func (_m *ExecutionAPIClient) GetTransactionErrorMessageByIndex(ctx context.Context, in *execution.GetTransactionErrorMessageByIndexRequest, opts ...grpc.CallOption) (*execution.GetTransactionErrorMessageResponse, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 *execution.GetTransactionErrorMessageResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageByIndexRequest, ...grpc.CallOption) (*execution.GetTransactionErrorMessageResponse, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageByIndexRequest, ...grpc.CallOption) *execution.GetTransactionErrorMessageResponse); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*execution.GetTransactionErrorMessageResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *execution.GetTransactionErrorMessageByIndexRequest, ...grpc.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetTransactionErrorMessagesByBlockID provides a mock function with given fields: ctx, in, opts +func (_m *ExecutionAPIClient) GetTransactionErrorMessagesByBlockID(ctx context.Context, in *execution.GetTransactionErrorMessagesByBlockIDRequest, opts ...grpc.CallOption) (*execution.GetTransactionErrorMessagesResponse, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 *execution.GetTransactionErrorMessagesResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessagesByBlockIDRequest, ...grpc.CallOption) (*execution.GetTransactionErrorMessagesResponse, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessagesByBlockIDRequest, ...grpc.CallOption) *execution.GetTransactionErrorMessagesResponse); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*execution.GetTransactionErrorMessagesResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *execution.GetTransactionErrorMessagesByBlockIDRequest, ...grpc.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // GetTransactionResult provides a mock function with given fields: ctx, in, opts func (_m *ExecutionAPIClient) GetTransactionResult(ctx context.Context, in *execution.GetTransactionResultRequest, opts ...grpc.CallOption) (*execution.GetTransactionResultResponse, error) { _va := make([]interface{}, len(opts)) diff --git a/engine/access/mock/execution_api_server.go b/engine/access/mock/execution_api_server.go index 32ff605850a..11eff9dea23 100644 --- a/engine/access/mock/execution_api_server.go +++ b/engine/access/mock/execution_api_server.go @@ -170,6 +170,84 @@ func (_m *ExecutionAPIServer) GetRegisterAtBlockID(_a0 context.Context, _a1 *exe return r0, r1 } +// GetTransactionErrorMessage provides a mock function with given fields: _a0, _a1 +func (_m *ExecutionAPIServer) GetTransactionErrorMessage(_a0 context.Context, _a1 *execution.GetTransactionErrorMessageRequest) (*execution.GetTransactionErrorMessageResponse, error) { + ret := _m.Called(_a0, _a1) + + var r0 *execution.GetTransactionErrorMessageResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageRequest) (*execution.GetTransactionErrorMessageResponse, error)); ok { + return rf(_a0, _a1) + } + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageRequest) *execution.GetTransactionErrorMessageResponse); ok { + r0 = rf(_a0, _a1) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*execution.GetTransactionErrorMessageResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *execution.GetTransactionErrorMessageRequest) error); ok { + r1 = rf(_a0, _a1) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetTransactionErrorMessageByIndex provides a mock function with given fields: _a0, _a1 +func (_m *ExecutionAPIServer) GetTransactionErrorMessageByIndex(_a0 context.Context, _a1 *execution.GetTransactionErrorMessageByIndexRequest) (*execution.GetTransactionErrorMessageResponse, error) { + ret := _m.Called(_a0, _a1) + + var r0 *execution.GetTransactionErrorMessageResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageByIndexRequest) (*execution.GetTransactionErrorMessageResponse, error)); ok { + return rf(_a0, _a1) + } + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessageByIndexRequest) *execution.GetTransactionErrorMessageResponse); ok { + r0 = rf(_a0, _a1) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*execution.GetTransactionErrorMessageResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *execution.GetTransactionErrorMessageByIndexRequest) error); ok { + r1 = rf(_a0, _a1) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetTransactionErrorMessagesByBlockID provides a mock function with given fields: _a0, _a1 +func (_m *ExecutionAPIServer) GetTransactionErrorMessagesByBlockID(_a0 context.Context, _a1 *execution.GetTransactionErrorMessagesByBlockIDRequest) (*execution.GetTransactionErrorMessagesResponse, error) { + ret := _m.Called(_a0, _a1) + + var r0 *execution.GetTransactionErrorMessagesResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessagesByBlockIDRequest) (*execution.GetTransactionErrorMessagesResponse, error)); ok { + return rf(_a0, _a1) + } + if rf, ok := ret.Get(0).(func(context.Context, *execution.GetTransactionErrorMessagesByBlockIDRequest) *execution.GetTransactionErrorMessagesResponse); ok { + r0 = rf(_a0, _a1) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*execution.GetTransactionErrorMessagesResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *execution.GetTransactionErrorMessagesByBlockIDRequest) error); ok { + r1 = rf(_a0, _a1) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // GetTransactionResult provides a mock function with given fields: _a0, _a1 func (_m *ExecutionAPIServer) GetTransactionResult(_a0 context.Context, _a1 *execution.GetTransactionResultRequest) (*execution.GetTransactionResultResponse, error) { ret := _m.Called(_a0, _a1) diff --git a/engine/access/rest/apiproxy/rest_proxy_handler.go b/engine/access/rest/apiproxy/rest_proxy_handler.go index bfe15b4c9ea..5bd4c34c48e 100644 --- a/engine/access/rest/apiproxy/rest_proxy_handler.go +++ b/engine/access/rest/apiproxy/rest_proxy_handler.go @@ -3,13 +3,13 @@ package apiproxy import ( "context" "fmt" - "time" "google.golang.org/grpc/status" "github.com/rs/zerolog" "github.com/onflow/flow-go/access" + "github.com/onflow/flow-go/engine/access/rpc/connection" "github.com/onflow/flow-go/engine/common/grpc/forwarder" "github.com/onflow/flow-go/engine/common/rpc/convert" "github.com/onflow/flow-go/model/flow" @@ -33,17 +33,15 @@ type RestProxyHandler struct { func NewRestProxyHandler( api access.API, identities flow.IdentitySkeletonList, - timeout time.Duration, - maxMsgSize uint, + connectionFactory connection.ConnectionFactory, log zerolog.Logger, metrics metrics.ObserverMetrics, chain flow.Chain, ) (*RestProxyHandler, error) { - forwarder, err := forwarder.NewForwarder( identities, - timeout, - maxMsgSize) + connectionFactory, + ) if err != nil { return nil, fmt.Errorf("could not create REST forwarder: %w", err) } @@ -80,10 +78,11 @@ func (r *RestProxyHandler) log(handler, rpc string, err error) { // GetCollectionByID returns a collection by ID. func (r *RestProxyHandler) GetCollectionByID(ctx context.Context, id flow.Identifier) (*flow.LightCollection, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() getCollectionByIDRequest := &accessproto.GetCollectionByIDRequest{ Id: id[:], @@ -106,10 +105,11 @@ func (r *RestProxyHandler) GetCollectionByID(ctx context.Context, id flow.Identi // SendTransaction sends already created transaction. func (r *RestProxyHandler) SendTransaction(ctx context.Context, tx *flow.TransactionBody) error { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return err } + defer closer.Close() transaction := convert.TransactionToMessage(*tx) sendTransactionRequest := &accessproto.SendTransactionRequest{ @@ -124,10 +124,11 @@ func (r *RestProxyHandler) SendTransaction(ctx context.Context, tx *flow.Transac // GetTransaction returns transaction by ID. func (r *RestProxyHandler) GetTransaction(ctx context.Context, id flow.Identifier) (*flow.TransactionBody, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() getTransactionRequest := &accessproto.GetTransactionRequest{ Id: id[:], @@ -155,11 +156,12 @@ func (r *RestProxyHandler) GetTransactionResult( collectionID flow.Identifier, requiredEventEncodingVersion entities.EventEncodingVersion, ) (*access.TransactionResult, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() getTransactionResultRequest := &accessproto.GetTransactionRequest{ Id: id[:], @@ -180,10 +182,11 @@ func (r *RestProxyHandler) GetTransactionResult( // GetAccountAtBlockHeight returns account by account address and block height. func (r *RestProxyHandler) GetAccountAtBlockHeight(ctx context.Context, address flow.Address, height uint64) (*flow.Account, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() getAccountAtBlockHeightRequest := &accessproto.GetAccountAtBlockHeightRequest{ Address: address.Bytes(), @@ -202,10 +205,11 @@ func (r *RestProxyHandler) GetAccountAtBlockHeight(ctx context.Context, address // ExecuteScriptAtLatestBlock executes script at latest block. func (r *RestProxyHandler) ExecuteScriptAtLatestBlock(ctx context.Context, script []byte, arguments [][]byte) ([]byte, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() executeScriptAtLatestBlockRequest := &accessproto.ExecuteScriptAtLatestBlockRequest{ Script: script, @@ -223,10 +227,11 @@ func (r *RestProxyHandler) ExecuteScriptAtLatestBlock(ctx context.Context, scrip // ExecuteScriptAtBlockHeight executes script at the given block height . func (r *RestProxyHandler) ExecuteScriptAtBlockHeight(ctx context.Context, blockHeight uint64, script []byte, arguments [][]byte) ([]byte, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() executeScriptAtBlockHeightRequest := &accessproto.ExecuteScriptAtBlockHeightRequest{ BlockHeight: blockHeight, @@ -245,10 +250,11 @@ func (r *RestProxyHandler) ExecuteScriptAtBlockHeight(ctx context.Context, block // ExecuteScriptAtBlockID executes script at the given block id . func (r *RestProxyHandler) ExecuteScriptAtBlockID(ctx context.Context, blockID flow.Identifier, script []byte, arguments [][]byte) ([]byte, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() executeScriptAtBlockIDRequest := &accessproto.ExecuteScriptAtBlockIDRequest{ BlockId: blockID[:], @@ -272,10 +278,11 @@ func (r *RestProxyHandler) GetEventsForHeightRange( startHeight, endHeight uint64, requiredEventEncodingVersion entities.EventEncodingVersion, ) ([]flow.BlockEvents, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() getEventsForHeightRangeRequest := &accessproto.GetEventsForHeightRangeRequest{ Type: eventType, @@ -300,10 +307,11 @@ func (r *RestProxyHandler) GetEventsForBlockIDs( blockIDs []flow.Identifier, requiredEventEncodingVersion entities.EventEncodingVersion, ) ([]flow.BlockEvents, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() blockIds := convert.IdentifiersToMessages(blockIDs) @@ -324,10 +332,11 @@ func (r *RestProxyHandler) GetEventsForBlockIDs( // GetExecutionResultForBlockID gets execution result by provided block ID. func (r *RestProxyHandler) GetExecutionResultForBlockID(ctx context.Context, blockID flow.Identifier) (*flow.ExecutionResult, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() getExecutionResultForBlockID := &accessproto.GetExecutionResultForBlockIDRequest{ BlockId: blockID[:], @@ -344,10 +353,11 @@ func (r *RestProxyHandler) GetExecutionResultForBlockID(ctx context.Context, blo // GetExecutionResultByID gets execution result by its ID. func (r *RestProxyHandler) GetExecutionResultByID(ctx context.Context, id flow.Identifier) (*flow.ExecutionResult, error) { - upstream, err := r.FaultTolerantClient() + upstream, closer, err := r.FaultTolerantClient() if err != nil { return nil, err } + defer closer.Close() executionResultByIDRequest := &accessproto.GetExecutionResultByIDRequest{ Id: id[:], diff --git a/engine/access/rest/routes/subscribe_events_test.go b/engine/access/rest/routes/subscribe_events_test.go index ec48ac0586b..0b5626c64b2 100644 --- a/engine/access/rest/routes/subscribe_events_test.go +++ b/engine/access/rest/routes/subscribe_events_test.go @@ -14,6 +14,8 @@ import ( "golang.org/x/exp/slices" + jsoncdc "github.com/onflow/cadence/encoding/json" + "github.com/onflow/flow/protobuf/go/flow/entities" mocks "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -24,6 +26,7 @@ import ( mockstatestream "github.com/onflow/flow-go/engine/access/state_stream/mock" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/utils/unittest" + "github.com/onflow/flow-go/utils/unittest/generator" ) type testType struct { @@ -66,6 +69,9 @@ func (s *SubscribeEventsSuite) SetupTest() { s.blocks = make([]*flow.Block, 0, blockCount) s.blockEvents = make(map[flow.Identifier]flow.EventsList, blockCount) + // by default, events are in CCF encoding + eventsGenerator := generator.EventGenerator(generator.WithEncoding(entities.EventEncodingVersion_CCF_V0)) + for i := 0; i < blockCount; i++ { block := unittest.BlockWithParentFixture(parent) // update for next iteration @@ -74,6 +80,11 @@ func (s *SubscribeEventsSuite) SetupTest() { result := unittest.ExecutionResultFixture() blockEvents := unittest.BlockEventsFixture(block.Header, (i%len(testEventTypes))*3+1, testEventTypes...) + // update payloads with valid CCF encoded data + for i := range blockEvents.Events { + blockEvents.Events[i].Payload = eventsGenerator.New().Payload + } + s.blocks = append(s.blocks, block) s.blockEvents[block.ID()] = blockEvents.Events @@ -171,26 +182,35 @@ func (s *SubscribeEventsSuite) TestSubscribeEvents() { // construct expected event responses based on the provided test configuration for i, block := range s.blocks { - if startBlockFound || block.ID() == test.startBlockID { + blockID := block.ID() + if startBlockFound || blockID == test.startBlockID { startBlockFound = true if test.startHeight == request.EmptyHeight || block.Header.Height >= test.startHeight { - eventsForBlock := flow.EventsList{} - for _, event := range s.blockEvents[block.ID()] { + // track 2 lists, one for the expected results and one that is passed back + // from the subscription to the handler. These cannot be shared since the + // response struct is passed by reference from the mock to the handler, so + // a bug within the handler could go unnoticed + expectedEvents := flow.EventsList{} + subscriptionEvents := flow.EventsList{} + for _, event := range s.blockEvents[blockID] { if slices.Contains(test.eventTypes, string(event.Type)) || - len(test.eventTypes) == 0 { //Include all events - eventsForBlock = append(eventsForBlock, event) + len(test.eventTypes) == 0 { // Include all events + expectedEvents = append(expectedEvents, event) + subscriptionEvents = append(subscriptionEvents, event) } } - eventResponse := &backend.EventsResponse{ - Height: block.Header.Height, - BlockID: block.ID(), - Events: eventsForBlock, - } - - if len(eventsForBlock) > 0 || (i+1)%int(test.heartbeatInterval) == 0 { - expectedEventsResponses = append(expectedEventsResponses, eventResponse) + if len(expectedEvents) > 0 || (i+1)%int(test.heartbeatInterval) == 0 { + expectedEventsResponses = append(expectedEventsResponses, &backend.EventsResponse{ + Height: block.Header.Height, + BlockID: blockID, + Events: expectedEvents, + }) } - subscriptionEventsResponses = append(subscriptionEventsResponses, eventResponse) + subscriptionEventsResponses = append(subscriptionEventsResponses, &backend.EventsResponse{ + Height: block.Header.Height, + BlockID: blockID, + Events: subscriptionEvents, + }) } } } @@ -410,7 +430,11 @@ func requireResponse(t *testing.T, recorder *testHijackResponseRecorder, expecte require.Equal(t, expectedEvent.TransactionID, actualEvent.TransactionID) require.Equal(t, expectedEvent.TransactionIndex, actualEvent.TransactionIndex) require.Equal(t, expectedEvent.EventIndex, actualEvent.EventIndex) - require.Equal(t, expectedEvent.Payload, actualEvent.Payload) + // payload is not expected to match, but it should decode + + // payload must decode to valid json-cdc encoded data + _, err := jsoncdc.Decode(nil, actualEvent.Payload) + require.NoError(t, err) } } } diff --git a/engine/access/rest/routes/websocket_handler.go b/engine/access/rest/routes/websocket_handler.go index 063cc4ed5c4..221a18ea7b0 100644 --- a/engine/access/rest/routes/websocket_handler.go +++ b/engine/access/rest/routes/websocket_handler.go @@ -15,6 +15,7 @@ import ( "github.com/onflow/flow-go/engine/access/rest/request" "github.com/onflow/flow-go/engine/access/state_stream" "github.com/onflow/flow-go/engine/access/state_stream/backend" + "github.com/onflow/flow-go/engine/common/rpc/convert" "github.com/onflow/flow-go/model/flow" ) @@ -158,6 +159,18 @@ func (wsController *WebsocketController) writeEvents(sub state_stream.Subscripti blocksSinceLastMessage = 0 } + // EventsResponse contains CCF encoded events, and this API returns JSON-CDC events. + // convert event payload formats. + for i, e := range resp.Events { + payload, err := convert.CcfPayloadToJsonPayload(e.Payload) + if err != nil { + err = fmt.Errorf("could not convert event payload from CCF to Json: %w", err) + wsController.wsErrorHandler(err) + return + } + resp.Events[i].Payload = payload + } + // Write the response to the WebSocket connection err = wsController.conn.WriteJSON(event) if err != nil { diff --git a/engine/access/rpc/backend/backend.go b/engine/access/rpc/backend/backend.go index 26e6ac19218..430fe01cb97 100644 --- a/engine/access/rpc/backend/backend.go +++ b/engine/access/rpc/backend/backend.go @@ -88,6 +88,7 @@ type Params struct { Transactions storage.Transactions ExecutionReceipts storage.ExecutionReceipts ExecutionResults storage.ExecutionResults + LightTransactionResults storage.LightTransactionResults ChainID flow.ChainID AccessMetrics module.AccessMetrics ConnFactory connection.ConnectionFactory @@ -99,13 +100,14 @@ type Params struct { SnapshotHistoryLimit int Communicator Communicator TxResultCacheSize uint + TxErrorMessagesCacheSize uint ScriptExecutor execution.ScriptExecutor ScriptExecutionMode ScriptExecutionMode } // New creates backend instance func New(params Params) (*Backend, error) { - retry := newRetry() + retry := newRetry(params.Log) if params.RetryEnabled { retry.Activate() } @@ -123,6 +125,18 @@ func New(params Params) (*Backend, error) { } } + // NOTE: The transaction error message cache is currently only used by the access node and not by the observer node. + // To avoid introducing unnecessary command line arguments in the observer, one case could be that the error + // message cache is nil for the observer node. + var txErrorMessagesCache *lru.Cache[flow.Identifier, string] + + if params.TxErrorMessagesCacheSize > 0 { + txErrorMessagesCache, err = lru.New[flow.Identifier, string](int(params.TxErrorMessagesCacheSize)) + if err != nil { + return nil, fmt.Errorf("failed to init cache for transaction error messages: %w", err) + } + } + // initialize node version info nodeInfo := getNodeVersionInfo(params.State.Params()) @@ -148,6 +162,7 @@ func New(params Params) (*Backend, error) { collections: params.Collections, blocks: params.Blocks, transactions: params.Transactions, + results: params.LightTransactionResults, executionReceipts: params.ExecutionReceipts, transactionValidator: configureTransactionValidator(params.State, params.ChainID), transactionMetrics: params.AccessMetrics, @@ -157,6 +172,7 @@ func New(params Params) (*Backend, error) { log: params.Log, nodeCommunicator: params.Communicator, txResultCache: txResCache, + txErrorMessagesCache: txErrorMessagesCache, }, backendEvents: backendEvents{ state: params.State, diff --git a/engine/access/rpc/backend/backend_accounts.go b/engine/access/rpc/backend/backend_accounts.go index 7ff38c748c5..b5dcea664af 100644 --- a/engine/access/rpc/backend/backend_accounts.go +++ b/engine/access/rpc/backend/backend_accounts.go @@ -18,6 +18,7 @@ import ( fvmerrors "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/execution" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/storage" ) @@ -43,7 +44,9 @@ func (b *backendAccounts) GetAccount(ctx context.Context, address flow.Address) func (b *backendAccounts) GetAccountAtLatestBlock(ctx context.Context, address flow.Address) (*flow.Account, error) { sealed, err := b.state.Sealed().Head() if err != nil { - return nil, status.Errorf(codes.Internal, "failed to get latest sealed header: %v", err) + err := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + irrecoverable.Throw(ctx, err) + return nil, err } sealedBlockID := sealed.ID() diff --git a/engine/access/rpc/backend/backend_accounts_test.go b/engine/access/rpc/backend/backend_accounts_test.go index e876c2325e3..614f91bfb49 100644 --- a/engine/access/rpc/backend/backend_accounts_test.go +++ b/engine/access/rpc/backend/backend_accounts_test.go @@ -5,7 +5,6 @@ import ( "fmt" "testing" - execproto "github.com/onflow/flow/protobuf/go/flow/execution" "github.com/rs/zerolog" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" @@ -18,10 +17,13 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/execution" execmock "github.com/onflow/flow-go/module/execution/mock" + "github.com/onflow/flow-go/module/irrecoverable" protocol "github.com/onflow/flow-go/state/protocol/mock" "github.com/onflow/flow-go/storage" storagemock "github.com/onflow/flow-go/storage/mock" "github.com/onflow/flow-go/utils/unittest" + + execproto "github.com/onflow/flow/protobuf/go/flow/execution" ) type BackendAccountsSuite struct { @@ -316,6 +318,30 @@ func (s *BackendAccountsSuite) TestGetAccountFromFailover_ReturnsENErrors() { }) } +// TestGetAccountAtLatestBlock_InconsistentState tests that signaler context received error when node state is +// inconsistent +func (s *BackendAccountsSuite) TestGetAccountAtLatestBlockFromStorage_InconsistentState() { + scriptExecutor := execmock.NewScriptExecutor(s.T()) + + backend := s.defaultBackend() + backend.scriptExecMode = ScriptExecutionModeLocalOnly + backend.scriptExecutor = scriptExecutor + + s.Run(fmt.Sprintf("GetAccountAtLatestBlock - fails with %v", "inconsistent node's state"), func() { + s.state.On("Sealed").Return(s.snapshot, nil) + + err := fmt.Errorf("inconsistent node's state") + s.snapshot.On("Head").Return(nil, err) + + signCtxErr := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + signalerCtx := irrecoverable.WithSignalerContext(context.Background(), irrecoverable.NewMockSignalerContextExpectError(s.T(), context.Background(), signCtxErr)) + + actual, err := backend.GetAccountAtLatestBlock(signalerCtx, s.failingAddress) + s.Require().Error(err) + s.Require().Nil(actual) + }) +} + func (s *BackendAccountsSuite) testGetAccount(ctx context.Context, backend *backendAccounts, statusCode codes.Code) { s.state.On("Sealed").Return(s.snapshot, nil).Once() s.snapshot.On("Head").Return(s.block.Header, nil).Once() diff --git a/engine/access/rpc/backend/backend_block_details.go b/engine/access/rpc/backend/backend_block_details.go index fc9eee618c4..698e66b0727 100644 --- a/engine/access/rpc/backend/backend_block_details.go +++ b/engine/access/rpc/backend/backend_block_details.go @@ -8,6 +8,7 @@ import ( "github.com/onflow/flow-go/engine/common/rpc" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/storage" ) @@ -17,16 +18,23 @@ type backendBlockDetails struct { state protocol.State } -func (b *backendBlockDetails) GetLatestBlock(_ context.Context, isSealed bool) (*flow.Block, flow.BlockStatus, error) { +func (b *backendBlockDetails) GetLatestBlock(ctx context.Context, isSealed bool) (*flow.Block, flow.BlockStatus, error) { var header *flow.Header var err error if isSealed { // get the latest seal header from storage header, err = b.state.Sealed().Head() + if err != nil { + err = irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + + } } else { // get the finalized header from state header, err = b.state.Final().Head() + if err != nil { + err = irrecoverable.NewExceptionf("failed to lookup final header: %w", err) + } } if err != nil { @@ -35,12 +43,12 @@ func (b *backendBlockDetails) GetLatestBlock(_ context.Context, isSealed bool) ( // In the RPC engine, if we encounter an error from the protocol state indicating state corruption, // we should halt processing requests, but do throw an exception which might cause a crash: // - It is unsafe to process requests if we have an internally bad state. - // TODO: https://github.com/onflow/flow-go/issues/4028 // - We would like to avoid throwing an exception as a result of an Access API request by policy // because this can cause DOS potential // - Since the protocol state is widely shared, we assume that in practice another component will // observe the protocol state error and throw an exception. - return nil, flow.BlockStatusUnknown, status.Errorf(codes.Internal, "could not get latest block: %v", err) + irrecoverable.Throw(ctx, err) + return nil, flow.BlockStatusUnknown, err } // since we are querying a finalized or sealed block, we can use the height index and save an ID computation @@ -49,51 +57,53 @@ func (b *backendBlockDetails) GetLatestBlock(_ context.Context, isSealed bool) ( return nil, flow.BlockStatusUnknown, status.Errorf(codes.Internal, "could not get latest block: %v", err) } - stat, err := b.getBlockStatus(block) + stat, err := b.getBlockStatus(ctx, block) if err != nil { return nil, stat, err } return block, stat, nil } -func (b *backendBlockDetails) GetBlockByID(_ context.Context, id flow.Identifier) (*flow.Block, flow.BlockStatus, error) { +func (b *backendBlockDetails) GetBlockByID(ctx context.Context, id flow.Identifier) (*flow.Block, flow.BlockStatus, error) { block, err := b.blocks.ByID(id) if err != nil { return nil, flow.BlockStatusUnknown, rpc.ConvertStorageError(err) } - stat, err := b.getBlockStatus(block) + stat, err := b.getBlockStatus(ctx, block) if err != nil { return nil, stat, err } return block, stat, nil } -func (b *backendBlockDetails) GetBlockByHeight(_ context.Context, height uint64) (*flow.Block, flow.BlockStatus, error) { +func (b *backendBlockDetails) GetBlockByHeight(ctx context.Context, height uint64) (*flow.Block, flow.BlockStatus, error) { block, err := b.blocks.ByHeight(height) if err != nil { return nil, flow.BlockStatusUnknown, rpc.ConvertStorageError(err) } - stat, err := b.getBlockStatus(block) + stat, err := b.getBlockStatus(ctx, block) if err != nil { return nil, stat, err } return block, stat, nil } -func (b *backendBlockDetails) getBlockStatus(block *flow.Block) (flow.BlockStatus, error) { +// No errors are expected during normal operations. +func (b *backendBlockDetails) getBlockStatus(ctx context.Context, block *flow.Block) (flow.BlockStatus, error) { sealed, err := b.state.Sealed().Head() if err != nil { // In the RPC engine, if we encounter an error from the protocol state indicating state corruption, // we should halt processing requests, but do throw an exception which might cause a crash: // - It is unsafe to process requests if we have an internally bad state. - // TODO: https://github.com/onflow/flow-go/issues/4028 // - We would like to avoid throwing an exception as a result of an Access API request by policy // because this can cause DOS potential // - Since the protocol state is widely shared, we assume that in practice another component will // observe the protocol state error and throw an exception. - return flow.BlockStatusUnknown, status.Errorf(codes.Internal, "failed to find latest sealed header: %v", err) + err := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + irrecoverable.Throw(ctx, err) + return flow.BlockStatusUnknown, err } if block.Header.Height > sealed.Height { diff --git a/engine/access/rpc/backend/backend_block_headers.go b/engine/access/rpc/backend/backend_block_headers.go index ac4116224d4..a61fcab711a 100644 --- a/engine/access/rpc/backend/backend_block_headers.go +++ b/engine/access/rpc/backend/backend_block_headers.go @@ -3,11 +3,9 @@ package backend import ( "context" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - "github.com/onflow/flow-go/engine/common/rpc" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/storage" ) @@ -17,16 +15,22 @@ type backendBlockHeaders struct { state protocol.State } -func (b *backendBlockHeaders) GetLatestBlockHeader(_ context.Context, isSealed bool) (*flow.Header, flow.BlockStatus, error) { +func (b *backendBlockHeaders) GetLatestBlockHeader(ctx context.Context, isSealed bool) (*flow.Header, flow.BlockStatus, error) { var header *flow.Header var err error if isSealed { // get the latest seal header from storage header, err = b.state.Sealed().Head() + if err != nil { + err = irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + } } else { // get the finalized header from state header, err = b.state.Final().Head() + if err != nil { + err = irrecoverable.NewExceptionf("failed to lookup final header: %w", err) + } } if err != nil { @@ -34,59 +38,61 @@ func (b *backendBlockHeaders) GetLatestBlockHeader(_ context.Context, isSealed b // In the RPC engine, if we encounter an error from the protocol state indicating state corruption, // we should halt processing requests, but do throw an exception which might cause a crash: // - It is unsafe to process requests if we have an internally bad state. - // TODO: https://github.com/onflow/flow-go/issues/4028 // - We would like to avoid throwing an exception as a result of an Access API request by policy // because this can cause DOS potential // - Since the protocol state is widely shared, we assume that in practice another component will // observe the protocol state error and throw an exception. - return nil, flow.BlockStatusUnknown, status.Errorf(codes.Internal, "could not get latest block header: %v", err) + irrecoverable.Throw(ctx, err) + return nil, flow.BlockStatusUnknown, err } - stat, err := b.getBlockStatus(header) + stat, err := b.getBlockStatus(ctx, header) if err != nil { return nil, stat, err } return header, stat, nil } -func (b *backendBlockHeaders) GetBlockHeaderByID(_ context.Context, id flow.Identifier) (*flow.Header, flow.BlockStatus, error) { +func (b *backendBlockHeaders) GetBlockHeaderByID(ctx context.Context, id flow.Identifier) (*flow.Header, flow.BlockStatus, error) { header, err := b.headers.ByBlockID(id) if err != nil { return nil, flow.BlockStatusUnknown, rpc.ConvertStorageError(err) } - stat, err := b.getBlockStatus(header) + stat, err := b.getBlockStatus(ctx, header) if err != nil { return nil, stat, err } return header, stat, nil } -func (b *backendBlockHeaders) GetBlockHeaderByHeight(_ context.Context, height uint64) (*flow.Header, flow.BlockStatus, error) { +func (b *backendBlockHeaders) GetBlockHeaderByHeight(ctx context.Context, height uint64) (*flow.Header, flow.BlockStatus, error) { header, err := b.headers.ByHeight(height) if err != nil { return nil, flow.BlockStatusUnknown, rpc.ConvertStorageError(err) } - stat, err := b.getBlockStatus(header) + stat, err := b.getBlockStatus(ctx, header) if err != nil { return nil, stat, err } return header, stat, nil } -func (b *backendBlockHeaders) getBlockStatus(header *flow.Header) (flow.BlockStatus, error) { +// No errors are expected during normal operations. +func (b *backendBlockHeaders) getBlockStatus(ctx context.Context, header *flow.Header) (flow.BlockStatus, error) { sealed, err := b.state.Sealed().Head() if err != nil { // In the RPC engine, if we encounter an error from the protocol state indicating state corruption, // we should halt processing requests, but do throw an exception which might cause a crash: // - It is unsafe to process requests if we have an internally bad State. - // TODO: https://github.com/onflow/flow-go/issues/4028 // - We would like to avoid throwing an exception as a result of an Access API request by policy // because this can cause DOS potential // - Since the protocol state is widely shared, we assume that in practice another component will // observe the protocol state error and throw an exception. - return flow.BlockStatusUnknown, status.Errorf(codes.Internal, "failed to find latest sealed header: %v", err) + err := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + irrecoverable.Throw(ctx, err) + return flow.BlockStatusUnknown, err } if header.Height > sealed.Height { diff --git a/engine/access/rpc/backend/backend_events.go b/engine/access/rpc/backend/backend_events.go index 9a7700550d4..08d0115f504 100644 --- a/engine/access/rpc/backend/backend_events.go +++ b/engine/access/rpc/backend/backend_events.go @@ -18,6 +18,7 @@ import ( "github.com/onflow/flow-go/engine/common/rpc" "github.com/onflow/flow-go/engine/common/rpc/convert" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/storage" ) @@ -54,7 +55,9 @@ func (b *backendEvents) GetEventsForHeightRange( head, err := b.state.Sealed().Head() if err != nil { // sealed block must be in the store, so return an Internal code even if we got NotFound - return nil, status.Errorf(codes.Internal, "failed to get events: %v", err) + err := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + irrecoverable.Throw(ctx, err) + return nil, err } // start height should not be beyond the last sealed height diff --git a/engine/access/rpc/backend/backend_scripts.go b/engine/access/rpc/backend/backend_scripts.go index 0c157388422..958c0251881 100644 --- a/engine/access/rpc/backend/backend_scripts.go +++ b/engine/access/rpc/backend/backend_scripts.go @@ -18,6 +18,7 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" "github.com/onflow/flow-go/module/execution" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/storage" "github.com/onflow/flow-go/utils/logging" @@ -72,7 +73,9 @@ func (b *backendScripts) ExecuteScriptAtLatestBlock( latestHeader, err := b.state.Sealed().Head() if err != nil { // the latest sealed header MUST be available - return nil, status.Errorf(codes.Internal, "failed to get latest sealed header: %v", err) + err := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + irrecoverable.Throw(ctx, err) + return nil, err } return b.executeScript(ctx, newScriptExecutionRequest(latestHeader.ID(), latestHeader.Height, script, arguments)) @@ -125,9 +128,11 @@ func (b *backendScripts) executeScript( case ScriptExecutionModeFailover: localResult, localDuration, localErr := b.executeScriptLocally(ctx, scriptRequest) - if localErr == nil || isInvalidArgumentError(localErr) { + if localErr == nil || isInvalidArgumentError(localErr) || status.Code(localErr) == codes.Canceled { return localResult, localErr } + // Note: scripts that timeout are retried on the execution nodes since ANs may have performance + // issues for some scripts. execResult, execDuration, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, scriptRequest) resultComparer := newScriptResultComparison(b.log, b.metrics, scriptRequest) @@ -185,11 +190,13 @@ func (b *backendScripts) executeScriptLocally( if err != nil { convertedErr := convertScriptExecutionError(err, r.height) - if status.Code(convertedErr) == codes.InvalidArgument { + switch status.Code(convertedErr) { + case codes.InvalidArgument, codes.Canceled, codes.DeadlineExceeded: lg.Debug().Err(err). Str("script", string(r.script)). Msg("script failed to execute locally") - } else { + + default: lg.Error().Err(err).Msg("script execution failed") b.metrics.ScriptExecutionErrorLocal() } @@ -332,8 +339,17 @@ func convertScriptExecutionError(err error, height uint64) error { return rpc.ConvertError(err, "failed to execute script", codes.Internal) } - // runtime errors - return status.Errorf(codes.InvalidArgument, "failed to execute script: %v", err) + switch coded.Code() { + case fvmerrors.ErrCodeScriptExecutionCancelledError: + return status.Errorf(codes.Canceled, "script execution canceled: %v", err) + + case fvmerrors.ErrCodeScriptExecutionTimedOutError: + return status.Errorf(codes.DeadlineExceeded, "script execution timed out: %v", err) + + default: + // runtime errors + return status.Errorf(codes.InvalidArgument, "failed to execute script: %v", err) + } } return convertIndexError(err, height, "failed to execute script") diff --git a/engine/access/rpc/backend/backend_scripts_test.go b/engine/access/rpc/backend/backend_scripts_test.go index 951adc9b50c..fbcc4105f9c 100644 --- a/engine/access/rpc/backend/backend_scripts_test.go +++ b/engine/access/rpc/backend/backend_scripts_test.go @@ -22,6 +22,7 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/execution" execmock "github.com/onflow/flow-go/module/execution/mock" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/module/metrics" protocol "github.com/onflow/flow-go/state/protocol/mock" "github.com/onflow/flow-go/storage" @@ -34,6 +35,8 @@ var ( cadenceErr = fvmerrors.NewCodedError(fvmerrors.ErrCodeCadenceRunTimeError, "cadence error") fvmFailureErr = fvmerrors.NewCodedError(fvmerrors.FailureCodeBlockFinderFailure, "fvm error") + ctxCancelErr = fvmerrors.NewCodedError(fvmerrors.ErrCodeScriptExecutionCancelledError, "context canceled error") + timeoutErr = fvmerrors.NewCodedError(fvmerrors.ErrCodeScriptExecutionTimedOutError, "timeout error") ) // Create a suite similar to GetAccount that covers each of the modes @@ -319,31 +322,49 @@ func (s *BackendScriptsSuite) TestExecuteScriptWithFailover_HappyPath() { } } -// TestExecuteScriptWithFailover_SkippedForInvalidArgument tests that failover is skipped for -// FVM errors that result in InvalidArgument errors -func (s *BackendScriptsSuite) TestExecuteScriptWithFailover_SkippedForInvalidArgument() { +// TestExecuteScriptWithFailover_SkippedForCorrectCodes tests that failover is skipped for +// FVM errors that result in InvalidArgument or Canceled errors +func (s *BackendScriptsSuite) TestExecuteScriptWithFailover_SkippedForCorrectCodes() { ctx := context.Background() // configure local script executor to fail scriptExecutor := execmock.NewScriptExecutor(s.T()) - scriptExecutor.On("ExecuteAtBlockHeight", mock.Anything, s.failingScript, s.arguments, s.block.Header.Height). - Return(nil, cadenceErr) backend := s.defaultBackend() backend.scriptExecMode = ScriptExecutionModeFailover backend.scriptExecutor = scriptExecutor - s.Run("ExecuteScriptAtLatestBlock", func() { - s.testExecuteScriptAtLatestBlock(ctx, backend, codes.InvalidArgument) - }) + testCases := []struct { + err error + statusCode codes.Code + }{ + { + err: cadenceErr, + statusCode: codes.InvalidArgument, + }, + { + err: ctxCancelErr, + statusCode: codes.Canceled, + }, + } - s.Run("ExecuteScriptAtBlockID", func() { - s.testExecuteScriptAtBlockID(ctx, backend, codes.InvalidArgument) - }) + for _, tt := range testCases { + scriptExecutor.On("ExecuteAtBlockHeight", mock.Anything, s.failingScript, s.arguments, s.block.Header.Height). + Return(nil, tt.err). + Times(3) - s.Run("ExecuteScriptAtBlockHeight", func() { - s.testExecuteScriptAtBlockHeight(ctx, backend, codes.InvalidArgument) - }) + s.Run(fmt.Sprintf("ExecuteScriptAtLatestBlock - %s", tt.statusCode), func() { + s.testExecuteScriptAtLatestBlock(ctx, backend, tt.statusCode) + }) + + s.Run(fmt.Sprintf("ExecuteScriptAtBlockID - %s", tt.statusCode), func() { + s.testExecuteScriptAtBlockID(ctx, backend, tt.statusCode) + }) + + s.Run(fmt.Sprintf("ExecuteScriptAtBlockHeight - %s", tt.statusCode), func() { + s.testExecuteScriptAtBlockHeight(ctx, backend, tt.statusCode) + }) + } } // TestExecuteScriptWithFailover_ReturnsENErrors tests that when an error is returned from the execution @@ -381,6 +402,31 @@ func (s *BackendScriptsSuite) TestExecuteScriptWithFailover_ReturnsENErrors() { }) } +// TestExecuteScriptAtLatestBlockFromStorage_InconsistentState tests that signaler context received error when node state is +// inconsistent +func (s *BackendScriptsSuite) TestExecuteScriptAtLatestBlockFromStorage_InconsistentState() { + scriptExecutor := execmock.NewScriptExecutor(s.T()) + + backend := s.defaultBackend() + backend.scriptExecMode = ScriptExecutionModeLocalOnly + backend.scriptExecutor = scriptExecutor + + s.Run(fmt.Sprintf("ExecuteScriptAtLatestBlock - fails with %v", "inconsistent node's state"), func() { + s.state.On("Sealed").Return(s.snapshot, nil) + + err := fmt.Errorf("inconsistent node's state") + s.snapshot.On("Head").Return(nil, err) + + signCtxErr := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + signalerCtx := irrecoverable.WithSignalerContext(context.Background(), + irrecoverable.NewMockSignalerContextExpectError(s.T(), context.Background(), signCtxErr)) + + actual, err := backend.ExecuteScriptAtLatestBlock(signalerCtx, s.script, s.arguments) + s.Require().Error(err) + s.Require().Nil(actual) + }) +} + func (s *BackendScriptsSuite) testExecuteScriptAtLatestBlock(ctx context.Context, backend *backendScripts, statusCode codes.Code) { s.state.On("Sealed").Return(s.snapshot, nil).Once() s.snapshot.On("Head").Return(s.block.Header, nil).Once() diff --git a/engine/access/rpc/backend/backend_test.go b/engine/access/rpc/backend/backend_test.go index a5afb782f30..acd5259741c 100644 --- a/engine/access/rpc/backend/backend_test.go +++ b/engine/access/rpc/backend/backend_test.go @@ -10,6 +10,7 @@ import ( entitiesproto "github.com/onflow/flow/protobuf/go/flow/entities" execproto "github.com/onflow/flow/protobuf/go/flow/execution" "github.com/rs/zerolog" + "github.com/sony/gobreaker" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -25,6 +26,7 @@ import ( connectionmock "github.com/onflow/flow-go/engine/access/rpc/connection/mock" "github.com/onflow/flow-go/engine/common/rpc/convert" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/module/metrics" realprotocol "github.com/onflow/flow-go/state/protocol" bprotocol "github.com/onflow/flow-go/state/protocol/badger" @@ -50,12 +52,13 @@ type Suite struct { snapshot *protocol.Snapshot log zerolog.Logger - blocks *storagemock.Blocks - headers *storagemock.Headers - collections *storagemock.Collections - transactions *storagemock.Transactions - receipts *storagemock.ExecutionReceipts - results *storagemock.ExecutionResults + blocks *storagemock.Blocks + headers *storagemock.Headers + collections *storagemock.Collections + transactions *storagemock.Transactions + receipts *storagemock.ExecutionReceipts + results *storagemock.ExecutionResults + transactionResults *storagemock.LightTransactionResults colClient *access.AccessAPIClient execClient *access.ExecutionAPIClient @@ -94,6 +97,7 @@ func (suite *Suite) SetupTest() { suite.colClient = new(access.AccessAPIClient) suite.archiveClient = new(access.AccessAPIClient) suite.execClient = new(access.ExecutionAPIClient) + suite.transactionResults = storagemock.NewLightTransactionResults(suite.T()) suite.chainID = flow.Testnet suite.historicalAccessClient = new(access.AccessAPIClient) suite.connectionFactory = connectionmock.NewConnectionFactory(suite.T()) @@ -397,29 +401,46 @@ func (suite *Suite) TestGetLatestProtocolStateSnapshot_HistoryLimit() { func (suite *Suite) TestGetLatestSealedBlockHeader() { // setup the mocks suite.state.On("Sealed").Return(suite.snapshot, nil).Maybe() - - block := unittest.BlockHeaderFixture() - suite.snapshot.On("Head").Return(block, nil).Once() - suite.state.On("Sealed").Return(suite.snapshot, nil) - suite.snapshot.On("Head").Return(block, nil).Once() params := suite.defaultBackendParams() backend, err := New(params) suite.Require().NoError(err) - // query the handler for the latest sealed block - header, stat, err := backend.GetLatestBlockHeader(context.Background(), true) - suite.checkResponse(header, err) + suite.Run("GetLatestSealedBlockHeader - happy path", func() { + block := unittest.BlockHeaderFixture() + suite.snapshot.On("Head").Return(block, nil).Once() + suite.snapshot.On("Head").Return(block, nil).Once() - // make sure we got the latest sealed block - suite.Require().Equal(block.ID(), header.ID()) - suite.Require().Equal(block.Height, header.Height) - suite.Require().Equal(block.ParentID, header.ParentID) - suite.Require().Equal(stat, flow.BlockStatusSealed) + // query the handler for the latest sealed block + header, stat, err := backend.GetLatestBlockHeader(context.Background(), true) + suite.checkResponse(header, err) - suite.assertAllExpectations() + // make sure we got the latest sealed block + suite.Require().Equal(block.ID(), header.ID()) + suite.Require().Equal(block.Height, header.Height) + suite.Require().Equal(block.ParentID, header.ParentID) + suite.Require().Equal(stat, flow.BlockStatusSealed) + + suite.assertAllExpectations() + }) + + // tests that signaler context received error when node state is inconsistent + suite.Run("GetLatestSealedBlockHeader - fails with inconsistent node's state", func() { + err := fmt.Errorf("inconsistent node's state") + suite.snapshot.On("Head").Return(nil, err) + + // mock signaler context expect an error + signCtxErr := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + signalerCtx := irrecoverable.WithSignalerContext(context.Background(), + irrecoverable.NewMockSignalerContextExpectError(suite.T(), context.Background(), signCtxErr)) + + actualHeader, actualStatus, err := backend.GetLatestBlockHeader(signalerCtx, true) + suite.Require().Error(err) + suite.Require().Nil(actualHeader) + suite.Require().Equal(flow.BlockStatusUnknown, actualStatus) + }) } func (suite *Suite) TestGetTransaction() { @@ -478,8 +499,6 @@ func (suite *Suite) TestGetTransactionResultByIndex() { blockId := block.ID() index := uint32(0) - suite.snapshot.On("Head").Return(block.Header, nil) - // block storage returns the corresponding block suite.blocks. On("ByID", blockId). @@ -511,24 +530,43 @@ func (suite *Suite) TestGetTransactionResultByIndex() { suite.Require().NoError(err) suite.execClient. - On("GetTransactionResultByIndex", ctx, exeEventReq). - Return(exeEventResp, nil). - Once() + On("GetTransactionResultByIndex", mock.Anything, exeEventReq). + Return(exeEventResp, nil) - result, err := backend.GetTransactionResultByIndex(ctx, blockId, index, entitiesproto.EventEncodingVersion_JSON_CDC_V0) - suite.checkResponse(result, err) - suite.Assert().Equal(result.BlockHeight, block.Header.Height) + suite.Run("TestGetTransactionResultByIndex - happy path", func() { + suite.snapshot.On("Head").Return(block.Header, nil).Once() + result, err := backend.GetTransactionResultByIndex(ctx, blockId, index, entitiesproto.EventEncodingVersion_JSON_CDC_V0) + suite.checkResponse(result, err) + suite.Assert().Equal(result.BlockHeight, block.Header.Height) - suite.assertAllExpectations() + suite.assertAllExpectations() + }) + + // tests that signaler context received error when node state is inconsistent + suite.Run("TestGetTransactionResultByIndex - fails with inconsistent node's state", func() { + err := fmt.Errorf("inconsistent node's state") + suite.snapshot.On("Head").Return(nil, err).Once() + + // mock signaler context expect an error + signCtxErr := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + signalerCtx := irrecoverable.WithSignalerContext(context.Background(), + irrecoverable.NewMockSignalerContextExpectError(suite.T(), context.Background(), signCtxErr)) + + actual, err := backend.GetTransactionResultByIndex(signalerCtx, blockId, index, entitiesproto.EventEncodingVersion_JSON_CDC_V0) + suite.Require().Error(err) + suite.Require().Nil(actual) + }) } func (suite *Suite) TestGetTransactionResultsByBlockID() { - head := unittest.BlockHeaderFixture() suite.state.On("Sealed").Return(suite.snapshot, nil).Maybe() - suite.snapshot.On("Head").Return(head, nil).Maybe() ctx := context.Background() + params := suite.defaultBackendParams() + block := unittest.BlockFixture() + sporkRootBlockHeight := suite.state.Params().SporkRootBlockHeight() + block.Header.Height = sporkRootBlockHeight + 1 blockId := block.ID() // block storage returns the corresponding block @@ -552,7 +590,6 @@ func (suite *Suite) TestGetTransactionResultsByBlockID() { TransactionResults: []*execproto.GetTransactionResultResponse{{}}, } - params := suite.defaultBackendParams() // the connection factory should be used to get the execution node client params.ConnFactory = connFactory params.FixedExecutionNodeIDs = (fixedENIDs.NodeIDs()).Strings() @@ -561,14 +598,32 @@ func (suite *Suite) TestGetTransactionResultsByBlockID() { suite.Require().NoError(err) suite.execClient. - On("GetTransactionResultsByBlockID", ctx, exeEventReq). - Return(exeEventResp, nil). - Once() + On("GetTransactionResultsByBlockID", mock.Anything, exeEventReq). + Return(exeEventResp, nil) - result, err := backend.GetTransactionResultsByBlockID(ctx, blockId, entitiesproto.EventEncodingVersion_JSON_CDC_V0) - suite.checkResponse(result, err) + suite.Run("GetTransactionResultsByBlockID - happy path", func() { + suite.snapshot.On("Head").Return(block.Header, nil).Once() - suite.assertAllExpectations() + result, err := backend.GetTransactionResultsByBlockID(ctx, blockId, entitiesproto.EventEncodingVersion_JSON_CDC_V0) + suite.checkResponse(result, err) + + suite.assertAllExpectations() + }) + + //tests that signaler context received error when node state is inconsistent + suite.Run("GetTransactionResultsByBlockID - fails with inconsistent node's state", func() { + err := fmt.Errorf("inconsistent node's state") + suite.snapshot.On("Head").Return(nil, err).Once() + + // mock signaler context expect an error + signCtxErr := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + signalerCtx := irrecoverable.WithSignalerContext(context.Background(), + irrecoverable.NewMockSignalerContextExpectError(suite.T(), context.Background(), signCtxErr)) + + actual, err := backend.GetTransactionResultsByBlockID(signalerCtx, blockId, entitiesproto.EventEncodingVersion_JSON_CDC_V0) + suite.Require().Error(err) + suite.Require().Nil(actual) + }) } // TestTransactionStatusTransition tests that the status of transaction changes from Finalized to Sealed @@ -919,7 +974,6 @@ func (suite *Suite) TestTransactionPendingToFinalizedStatusTransition() { // TestTransactionResultUnknown tests that the status of transaction is reported as unknown when it is not found in the // local storage func (suite *Suite) TestTransactionResultUnknown() { - ctx := context.Background() txID := unittest.IdentifierFixture() @@ -945,40 +999,58 @@ func (suite *Suite) TestGetLatestFinalizedBlock() { suite.state.On("Sealed").Return(suite.snapshot, nil).Maybe() suite.state.On("Final").Return(suite.snapshot, nil).Maybe() - // setup the mocks - expected := unittest.BlockFixture() - header := expected.Header + params := suite.defaultBackendParams() - suite.snapshot. - On("Head"). - Return(header, nil).Once() + backend, err := New(params) + suite.Require().NoError(err) - headerClone := *header - headerClone.Height = 0 + suite.Run("GetLatestFinalizedBlock - happy path", func() { + // setup the mocks + expected := unittest.BlockFixture() + header := expected.Header - suite.snapshot. - On("Head"). - Return(&headerClone, nil). - Once() + suite.snapshot. + On("Head"). + Return(header, nil).Once() - suite.blocks. - On("ByHeight", header.Height). - Return(&expected, nil) + headerClone := *header + headerClone.Height = 0 - params := suite.defaultBackendParams() + suite.snapshot. + On("Head"). + Return(&headerClone, nil). + Once() - backend, err := New(params) - suite.Require().NoError(err) + suite.blocks. + On("ByHeight", header.Height). + Return(&expected, nil) - // query the handler for the latest finalized header - actual, stat, err := backend.GetLatestBlock(context.Background(), false) - suite.checkResponse(actual, err) + // query the handler for the latest finalized header + actual, stat, err := backend.GetLatestBlock(context.Background(), false) + suite.checkResponse(actual, err) - // make sure we got the latest header - suite.Require().Equal(expected, *actual) - suite.Assert().Equal(stat, flow.BlockStatusFinalized) + // make sure we got the latest header + suite.Require().Equal(expected, *actual) + suite.Assert().Equal(stat, flow.BlockStatusFinalized) - suite.assertAllExpectations() + suite.assertAllExpectations() + }) + + // tests that signaler context received error when node state is inconsistent + suite.Run("GetLatestFinalizedBlock - fails with inconsistent node's state", func() { + err := fmt.Errorf("inconsistent node's state") + suite.snapshot.On("Head").Return(nil, err) + + // mock signaler context expect an error + signCtxErr := irrecoverable.NewExceptionf("failed to lookup final header: %w", err) + signalerCtx := irrecoverable.WithSignalerContext(context.Background(), + irrecoverable.NewMockSignalerContextExpectError(suite.T(), context.Background(), signCtxErr)) + + actualBlock, actualStatus, err := backend.GetLatestBlock(signalerCtx, false) + suite.Require().Error(err) + suite.Require().Nil(actualBlock) + suite.Require().Equal(flow.BlockStatusUnknown, actualStatus) + }) } type mockCloser struct{} @@ -1261,7 +1333,6 @@ func (suite *Suite) TestGetExecutionResultByBlockID() { } func (suite *Suite) TestGetEventsForHeightRange() { - ctx := context.Background() const minHeight uint64 = 5 const maxHeight uint64 = 10 @@ -1282,11 +1353,6 @@ func (suite *Suite) TestGetEventsForHeightRange() { stateParams.On("FinalizedRoot").Return(rootHeader, nil) state.On("Params").Return(stateParams).Maybe() - // mock snapshot to return head backend - snapshot.On("Head").Return( - func() *flow.Header { return head }, - func() error { return nil }, - ) snapshot.On("Identities", mock.Anything).Return( func(_ flow.IdentityFilter[flow.Identity]) flow.IdentityList { return nodeIdentities @@ -1377,7 +1443,42 @@ func (suite *Suite) TestGetEventsForHeightRange() { return results } + // tests that signaler context received error when node state is inconsistent + suite.Run("inconsistent node's state", func() { + headHeight = maxHeight - 1 + setupHeadHeight(headHeight) + + // setup mocks + stateParams.On("SporkID").Return(unittest.IdentifierFixture(), nil) + stateParams.On("ProtocolVersion").Return(uint(unittest.Uint64InRange(10, 30)), nil) + stateParams.On("SporkRootBlockHeight").Return(headHeight, nil) + stateParams.On("SealedRoot").Return(head, nil) + + params := suite.defaultBackendParams() + params.State = state + + backend, err := New(params) + suite.Require().NoError(err) + + err = fmt.Errorf("inconsistent node's state") + snapshot.On("Head").Return(nil, err).Once() + + signCtxErr := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) + signalerCtx := irrecoverable.WithSignalerContext(context.Background(), + irrecoverable.NewMockSignalerContextExpectError(suite.T(), context.Background(), signCtxErr)) + + actual, err := backend.GetEventsForHeightRange(signalerCtx, string(flow.EventAccountCreated), minHeight, maxHeight, + entitiesproto.EventEncodingVersion_JSON_CDC_V0) + suite.Require().Error(err) + suite.Require().Nil(actual) + }) + connFactory := suite.setupConnectionFactory() + // mock snapshot to return head backend + snapshot.On("Head").Return( + func() *flow.Header { return head }, + func() error { return nil }, + ) //suite.state = state suite.Run("invalid request max height < min height", func() { @@ -1916,6 +2017,56 @@ func (suite *Suite) TestGetTransactionResultByIndexAndBlockIdEventEncodingVersio } } +// TestNodeCommunicator tests special case for node communicator, when only one node available and communicator gets +// gobreaker.ErrOpenState +func (suite *Suite) TestNodeCommunicator() { + head := unittest.BlockHeaderFixture() + suite.state.On("Sealed").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Head").Return(head, nil).Maybe() + + ctx := context.Background() + block := unittest.BlockFixture() + blockId := block.ID() + + // block storage returns the corresponding block + suite.blocks. + On("ByID", blockId). + Return(&block, nil) + + _, fixedENIDs := suite.setupReceipts(&block) + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Identities", mock.Anything).Return(fixedENIDs, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + exeEventReq := &execproto.GetTransactionsByBlockIDRequest{ + BlockId: blockId[:], + } + + params := suite.defaultBackendParams() + // the connection factory should be used to get the execution node client + params.ConnFactory = connFactory + params.FixedExecutionNodeIDs = (fixedENIDs.NodeIDs()).Strings() + // Left only one preferred execution node + params.PreferredExecutionNodeIDs = []string{fixedENIDs[0].NodeID.String()} + + backend, err := New(params) + suite.Require().NoError(err) + + // Simulate closed circuit breaker error + suite.execClient. + On("GetTransactionResultsByBlockID", ctx, exeEventReq). + Return(nil, gobreaker.ErrOpenState). + Once() + + result, err := backend.GetTransactionResultsByBlockID(ctx, blockId, entitiesproto.EventEncodingVersion_JSON_CDC_V0) + suite.Assert().Nil(result) + suite.Assert().Error(err) + suite.Assert().Equal(codes.Unavailable, status.Code(err)) +} + func (suite *Suite) assertAllExpectations() { suite.snapshot.AssertExpectations(suite.T()) suite.state.AssertExpectations(suite.T()) @@ -1964,19 +2115,21 @@ func getEvents(n int) []flow.Event { func (suite *Suite) defaultBackendParams() Params { return Params{ - State: suite.state, - Blocks: suite.blocks, - Headers: suite.headers, - Collections: suite.collections, - Transactions: suite.transactions, - ExecutionReceipts: suite.receipts, - ExecutionResults: suite.results, - ChainID: suite.chainID, - CollectionRPC: suite.colClient, - MaxHeightRange: DefaultMaxHeightRange, - SnapshotHistoryLimit: DefaultSnapshotHistoryLimit, - Communicator: NewNodeCommunicator(false), - AccessMetrics: metrics.NewNoopCollector(), - Log: suite.log, + State: suite.state, + Blocks: suite.blocks, + Headers: suite.headers, + Collections: suite.collections, + Transactions: suite.transactions, + ExecutionReceipts: suite.receipts, + ExecutionResults: suite.results, + LightTransactionResults: suite.transactionResults, + ChainID: suite.chainID, + CollectionRPC: suite.colClient, + MaxHeightRange: DefaultMaxHeightRange, + SnapshotHistoryLimit: DefaultSnapshotHistoryLimit, + Communicator: NewNodeCommunicator(false), + AccessMetrics: metrics.NewNoopCollector(), + Log: suite.log, + TxErrorMessagesCacheSize: 1000, } } diff --git a/engine/access/rpc/backend/backend_transactions.go b/engine/access/rpc/backend/backend_transactions.go index 39ecab0d97d..f1670eee7e3 100644 --- a/engine/access/rpc/backend/backend_transactions.go +++ b/engine/access/rpc/backend/backend_transactions.go @@ -21,6 +21,8 @@ import ( "github.com/onflow/flow-go/fvm/blueprints" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/state" "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/storage" ) @@ -31,6 +33,7 @@ type backendTransactions struct { executionReceipts storage.ExecutionReceipts collections storage.Collections blocks storage.Blocks + results storage.LightTransactionResults state protocol.State chainID flow.ChainID transactionMetrics module.TransactionMetrics @@ -38,10 +41,11 @@ type backendTransactions struct { retry *Retry connFactory connection.ConnectionFactory - previousAccessNodes []accessproto.AccessAPIClient - log zerolog.Logger - nodeCommunicator Communicator - txResultCache *lru.Cache[flow.Identifier, *access.TransactionResult] + previousAccessNodes []accessproto.AccessAPIClient + log zerolog.Logger + nodeCommunicator Communicator + txResultCache *lru.Cache[flow.Identifier, *access.TransactionResult] + txErrorMessagesCache *lru.Cache[flow.Identifier, string] // cache for transactions error messages, indexed by hash(block_id, tx_id). } // SendTransaction forwards the transaction to the collection node @@ -140,7 +144,7 @@ func (b *backendTransactions) sendTransactionToCollector(ctx context.Context, tx *flow.TransactionBody, collectionNodeAddr string) error { - collectionRPC, closer, err := b.connFactory.GetAccessAPIClient(collectionNodeAddr) + collectionRPC, closer, err := b.connFactory.GetAccessAPIClient(collectionNodeAddr, nil) if err != nil { return fmt.Errorf("failed to connect to collection node at %s: %w", collectionNodeAddr, err) } @@ -193,7 +197,7 @@ func (b *backendTransactions) GetTransaction(ctx context.Context, txID flow.Iden } func (b *backendTransactions) GetTransactionsByBlockID( - ctx context.Context, + _ context.Context, blockID flow.Identifier, ) ([]*flow.TransactionBody, error) { var transactions []*flow.TransactionBody @@ -317,6 +321,9 @@ func (b *backendTransactions) GetTransactionResult( // derive status of the transaction txStatus, err := b.deriveTransactionStatus(tx, transactionWasExecuted, block) if err != nil { + if !errors.Is(err, state.ErrUnknownSnapshotReference) { + irrecoverable.Throw(ctx, err) + } return nil, rpc.ConvertStorageError(err) } @@ -358,7 +365,6 @@ func (b *backendTransactions) lookupCollectionIDInBlock( // retrieveBlock function returns a block based on the input argument. The block ID lookup has the highest priority, // followed by the collection ID lookup. If both are missing, the default lookup by transaction ID is performed. func (b *backendTransactions) retrieveBlock( - // the requested block or collection was not found. If looking up the block based solely on the txID returns // not found, then no error is returned. blockID flow.Identifier, @@ -434,6 +440,9 @@ func (b *backendTransactions) GetTransactionResultsByBlockID( // tx body is irrelevant to status if it's in an executed block txStatus, err := b.deriveTransactionStatus(nil, true, block) if err != nil { + if !errors.Is(err, state.ErrUnknownSnapshotReference) { + irrecoverable.Throw(ctx, err) + } return nil, rpc.ConvertStorageError(err) } events, err := convert.MessagesToEventsWithEncodingConversion(txResult.GetEvents(), resp.GetEventEncodingVersion(), requiredEventEncodingVersion) @@ -484,6 +493,9 @@ func (b *backendTransactions) GetTransactionResultsByBlockID( systemTxResult := resp.TransactionResults[len(resp.TransactionResults)-1] systemTxStatus, err := b.deriveTransactionStatus(systemTx, true, block) if err != nil { + if !errors.Is(err, state.ErrUnknownSnapshotReference) { + irrecoverable.Throw(ctx, err) + } return nil, rpc.ConvertStorageError(err) } @@ -542,6 +554,9 @@ func (b *backendTransactions) GetTransactionResultByIndex( // tx body is irrelevant to status if it's in an executed block txStatus, err := b.deriveTransactionStatus(nil, true, block) if err != nil { + if !errors.Is(err, state.ErrUnknownSnapshotReference) { + irrecoverable.Throw(ctx, err) + } return nil, rpc.ConvertStorageError(err) } @@ -561,13 +576,75 @@ func (b *backendTransactions) GetTransactionResultByIndex( }, nil } +// GetSystemTransaction returns system transaction +func (b *backendTransactions) GetSystemTransaction(ctx context.Context, _ flow.Identifier) (*flow.TransactionBody, error) { + systemTx, err := blueprints.SystemChunkTransaction(b.chainID.Chain()) + if err != nil { + return nil, status.Errorf(codes.Internal, "could not get system chunk transaction: %v", err) + } + + return systemTx, nil +} + +// GetSystemTransactionResult returns system transaction result +func (b *backendTransactions) GetSystemTransactionResult(ctx context.Context, blockID flow.Identifier, requiredEventEncodingVersion entities.EventEncodingVersion) (*access.TransactionResult, error) { + block, err := b.blocks.ByID(blockID) + if err != nil { + return nil, rpc.ConvertStorageError(err) + } + + req := &execproto.GetTransactionsByBlockIDRequest{ + BlockId: blockID[:], + } + execNodes, err := executionNodesForBlockID(ctx, blockID, b.executionReceipts, b.state, b.log) + if err != nil { + if IsInsufficientExecutionReceipts(err) { + return nil, status.Errorf(codes.NotFound, err.Error()) + } + return nil, rpc.ConvertError(err, "failed to retrieve result from any execution node", codes.Internal) + } + + resp, err := b.getTransactionResultsByBlockIDFromAnyExeNode(ctx, execNodes, req) + if err != nil { + return nil, rpc.ConvertError(err, "failed to retrieve result from execution node", codes.Internal) + } + + systemTx, err := blueprints.SystemChunkTransaction(b.chainID.Chain()) + if err != nil { + return nil, status.Errorf(codes.Internal, "could not get system chunk transaction: %v", err) + } + + systemTxResult := resp.TransactionResults[len(resp.TransactionResults)-1] + systemTxStatus, err := b.deriveTransactionStatus(systemTx, true, block) + if err != nil { + return nil, rpc.ConvertStorageError(err) + } + + events, err := convert.MessagesToEventsWithEncodingConversion(systemTxResult.GetEvents(), resp.GetEventEncodingVersion(), requiredEventEncodingVersion) + if err != nil { + return nil, rpc.ConvertError(err, "failed to convert events from system tx result", codes.Internal) + } + + return &access.TransactionResult{ + Status: systemTxStatus, + StatusCode: uint(systemTxResult.GetStatusCode()), + Events: events, + ErrorMessage: systemTxResult.GetErrorMessage(), + BlockID: blockID, + TransactionID: systemTx.ID(), + BlockHeight: block.Header.Height, + }, nil +} + // deriveTransactionStatus derives the transaction status based on current protocol state +// Error returns: +// - state.ErrUnknownSnapshotReference - block referenced by transaction has not been found. +// - all other errors are unexpected and potentially symptoms of internal implementation bugs or state corruption (fatal). func (b *backendTransactions) deriveTransactionStatus( tx *flow.TransactionBody, executed bool, block *flow.Block, ) (flow.TransactionStatus, error) { - if block == nil { // Not in a block, let's see if it's expired referenceBlock, err := b.state.AtBlockID(tx.ReferenceBlockID).Head() @@ -578,7 +655,7 @@ func (b *backendTransactions) deriveTransactionStatus( // get the latest finalized block from the state finalized, err := b.state.Final().Head() if err != nil { - return flow.TransactionStatusUnknown, err + return flow.TransactionStatusUnknown, irrecoverable.NewExceptionf("failed to lookup final header: %w", err) } finalizedHeight := finalized.Height @@ -622,7 +699,7 @@ func (b *backendTransactions) deriveTransactionStatus( // get the latest sealed block from the State sealed, err := b.state.Sealed().Head() if err != nil { - return flow.TransactionStatusUnknown, err + return flow.TransactionStatusUnknown, irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) } if block.Header.Height > sealed.Height { @@ -643,6 +720,9 @@ func (b *backendTransactions) isExpired(refHeight, compareToHeight uint64) bool return compareToHeight-refHeight > flow.DefaultTransactionExpiry } +// Error returns: +// - `storage.ErrNotFound` - collection referenced by transaction or block by a collection has not been found. +// - all other errors are unexpected and potentially symptoms of internal implementation bugs or state corruption (fatal). func (b *backendTransactions) lookupBlock(txID flow.Identifier) (*flow.Block, error) { collection, err := b.collections.LightByTransactionID(txID) @@ -778,8 +858,13 @@ func (b *backendTransactions) getTransactionResultFromExecutionNode( return events, resp.GetStatusCode(), resp.GetErrorMessage(), nil } -func (b *backendTransactions) NotifyFinalizedBlockHeight(height uint64) { - b.retry.Retry(height) +// ATTENTION: might be a source of problems in future. We run this code on finalization gorotuine, +// potentially lagging finalization events if operations take long time. +// We might need to move this logic on dedicated goroutine and provide a way to skip finalization events if they are delivered +// too often for this engine. An example of similar approach - https://github.com/onflow/flow-go/blob/10b0fcbf7e2031674c00f3cdd280f27bd1b16c47/engine/common/follower/compliance_engine.go#L201.. +// No errors expected during normal operations. +func (b *backendTransactions) ProcessFinalizedBlockHeight(height uint64) error { + return b.retry.Retry(height) } func (b *backendTransactions) getTransactionResultFromAnyExeNode( @@ -951,3 +1036,346 @@ func (b *backendTransactions) tryGetTransactionResultByIndex( return resp, nil } + +// lookupTransactionErrorMessage returns transaction error message for specified transaction. +// If an error message for transaction can be found in the cache then it will be used to serve the request, otherwise +// an RPC call will be made to the EN to fetch that error message, fetched value will be cached in the LRU cache. +// Expected errors during normal operation: +// - InsufficientExecutionReceipts - found insufficient receipts for given block ID. +// - status.Error - remote GRPC call to EN has failed. +func (b *backendTransactions) lookupTransactionErrorMessage( + ctx context.Context, + blockID flow.Identifier, + transactionID flow.Identifier, +) (string, error) { + var cacheKey flow.Identifier + var value string + + if b.txErrorMessagesCache != nil { + cacheKey = flow.MakeIDFromFingerPrint(append(blockID[:], transactionID[:]...)) + value, cached := b.txErrorMessagesCache.Get(cacheKey) + if cached { + return value, nil + } + } + + execNodes, err := executionNodesForBlockID(ctx, blockID, b.executionReceipts, b.state, b.log) + if err != nil { + if IsInsufficientExecutionReceipts(err) { + return "", status.Errorf(codes.NotFound, err.Error()) + } + return "", rpc.ConvertError(err, "failed to select execution nodes", codes.Internal) + } + req := &execproto.GetTransactionErrorMessageRequest{ + BlockId: convert.IdentifierToMessage(blockID), + TransactionId: convert.IdentifierToMessage(transactionID), + } + + resp, err := b.getTransactionErrorMessageFromAnyEN(ctx, execNodes, req) + if err != nil { + return "", fmt.Errorf("could not fetch error message from ENs: %w", err) + } + value = resp.ErrorMessage + + if b.txErrorMessagesCache != nil { + b.txErrorMessagesCache.Add(cacheKey, value) + } + + return value, nil +} + +// lookupTransactionErrorMessageByIndex returns transaction error message for specified transaction using its index. +// If an error message for transaction can be found in cache then it will be used to serve the request, otherwise +// an RPC call will be made to the EN to fetch that error message, fetched value will be cached in the LRU cache. +// Expected errors during normal operation: +// - status.Error[codes.NotFound] - transaction result for given block ID and tx index is not available. +// - InsufficientExecutionReceipts - found insufficient receipts for given block ID. +// - status.Error - remote GRPC call to EN has failed. +func (b *backendTransactions) lookupTransactionErrorMessageByIndex( + ctx context.Context, + blockID flow.Identifier, + index uint32, +) (string, error) { + txResult, err := b.results.ByBlockIDTransactionIndex(blockID, index) + if err != nil { + return "", rpc.ConvertStorageError(err) + } + + var cacheKey flow.Identifier + var value string + + if b.txErrorMessagesCache != nil { + cacheKey = flow.MakeIDFromFingerPrint(append(blockID[:], txResult.TransactionID[:]...)) + value, cached := b.txErrorMessagesCache.Get(cacheKey) + if cached { + return value, nil + } + } + + execNodes, err := executionNodesForBlockID(ctx, blockID, b.executionReceipts, b.state, b.log) + if err != nil { + if IsInsufficientExecutionReceipts(err) { + return "", status.Errorf(codes.NotFound, err.Error()) + } + return "", rpc.ConvertError(err, "failed to select execution nodes", codes.Internal) + } + req := &execproto.GetTransactionErrorMessageByIndexRequest{ + BlockId: convert.IdentifierToMessage(blockID), + Index: index, + } + + resp, err := b.getTransactionErrorMessageByIndexFromAnyEN(ctx, execNodes, req) + if err != nil { + return "", fmt.Errorf("could not fetch error message from ENs: %w", err) + } + value = resp.ErrorMessage + + if b.txErrorMessagesCache != nil { + b.txErrorMessagesCache.Add(cacheKey, value) + } + + return value, nil +} + +// lookupTransactionErrorMessagesByBlockID returns all error messages for failed transactions by blockID. +// An RPC call will be made to the EN to fetch missing errors messages, fetched value will be cached in the LRU cache. +// Expected errors during normal operation: +// - status.Error[codes.NotFound] - transaction results for given block ID are not available. +// - InsufficientExecutionReceipts - found insufficient receipts for given block ID. +// - status.Error - remote GRPC call to EN has failed. +func (b *backendTransactions) lookupTransactionErrorMessagesByBlockID( + ctx context.Context, + blockID flow.Identifier, +) (map[flow.Identifier]string, error) { + txResults, err := b.results.ByBlockID(blockID) + if err != nil { + return nil, rpc.ConvertStorageError(err) + } + + results := make(map[flow.Identifier]string) + + if b.txErrorMessagesCache != nil { + needToFetch := false + for _, txResult := range txResults { + if txResult.Failed { + cacheKey := flow.MakeIDFromFingerPrint(append(blockID[:], txResult.TransactionID[:]...)) + if value, ok := b.txErrorMessagesCache.Get(cacheKey); ok { + results[txResult.TransactionID] = value + } else { + needToFetch = true + } + } + } + + // all transactions were served from cache or there were no failed transactions + if !needToFetch { + return results, nil + } + } + + execNodes, err := executionNodesForBlockID(ctx, blockID, b.executionReceipts, b.state, b.log) + if err != nil { + if IsInsufficientExecutionReceipts(err) { + return nil, status.Errorf(codes.NotFound, err.Error()) + } + return nil, rpc.ConvertError(err, "failed to select execution nodes", codes.Internal) + } + req := &execproto.GetTransactionErrorMessagesByBlockIDRequest{ + BlockId: convert.IdentifierToMessage(blockID), + } + + resp, err := b.getTransactionErrorMessagesFromAnyEN(ctx, execNodes, req) + if err != nil { + return nil, fmt.Errorf("could not fetch error message from ENs: %w", err) + } + result := make(map[flow.Identifier]string, len(resp)) + for _, value := range resp { + if b.txErrorMessagesCache != nil { + cacheKey := flow.MakeIDFromFingerPrint(append(req.BlockId, value.TransactionId...)) + b.txErrorMessagesCache.Add(cacheKey, value.ErrorMessage) + } + result[convert.MessageToIdentifier(value.TransactionId)] = value.ErrorMessage + } + return result, nil +} + +// getTransactionErrorMessageFromAnyEN performs an RPC call using available nodes passed as argument. List of nodes must be non-empty otherwise an error will be returned. +// Expected errors during normal operation: +// - status.Error - GRPC call failed, some of possible codes are: +// - codes.NotFound - request cannot be served by EN because of absence of data. +// - codes.Unavailable - remote node is not unavailable. +func (b *backendTransactions) getTransactionErrorMessageFromAnyEN( + ctx context.Context, + execNodes flow.IdentitySkeletonList, + req *execproto.GetTransactionErrorMessageRequest, +) (*execproto.GetTransactionErrorMessageResponse, error) { + // if we were passed 0 execution nodes add a specific error + if len(execNodes) == 0 { + return nil, errors.New("zero execution nodes") + } + + var resp *execproto.GetTransactionErrorMessageResponse + errToReturn := b.nodeCommunicator.CallAvailableNode( + execNodes, + func(node *flow.IdentitySkeleton) error { + var err error + resp, err = b.tryGetTransactionErrorMessageFromEN(ctx, node, req) + if err == nil { + b.log.Debug(). + Str("execution_node", node.String()). + Hex("block_id", req.GetBlockId()). + Hex("transaction_id", req.GetTransactionId()). + Msg("Successfully got transaction error message from any node") + return nil + } + return err + }, + nil, + ) + + // log the errors + if errToReturn != nil { + b.log.Err(errToReturn).Msg("failed to get transaction error message from execution nodes") + return nil, errToReturn + } + + return resp, nil +} + +// getTransactionErrorMessageFromAnyEN performs an RPC call using available nodes passed as argument. List of nodes must be non-empty otherwise an error will be returned. +// Expected errors during normal operation: +// - status.Error - GRPC call failed, some of possible codes are: +// - codes.NotFound - request cannot be served by EN because of absence of data. +// - codes.Unavailable - remote node is not unavailable. +func (b *backendTransactions) getTransactionErrorMessageByIndexFromAnyEN( + ctx context.Context, + execNodes flow.IdentitySkeletonList, + req *execproto.GetTransactionErrorMessageByIndexRequest, +) (*execproto.GetTransactionErrorMessageResponse, error) { + // if we were passed 0 execution nodes add a specific error + if len(execNodes) == 0 { + return nil, errors.New("zero execution nodes") + } + + var resp *execproto.GetTransactionErrorMessageResponse + errToReturn := b.nodeCommunicator.CallAvailableNode( + execNodes, + func(node *flow.IdentitySkeleton) error { + var err error + resp, err = b.tryGetTransactionErrorMessageByIndexFromEN(ctx, node, req) + if err == nil { + b.log.Debug(). + Str("execution_node", node.String()). + Hex("block_id", req.GetBlockId()). + Uint32("index", req.GetIndex()). + Msg("Successfully got transaction error message by index from any node") + return nil + } + return err + }, + nil, + ) + if errToReturn != nil { + b.log.Err(errToReturn).Msg("failed to get transaction error message by index from execution nodes") + return nil, errToReturn + } + + return resp, nil +} + +// getTransactionErrorMessagesFromAnyEN performs an RPC call using available nodes passed as argument. List of nodes must be non-empty otherwise an error will be returned. +// Expected errors during normal operation: +// - status.Error - GRPC call failed, some of possible codes are: +// - codes.NotFound - request cannot be served by EN because of absence of data. +// - codes.Unavailable - remote node is not unavailable. +func (b *backendTransactions) getTransactionErrorMessagesFromAnyEN( + ctx context.Context, + execNodes flow.IdentitySkeletonList, + req *execproto.GetTransactionErrorMessagesByBlockIDRequest, +) ([]*execproto.GetTransactionErrorMessagesResponse_Result, error) { + // if we were passed 0 execution nodes add a specific error + if len(execNodes) == 0 { + return nil, errors.New("zero execution nodes") + } + + var resp *execproto.GetTransactionErrorMessagesResponse + errToReturn := b.nodeCommunicator.CallAvailableNode( + execNodes, + func(node *flow.IdentitySkeleton) error { + var err error + resp, err = b.tryGetTransactionErrorMessagesByBlockIDFromEN(ctx, node, req) + if err == nil { + b.log.Debug(). + Str("execution_node", node.String()). + Hex("block_id", req.GetBlockId()). + Msg("Successfully got transaction error messages from any node") + return nil + } + return err + }, + nil, + ) + + // log the errors + if errToReturn != nil { + b.log.Err(errToReturn).Msg("failed to get transaction error messages from execution nodes") + return nil, errToReturn + } + + return resp.GetResults(), nil +} + +// Expected errors during normal operation: +// - status.Error - GRPC call failed, some of possible codes are: +// - codes.NotFound - request cannot be served by EN because of absence of data. +// - codes.Unavailable - remote node is not unavailable. +// tryGetTransactionErrorMessageFromEN performs a grpc call to the specified execution node and returns response. + +func (b *backendTransactions) tryGetTransactionErrorMessageFromEN( + ctx context.Context, + execNode *flow.IdentitySkeleton, + req *execproto.GetTransactionErrorMessageRequest, +) (*execproto.GetTransactionErrorMessageResponse, error) { + execRPCClient, closer, err := b.connFactory.GetExecutionAPIClient(execNode.Address) + if err != nil { + return nil, err + } + defer closer.Close() + return execRPCClient.GetTransactionErrorMessage(ctx, req) +} + +// tryGetTransactionErrorMessageByIndexFromEN performs a grpc call to the specified execution node and returns response. +// Expected errors during normal operation: +// - status.Error - GRPC call failed, some of possible codes are: +// - codes.NotFound - request cannot be served by EN because of absence of data. +// - codes.Unavailable - remote node is not unavailable. +func (b *backendTransactions) tryGetTransactionErrorMessageByIndexFromEN( + ctx context.Context, + execNode *flow.IdentitySkeleton, + req *execproto.GetTransactionErrorMessageByIndexRequest, +) (*execproto.GetTransactionErrorMessageResponse, error) { + execRPCClient, closer, err := b.connFactory.GetExecutionAPIClient(execNode.Address) + if err != nil { + return nil, err + } + defer closer.Close() + return execRPCClient.GetTransactionErrorMessageByIndex(ctx, req) +} + +// tryGetTransactionErrorMessagesByBlockIDFromEN performs a grpc call to the specified execution node and returns response. +// Expected errors during normal operation: +// - status.Error - GRPC call failed, some of possible codes are: +// - codes.NotFound - request cannot be served by EN because of absence of data. +// - codes.Unavailable - remote node is not unavailable. +func (b *backendTransactions) tryGetTransactionErrorMessagesByBlockIDFromEN( + ctx context.Context, + execNode *flow.IdentitySkeleton, + req *execproto.GetTransactionErrorMessagesByBlockIDRequest, +) (*execproto.GetTransactionErrorMessagesResponse, error) { + execRPCClient, closer, err := b.connFactory.GetExecutionAPIClient(execNode.Address) + if err != nil { + return nil, err + } + defer closer.Close() + return execRPCClient.GetTransactionErrorMessagesByBlockID(ctx, req) +} diff --git a/engine/access/rpc/backend/backend_transactions_test.go b/engine/access/rpc/backend/backend_transactions_test.go index 3275f04bf9d..2810bb7de72 100644 --- a/engine/access/rpc/backend/backend_transactions_test.go +++ b/engine/access/rpc/backend/backend_transactions_test.go @@ -3,22 +3,30 @@ package backend import ( "context" "fmt" + "math/rand" "github.com/dgraph-io/badger/v2" + jsoncdc "github.com/onflow/cadence/encoding/json" "github.com/onflow/flow/protobuf/go/flow/access" "github.com/onflow/flow/protobuf/go/flow/entities" + execproto "github.com/onflow/flow/protobuf/go/flow/execution" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" acc "github.com/onflow/flow-go/access" + connectionmock "github.com/onflow/flow-go/engine/access/rpc/connection/mock" + "github.com/onflow/flow-go/engine/common/rpc/convert" + "github.com/onflow/flow-go/fvm/blueprints" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/model/flow/filter" "github.com/onflow/flow-go/state/protocol" bprotocol "github.com/onflow/flow-go/state/protocol/badger" "github.com/onflow/flow-go/state/protocol/util" "github.com/onflow/flow-go/storage" "github.com/onflow/flow-go/utils/unittest" + "github.com/onflow/flow-go/utils/unittest/generator" ) func (suite *Suite) withPreConfiguredState(f func(snap protocol.Snapshot)) { @@ -40,8 +48,8 @@ func (suite *Suite) withPreConfiguredState(f func(snap protocol.Snapshot)) { suite.state.On("AtHeight", epoch1.Range()).Return(state.AtHeight(height)) } - snap := state.AtHeight(epoch1.Range()[0]) - suite.state.On("Final").Return(snap).Once() + snap := state.AtHeight(epoch1.FinalHeight()) + suite.state.On("Final").Return(snap) suite.communicator.On("CallAvailableNode", mock.Anything, mock.Anything, @@ -277,7 +285,7 @@ func (suite *Suite) TestGetTransactionResultCacheNonExistent() { }) } -// TestGetTransactionResultUnknownFromCache retrive unknown result from cache +// TestGetTransactionResultUnknownFromCache retrieve unknown result from cache. func (suite *Suite) TestGetTransactionResultUnknownFromCache() { suite.withGetTransactionCachingTestSetup(func(block *flow.Block, tx *flow.Transaction) { suite.historicalAccessClient. @@ -328,3 +336,623 @@ func (suite *Suite) TestGetTransactionResultUnknownFromCache() { suite.historicalAccessClient.AssertExpectations(suite.T()) }) } + +// TestLookupTransactionErrorMessage_HappyPath tests lookup of a transaction error message. In a happy path if it wasn't found in the cache, it +// has to be fetched from the execution node, otherwise served from the cache. +// If the transaction has not failed, the error message must be empty. +func (suite *Suite) TestLookupTransactionErrorMessage_HappyPath() { + block := unittest.BlockFixture() + blockId := block.ID() + failedTx := unittest.TransactionFixture() + failedTxId := failedTx.ID() + + _, fixedENIDs := suite.setupReceipts(&block) + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Identities", mock.Anything).Return(fixedENIDs, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + params := suite.defaultBackendParams() + // the connection factory should be used to get the execution node client + params.ConnFactory = connFactory + params.FixedExecutionNodeIDs = fixedENIDs.NodeIDs().Strings() + + backend, err := New(params) + suite.Require().NoError(err) + + expectedErrorMsg := "some error" + + exeEventReq := &execproto.GetTransactionErrorMessageRequest{ + BlockId: blockId[:], + TransactionId: failedTxId[:], + } + + exeEventResp := &execproto.GetTransactionErrorMessageResponse{ + TransactionId: failedTxId[:], + ErrorMessage: expectedErrorMsg, + } + + suite.execClient.On("GetTransactionErrorMessage", mock.Anything, exeEventReq).Return(exeEventResp, nil).Once() + + errMsg, err := backend.lookupTransactionErrorMessage(context.Background(), blockId, failedTxId) + suite.Require().NoError(err) + suite.Require().Equal(expectedErrorMsg, errMsg) + + // ensure the transaction error message is cached after retrieval; we do this by mocking the grpc call + // only once + errMsg, err = backend.lookupTransactionErrorMessage(context.Background(), blockId, failedTxId) + suite.Require().NoError(err) + suite.Require().Equal(expectedErrorMsg, errMsg) + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessage_FailedToFetch tests lookup of a transaction error message, when a transaction result +// is not in the cache and needs to be fetched from EN, but the EN fails to return it. +func (suite *Suite) TestLookupTransactionErrorMessage_FailedToFetch() { + block := unittest.BlockFixture() + blockId := block.ID() + failedTx := unittest.TransactionFixture() + failedTxId := failedTx.ID() + + _, fixedENIDs := suite.setupReceipts(&block) + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Identities", mock.Anything).Return(fixedENIDs, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + params := suite.defaultBackendParams() + // the connection factory should be used to get the execution node client + params.ConnFactory = connFactory + params.FixedExecutionNodeIDs = fixedENIDs.NodeIDs().Strings() + + backend, err := New(params) + suite.Require().NoError(err) + + // lookup should try each of the 2 ENs in fixedENIDs + suite.execClient.On("GetTransactionErrorMessage", mock.Anything, mock.Anything).Return(nil, + status.Error(codes.Unavailable, "")).Twice() + + errMsg, err := backend.lookupTransactionErrorMessage(context.Background(), blockId, failedTxId) + suite.Require().Error(err) + suite.Require().Equal(codes.Unavailable, status.Code(err)) + suite.Require().Empty(errMsg) + + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessageByIndex_HappyPath tests lookup of a transaction error message by index. +// In a happy path if it wasn't found in the cache, it has to be fetched from the execution node, otherwise served from the cache. +// If the transaction has not failed, the error message must be empty. +func (suite *Suite) TestLookupTransactionErrorMessageByIndex_HappyPath() { + block := unittest.BlockFixture() + blockId := block.ID() + failedTx := unittest.TransactionFixture() + failedTxId := failedTx.ID() + failedTxIndex := rand.Uint32() + + suite.transactionResults.On("ByBlockIDTransactionIndex", blockId, failedTxIndex). + Return(&flow.LightTransactionResult{ + TransactionID: failedTxId, + Failed: true, + ComputationUsed: 0, + }, nil).Twice() + + _, fixedENIDs := suite.setupReceipts(&block) + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Identities", mock.Anything).Return(fixedENIDs, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + params := suite.defaultBackendParams() + // the connection factory should be used to get the execution node client + params.ConnFactory = connFactory + params.FixedExecutionNodeIDs = fixedENIDs.NodeIDs().Strings() + + backend, err := New(params) + suite.Require().NoError(err) + + expectedErrorMsg := "some error" + + exeEventReq := &execproto.GetTransactionErrorMessageByIndexRequest{ + BlockId: blockId[:], + Index: failedTxIndex, + } + + exeEventResp := &execproto.GetTransactionErrorMessageResponse{ + TransactionId: failedTxId[:], + ErrorMessage: expectedErrorMsg, + } + + suite.execClient.On("GetTransactionErrorMessageByIndex", mock.Anything, exeEventReq).Return(exeEventResp, nil).Once() + + errMsg, err := backend.lookupTransactionErrorMessageByIndex(context.Background(), blockId, failedTxIndex) + suite.Require().NoError(err) + suite.Require().Equal(expectedErrorMsg, errMsg) + + // ensure the transaction error message is cached after retrieval; we do this by mocking the grpc call + // only once + errMsg, err = backend.lookupTransactionErrorMessageByIndex(context.Background(), blockId, failedTxIndex) + suite.Require().NoError(err) + suite.Require().Equal(expectedErrorMsg, errMsg) + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessageByIndex_UnknownTransaction tests lookup of a transaction error message by index, +// when a transaction result has not been synced yet, in this case nothing we can do but return an error. +func (suite *Suite) TestLookupTransactionErrorMessageByIndex_UnknownTransaction() { + block := unittest.BlockFixture() + blockId := block.ID() + failedTxIndex := rand.Uint32() + + suite.transactionResults.On("ByBlockIDTransactionIndex", blockId, failedTxIndex). + Return(nil, storage.ErrNotFound).Once() + + params := suite.defaultBackendParams() + backend, err := New(params) + suite.Require().NoError(err) + + errMsg, err := backend.lookupTransactionErrorMessageByIndex(context.Background(), blockId, failedTxIndex) + suite.Require().Error(err) + suite.Require().Equal(codes.NotFound, status.Code(err)) + suite.Require().Empty(errMsg) + + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessageByIndex_FailedToFetch tests lookup of a transaction error message by index, +// when a transaction result is not in the cache and needs to be fetched from EN, but the EN fails to return it. +func (suite *Suite) TestLookupTransactionErrorMessageByIndex_FailedToFetch() { + block := unittest.BlockFixture() + blockId := block.ID() + failedTx := unittest.TransactionFixture() + failedTxId := failedTx.ID() + failedTxIndex := rand.Uint32() + + suite.transactionResults.On("ByBlockIDTransactionIndex", blockId, failedTxIndex). + Return(&flow.LightTransactionResult{ + TransactionID: failedTxId, + Failed: true, + ComputationUsed: 0, + }, nil).Once() + + _, fixedENIDs := suite.setupReceipts(&block) + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Identities", mock.Anything).Return(fixedENIDs, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + params := suite.defaultBackendParams() + // the connection factory should be used to get the execution node client + params.ConnFactory = connFactory + params.FixedExecutionNodeIDs = fixedENIDs.NodeIDs().Strings() + + backend, err := New(params) + suite.Require().NoError(err) + + // lookup should try each of the 2 ENs in fixedENIDs + suite.execClient.On("GetTransactionErrorMessageByIndex", mock.Anything, mock.Anything).Return(nil, + status.Error(codes.Unavailable, "")).Twice() + + errMsg, err := backend.lookupTransactionErrorMessageByIndex(context.Background(), blockId, failedTxIndex) + suite.Require().Error(err) + suite.Require().Equal(codes.Unavailable, status.Code(err)) + suite.Require().Empty(errMsg) + + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessages_HappyPath tests lookup of a transaction error messages by block ID. +// In a happy path, it has to be fetched from the execution node if there are no cached results. +// All fetched transactions have to be cached for future calls. +func (suite *Suite) TestLookupTransactionErrorMessages_HappyPath() { + block := unittest.BlockFixture() + blockId := block.ID() + + resultsByBlockID := make([]flow.LightTransactionResult, 0) + for i := 0; i < 5; i++ { + resultsByBlockID = append(resultsByBlockID, flow.LightTransactionResult{ + TransactionID: unittest.IdentifierFixture(), + Failed: i%2 == 0, // create a mix of failed and non-failed transactions + ComputationUsed: 0, + }) + } + + suite.transactionResults.On("ByBlockID", blockId). + Return(resultsByBlockID, nil).Twice() + + _, fixedENIDs := suite.setupReceipts(&block) + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Identities", mock.Anything).Return(fixedENIDs, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + params := suite.defaultBackendParams() + // the connection factory should be used to get the execution node client + params.ConnFactory = connFactory + params.FixedExecutionNodeIDs = fixedENIDs.NodeIDs().Strings() + + backend, err := New(params) + suite.Require().NoError(err) + + expectedErrorMsg := "some error" + + exeEventReq := &execproto.GetTransactionErrorMessagesByBlockIDRequest{ + BlockId: blockId[:], + } + + exeEventResp := &execproto.GetTransactionErrorMessagesResponse{} + for _, result := range resultsByBlockID { + r := result + if r.Failed { + errMsg := fmt.Sprintf("%s.%s", expectedErrorMsg, r.TransactionID) + exeEventResp.Results = append(exeEventResp.Results, &execproto.GetTransactionErrorMessagesResponse_Result{ + TransactionId: r.TransactionID[:], + ErrorMessage: errMsg, + }) + } + } + + suite.execClient.On("GetTransactionErrorMessagesByBlockID", mock.Anything, exeEventReq). + Return(exeEventResp, nil). + Once() + + errMessages, err := backend.lookupTransactionErrorMessagesByBlockID(context.Background(), blockId) + suite.Require().NoError(err) + suite.Require().Len(errMessages, len(exeEventResp.Results)) + for _, expectedResult := range exeEventResp.Results { + errMsg, ok := errMessages[convert.MessageToIdentifier(expectedResult.TransactionId)] + suite.Require().True(ok) + suite.Assert().Equal(expectedResult.ErrorMessage, errMsg) + } + + // ensure the transaction error message is cached after retrieval; we do this by mocking the grpc call + // only once + errMessages, err = backend.lookupTransactionErrorMessagesByBlockID(context.Background(), blockId) + suite.Require().NoError(err) + suite.Require().Len(errMessages, len(exeEventResp.Results)) + for _, expectedResult := range exeEventResp.Results { + errMsg, ok := errMessages[convert.MessageToIdentifier(expectedResult.TransactionId)] + suite.Require().True(ok) + suite.Assert().Equal(expectedResult.ErrorMessage, errMsg) + } + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessages_HappyPath_NoFailedTxns tests lookup of a transaction error messages by block ID. +// In a happy path where a block with no failed txns is requested. We don't want to perform an RPC call in this case. +func (suite *Suite) TestLookupTransactionErrorMessages_HappyPath_NoFailedTxns() { + block := unittest.BlockFixture() + blockId := block.ID() + + resultsByBlockID := []flow.LightTransactionResult{ + { + TransactionID: unittest.IdentifierFixture(), + Failed: false, + ComputationUsed: 0, + }, + { + TransactionID: unittest.IdentifierFixture(), + Failed: false, + ComputationUsed: 0, + }, + } + + suite.transactionResults.On("ByBlockID", blockId). + Return(resultsByBlockID, nil).Once() + + params := suite.defaultBackendParams() + + backend, err := New(params) + suite.Require().NoError(err) + + errMessages, err := backend.lookupTransactionErrorMessagesByBlockID(context.Background(), blockId) + suite.Require().NoError(err) + suite.Require().Empty(errMessages) + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessages_UnknownTransaction tests lookup of a transaction error messages by block ID, +// when a transaction results for block has not been synced yet, in this case nothing we can do but return an error. +func (suite *Suite) TestLookupTransactionErrorMessages_UnknownTransaction() { + block := unittest.BlockFixture() + blockId := block.ID() + + suite.transactionResults.On("ByBlockID", blockId). + Return(nil, storage.ErrNotFound).Once() + + params := suite.defaultBackendParams() + backend, err := New(params) + suite.Require().NoError(err) + + errMsg, err := backend.lookupTransactionErrorMessagesByBlockID(context.Background(), blockId) + suite.Require().Error(err) + suite.Require().Equal(codes.NotFound, status.Code(err)) + suite.Require().Empty(errMsg) + + suite.assertAllExpectations() +} + +// TestLookupTransactionErrorMessages_FailedToFetch tests lookup of a transaction error messages by block ID, +// when a transaction result is not in the cache and needs to be fetched from EN, but the EN fails to return it. +func (suite *Suite) TestLookupTransactionErrorMessages_FailedToFetch() { + block := unittest.BlockFixture() + blockId := block.ID() + + resultsByBlockID := []flow.LightTransactionResult{ + { + TransactionID: unittest.IdentifierFixture(), + Failed: true, + ComputationUsed: 0, + }, + { + TransactionID: unittest.IdentifierFixture(), + Failed: true, + ComputationUsed: 0, + }, + } + + suite.transactionResults.On("ByBlockID", blockId). + Return(resultsByBlockID, nil).Once() + + _, fixedENIDs := suite.setupReceipts(&block) + suite.state.On("Final").Return(suite.snapshot, nil).Maybe() + suite.snapshot.On("Identities", mock.Anything).Return(fixedENIDs, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + params := suite.defaultBackendParams() + // the connection factory should be used to get the execution node client + params.ConnFactory = connFactory + params.FixedExecutionNodeIDs = fixedENIDs.NodeIDs().Strings() + + backend, err := New(params) + suite.Require().NoError(err) + + // pretend the first transaction has been cached, but there are multiple failed txns so still a request has to be made. + backend.txErrorMessagesCache.Add(resultsByBlockID[0].TransactionID, "some error") + + suite.execClient.On("GetTransactionErrorMessagesByBlockID", mock.Anything, mock.Anything).Return(nil, + status.Error(codes.Unavailable, "")).Twice() + + errMsg, err := backend.lookupTransactionErrorMessagesByBlockID(context.Background(), blockId) + suite.Require().Error(err) + suite.Require().Equal(codes.Unavailable, status.Code(err)) + suite.Require().Empty(errMsg) + + suite.assertAllExpectations() +} + +// TestGetSystemTransaction_HappyPath tests that GetSystemTransaction call returns system chunk transaction. +func (suite *Suite) TestGetSystemTransaction_HappyPath() { + suite.withPreConfiguredState(func(snap protocol.Snapshot) { + suite.state.On("Sealed").Return(snap, nil).Maybe() + + params := suite.defaultBackendParams() + backend, err := New(params) + suite.Require().NoError(err) + + block := unittest.BlockFixture() + blockID := block.ID() + + // Make the call for the system chunk transaction + res, err := backend.GetSystemTransaction(context.Background(), blockID) + suite.Require().NoError(err) + // Expected system chunk transaction + systemTx, err := blueprints.SystemChunkTransaction(suite.chainID.Chain()) + suite.Require().NoError(err) + + suite.Require().Equal(systemTx, res) + }) +} + +// TestGetSystemTransactionResult_HappyPath tests that GetSystemTransactionResult call returns system transaction +// result for required block id. +func (suite *Suite) TestGetSystemTransactionResult_HappyPath() { + suite.withPreConfiguredState(func(snap protocol.Snapshot) { + suite.state.On("Sealed").Return(snap, nil).Maybe() + lastBlock, err := snap.Head() + suite.Require().NoError(err) + identities, err := snap.Identities(filter.Any) + suite.Require().NoError(err) + + block := unittest.BlockWithParentFixture(lastBlock) + blockID := block.ID() + suite.state.On("AtBlockID", blockID).Return( + unittest.StateSnapshotForKnownBlock(block.Header, identities.Lookup()), nil).Once() + + // block storage returns the corresponding block + suite.blocks. + On("ByID", blockID). + Return(block, nil). + Once() + + receipt1 := unittest.ReceiptForBlockFixture(block) + suite.receipts. + On("ByBlockID", block.ID()). + Return(flow.ExecutionReceiptList{receipt1}, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + // the connection factory should be used to get the execution node client + params := suite.defaultBackendParams() + params.ConnFactory = connFactory + + exeEventReq := &execproto.GetTransactionsByBlockIDRequest{ + BlockId: blockID[:], + } + + // Generating events with event generator + exeNodeEventEncodingVersion := entities.EventEncodingVersion_CCF_V0 + events := generator.GetEventsWithEncoding(1, exeNodeEventEncodingVersion) + eventMessages := convert.EventsToMessages(events) + + exeEventResp := &execproto.GetTransactionResultsResponse{ + TransactionResults: []*execproto.GetTransactionResultResponse{{ + Events: eventMessages, + EventEncodingVersion: exeNodeEventEncodingVersion, + }}, + EventEncodingVersion: exeNodeEventEncodingVersion, + } + + suite.execClient. + On("GetTransactionResultsByBlockID", mock.Anything, exeEventReq). + Return(exeEventResp, nil). + Once() + + backend, err := New(params) + suite.Require().NoError(err) + + // Make the call for the system transaction result + res, err := backend.GetSystemTransactionResult( + context.Background(), + block.ID(), + entities.EventEncodingVersion_JSON_CDC_V0, + ) + suite.Require().NoError(err) + + // Expected system chunk transaction + systemTx, err := blueprints.SystemChunkTransaction(suite.chainID.Chain()) + suite.Require().NoError(err) + + suite.Require().Equal(flow.TransactionStatusExecuted, res.Status) + suite.Require().Equal(systemTx.ID(), res.TransactionID) + + // Check for successful decoding of event + _, err = jsoncdc.Decode(nil, res.Events[0].Payload) + suite.Require().NoError(err) + + events, err = convert.MessagesToEventsWithEncodingConversion(eventMessages, + exeNodeEventEncodingVersion, + entities.EventEncodingVersion_JSON_CDC_V0) + suite.Require().NoError(err) + suite.Require().Equal(events, res.Events) + }) +} + +// TestGetSystemTransactionResult_BlockNotFound tests GetSystemTransactionResult function when block was not found. +func (suite *Suite) TestGetSystemTransactionResult_BlockNotFound() { + suite.withPreConfiguredState(func(snap protocol.Snapshot) { + suite.state.On("Sealed").Return(snap, nil).Maybe() + lastBlock, err := snap.Head() + suite.Require().NoError(err) + identities, err := snap.Identities(filter.Any) + suite.Require().NoError(err) + + block := unittest.BlockWithParentFixture(lastBlock) + blockID := block.ID() + suite.state.On("AtBlockID", blockID).Return( + unittest.StateSnapshotForKnownBlock(block.Header, identities.Lookup()), nil).Once() + + // block storage returns the ErrNotFound error + suite.blocks. + On("ByID", blockID). + Return(nil, storage.ErrNotFound). + Once() + + receipt1 := unittest.ReceiptForBlockFixture(block) + suite.receipts. + On("ByBlockID", block.ID()). + Return(flow.ExecutionReceiptList{receipt1}, nil) + + params := suite.defaultBackendParams() + + backend, err := New(params) + suite.Require().NoError(err) + + // Make the call for the system transaction result + res, err := backend.GetSystemTransactionResult( + context.Background(), + block.ID(), + entities.EventEncodingVersion_JSON_CDC_V0, + ) + + suite.Require().Nil(res) + suite.Require().Error(err) + suite.Require().Equal(err, status.Errorf(codes.NotFound, "not found: %v", fmt.Errorf("key not found"))) + }) +} + +// TestGetSystemTransactionResult_FailedEncodingConversion tests the GetSystemTransactionResult function with different +// event encoding versions. +func (suite *Suite) TestGetSystemTransactionResult_FailedEncodingConversion() { + suite.withPreConfiguredState(func(snap protocol.Snapshot) { + suite.state.On("Sealed").Return(snap, nil).Maybe() + lastBlock, err := snap.Head() + suite.Require().NoError(err) + identities, err := snap.Identities(filter.Any) + suite.Require().NoError(err) + + block := unittest.BlockWithParentFixture(lastBlock) + blockID := block.ID() + suite.state.On("AtBlockID", blockID).Return( + unittest.StateSnapshotForKnownBlock(block.Header, identities.Lookup()), nil).Once() + + // block storage returns the corresponding block + suite.blocks. + On("ByID", blockID). + Return(block, nil). + Once() + + receipt1 := unittest.ReceiptForBlockFixture(block) + suite.receipts. + On("ByBlockID", block.ID()). + Return(flow.ExecutionReceiptList{receipt1}, nil) + + // create a mock connection factory + connFactory := connectionmock.NewConnectionFactory(suite.T()) + connFactory.On("GetExecutionAPIClient", mock.Anything).Return(suite.execClient, &mockCloser{}, nil) + + // the connection factory should be used to get the execution node client + params := suite.defaultBackendParams() + params.ConnFactory = connFactory + + exeEventReq := &execproto.GetTransactionsByBlockIDRequest{ + BlockId: blockID[:], + } + + // create empty events + eventsPerBlock := 10 + eventMessages := make([]*entities.Event, eventsPerBlock) + + exeEventResp := &execproto.GetTransactionResultsResponse{ + TransactionResults: []*execproto.GetTransactionResultResponse{{ + Events: eventMessages, + EventEncodingVersion: entities.EventEncodingVersion_JSON_CDC_V0, + }}, + } + + suite.execClient. + On("GetTransactionResultsByBlockID", mock.Anything, exeEventReq). + Return(exeEventResp, nil). + Once() + + backend, err := New(params) + suite.Require().NoError(err) + + // Make the call for the system transaction result + res, err := backend.GetSystemTransactionResult( + context.Background(), + block.ID(), + entities.EventEncodingVersion_CCF_V0, + ) + + suite.Require().Nil(res) + suite.Require().Error(err) + suite.Require().Equal(err, status.Errorf(codes.Internal, "failed to convert events from system tx result: %v", + fmt.Errorf("conversion from format JSON_CDC_V0 to CCF_V0 is not supported"))) + }) +} diff --git a/engine/access/rpc/backend/node_communicator.go b/engine/access/rpc/backend/node_communicator.go index 99dcbcdf1ba..34b75dddab0 100644 --- a/engine/access/rpc/backend/node_communicator.go +++ b/engine/access/rpc/backend/node_communicator.go @@ -71,7 +71,7 @@ func (b *NodeCommunicator) CallAvailableNode( } if err == gobreaker.ErrOpenState { - if !nodeSelector.HasNext() && len(errs.Errors) == 0 { + if !nodeSelector.HasNext() && errs == nil { errs = multierror.Append(errs, status.Error(codes.Unavailable, "there are no available nodes")) } continue diff --git a/engine/access/rpc/backend/retry.go b/engine/access/rpc/backend/retry.go index 5d967e657bb..bd6e6744ae9 100644 --- a/engine/access/rpc/backend/retry.go +++ b/engine/access/rpc/backend/retry.go @@ -3,9 +3,13 @@ package backend import ( "context" "errors" + "fmt" "sync" + "github.com/rs/zerolog" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/state" "github.com/onflow/flow-go/storage" ) @@ -19,10 +23,12 @@ type Retry struct { transactionByReferencBlockHeight map[uint64]map[flow.Identifier]*flow.TransactionBody backend *Backend active bool + log zerolog.Logger // default logger } -func newRetry() *Retry { +func newRetry(log zerolog.Logger) *Retry { return &Retry{ + log: log, transactionByReferencBlockHeight: map[uint64]map[flow.Identifier]*flow.TransactionBody{}, } } @@ -41,10 +47,16 @@ func (r *Retry) SetBackend(b *Backend) *Retry { return r } -func (r *Retry) Retry(height uint64) { +// Retry attempts to resend transactions for a specified block height. +// It performs cleanup operations, including pruning old transactions, and retries sending +// transactions that are still pending. +// The method takes a block height as input. If the provided height is lower than +// flow.DefaultTransactionExpiry, no retries are performed, and the method returns nil. +// No errors expected during normal operations. +func (r *Retry) Retry(height uint64) error { // No need to retry if height is lower than DefaultTransactionExpiry if height < flow.DefaultTransactionExpiry { - return + return nil } // naive cleanup for now, prune every 120 Blocks @@ -55,20 +67,13 @@ func (r *Retry) Retry(height uint64) { heightToRetry := height - flow.DefaultTransactionExpiry + retryFrequency for heightToRetry < height { - r.retryTxsAtHeight(heightToRetry) - + err := r.retryTxsAtHeight(heightToRetry) + if err != nil { + return err + } heightToRetry = heightToRetry + retryFrequency } - -} - -func (b *Retry) Notify(signal interface{}) bool { - height, ok := signal.(uint64) - if !ok { - return false - } - b.Retry(height) - return true + return nil } // RegisterTransaction adds a transaction that could possibly be retried @@ -95,7 +100,13 @@ func (r *Retry) prune(height uint64) { } } -func (r *Retry) retryTxsAtHeight(heightToRetry uint64) { +// retryTxsAtHeight retries transactions at a specific block height. +// It looks up transactions at the specified height and retries sending +// raw transactions for those that are still pending. It also cleans up +// transactions that are no longer pending or have an unknown status. +// Error returns: +// - errors are unexpected and potentially symptoms of internal implementation bugs or state corruption (fatal). +func (r *Retry) retryTxsAtHeight(heightToRetry uint64) error { r.mu.Lock() defer r.mu.Unlock() txsAtHeight := r.transactionByReferencBlockHeight[heightToRetry] @@ -104,7 +115,7 @@ func (r *Retry) retryTxsAtHeight(heightToRetry uint64) { block, err := r.backend.lookupBlock(txID) if err != nil { if !errors.Is(err, storage.ErrNotFound) { - continue + return err } block = nil } @@ -112,13 +123,20 @@ func (r *Retry) retryTxsAtHeight(heightToRetry uint64) { // find the transaction status status, err := r.backend.deriveTransactionStatus(tx, false, block) if err != nil { + if !errors.Is(err, state.ErrUnknownSnapshotReference) { + return err + } continue } if status == flow.TransactionStatusPending { - _ = r.backend.SendRawTransaction(context.Background(), tx) + err = r.backend.SendRawTransaction(context.Background(), tx) + if err != nil { + r.log.Info().Str("retry", fmt.Sprintf("retryTxsAtHeight: %v", heightToRetry)).Err(err).Msg("failed to send raw transactions") + } } else if status != flow.TransactionStatusUnknown { // not pending or unknown, don't need to retry anymore delete(txsAtHeight, txID) } } + return nil } diff --git a/engine/access/rpc/backend/retry_test.go b/engine/access/rpc/backend/retry_test.go index 3b705049ff5..544c9a9669b 100644 --- a/engine/access/rpc/backend/retry_test.go +++ b/engine/access/rpc/backend/retry_test.go @@ -18,8 +18,6 @@ import ( // TestTransactionRetry tests that the retry mechanism will send retries at specific times func (suite *Suite) TestTransactionRetry() { - - // ctx := context.Background() collection := unittest.CollectionFixture(1) transactionBody := collection.Transactions[0] block := unittest.BlockFixture() @@ -44,7 +42,7 @@ func (suite *Suite) TestTransactionRetry() { backend, err := New(params) suite.Require().NoError(err) - retry := newRetry().SetBackend(backend).Activate() + retry := newRetry(suite.log).SetBackend(backend).Activate() backend.retry = retry retry.RegisterTransaction(block.Header.Height, transactionBody) @@ -52,17 +50,20 @@ func (suite *Suite) TestTransactionRetry() { suite.colClient.On("SendTransaction", mock.Anything, mock.Anything).Return(&access.SendTransactionResponse{}, nil) // Don't retry on every height - retry.Retry(block.Header.Height + 1) + err = retry.Retry(block.Header.Height + 1) + suite.Require().NoError(err) suite.colClient.AssertNotCalled(suite.T(), "SendTransaction", mock.Anything, mock.Anything) // Retry every `retryFrequency` - retry.Retry(block.Header.Height + retryFrequency) + err = retry.Retry(block.Header.Height + retryFrequency) + suite.Require().NoError(err) suite.colClient.AssertNumberOfCalls(suite.T(), "SendTransaction", 1) // do not retry if expired - retry.Retry(block.Header.Height + retryFrequency + flow.DefaultTransactionExpiry) + err = retry.Retry(block.Header.Height + retryFrequency + flow.DefaultTransactionExpiry) + suite.Require().NoError(err) // Should've still only been called once suite.colClient.AssertNumberOfCalls(suite.T(), "SendTransaction", 1) @@ -72,7 +73,6 @@ func (suite *Suite) TestTransactionRetry() { // TestSuccessfulTransactionsDontRetry tests that the retry mechanism will send retries at specific times func (suite *Suite) TestSuccessfulTransactionsDontRetry() { - ctx := context.Background() collection := unittest.CollectionFixture(1) transactionBody := collection.Transactions[0] @@ -118,7 +118,7 @@ func (suite *Suite) TestSuccessfulTransactionsDontRetry() { backend, err := New(params) suite.Require().NoError(err) - retry := newRetry().SetBackend(backend).Activate() + retry := newRetry(suite.log).SetBackend(backend).Activate() backend.retry = retry retry.RegisterTransaction(block.Header.Height, transactionBody) @@ -144,17 +144,20 @@ func (suite *Suite) TestSuccessfulTransactionsDontRetry() { suite.Assert().Equal(flow.TransactionStatusFinalized, result.Status) // Don't retry now now that block is finalized - retry.Retry(block.Header.Height + 1) + err = retry.Retry(block.Header.Height + 1) + suite.Require().NoError(err) suite.colClient.AssertNotCalled(suite.T(), "SendTransaction", mock.Anything, mock.Anything) // Don't retry now now that block is finalized - retry.Retry(block.Header.Height + retryFrequency) + err = retry.Retry(block.Header.Height + retryFrequency) + suite.Require().NoError(err) suite.colClient.AssertNotCalled(suite.T(), "SendTransaction", mock.Anything, mock.Anything) // Don't retry now now that block is finalized - retry.Retry(block.Header.Height + retryFrequency + flow.DefaultTransactionExpiry) + err = retry.Retry(block.Header.Height + retryFrequency + flow.DefaultTransactionExpiry) + suite.Require().NoError(err) // Should've still should not be called suite.colClient.AssertNotCalled(suite.T(), "SendTransaction", mock.Anything, mock.Anything) diff --git a/engine/access/rpc/connection/connection.go b/engine/access/rpc/connection/connection.go index 69f5d93375e..aeea158e292 100644 --- a/engine/access/rpc/connection/connection.go +++ b/engine/access/rpc/connection/connection.go @@ -6,17 +6,27 @@ import ( "net" "time" - "github.com/onflow/flow/protobuf/go/flow/access" - "github.com/onflow/flow/protobuf/go/flow/execution" "github.com/rs/zerolog" + "github.com/onflow/flow-go/crypto" "github.com/onflow/flow-go/module" + + "github.com/onflow/flow/protobuf/go/flow/access" + "github.com/onflow/flow/protobuf/go/flow/execution" ) // ConnectionFactory is an interface for creating access and execution API clients. type ConnectionFactory interface { - GetAccessAPIClient(address string) (access.AccessAPIClient, io.Closer, error) - GetAccessAPIClientWithPort(address string, port uint) (access.AccessAPIClient, io.Closer, error) + // GetAccessAPIClient gets an access API client for the specified address using the default CollectionGRPCPort, networkPubKey is optional, + // and it is used for secure gRPC connection. Can be nil for an unsecured connection. + // The returned io.Closer should close the connection after the call if no error occurred during client creation. + GetAccessAPIClient(address string, networkPubKey crypto.PublicKey) (access.AccessAPIClient, io.Closer, error) + // GetAccessAPIClientWithPort gets an access API client for the specified address with port, networkPubKey is optional, + // and it is used for secure gRPC connection. Can be nil for an unsecured connection. + // The returned io.Closer should close the connection after the call if no error occurred during client creation. + GetAccessAPIClientWithPort(address string, networkPubKey crypto.PublicKey) (access.AccessAPIClient, io.Closer, error) + // GetExecutionAPIClient gets an execution API client for the specified address using the default ExecutionGRPCPort. + // The returned io.Closer should close the connection after the call if no error occurred during client creation. GetExecutionAPIClient(address string) (execution.ExecutionAPIClient, io.Closer, error) } @@ -26,9 +36,15 @@ type ProxyConnectionFactory struct { targetAddress string } -func (p *ProxyConnectionFactory) GetAccessAPIClient(address string) (access.AccessAPIClient, io.Closer, error) { - return p.ConnectionFactory.GetAccessAPIClient(p.targetAddress) +// GetAccessAPIClient gets an access API client for a target address using the default CollectionGRPCPort. +// The networkPubKey is the public key used for a secure gRPC connection. It can be nil for an unsecured connection. +// The returned io.Closer should close the connection after the call if no error occurred during client creation. +func (p *ProxyConnectionFactory) GetAccessAPIClient(address string, networkPubKey crypto.PublicKey) (access.AccessAPIClient, io.Closer, error) { + return p.ConnectionFactory.GetAccessAPIClient(p.targetAddress, networkPubKey) } + +// GetExecutionAPIClient gets an execution API client for a target address using the default ExecutionGRPCPort. +// The returned io.Closer should close the connection after the call if no error occurred during client creation. func (p *ProxyConnectionFactory) GetExecutionAPIClient(address string) (execution.ExecutionAPIClient, io.Closer, error) { return p.ConnectionFactory.GetExecutionAPIClient(p.targetAddress) } @@ -46,18 +62,21 @@ type ConnectionFactoryImpl struct { } // GetAccessAPIClient gets an access API client for the specified address using the default CollectionGRPCPort. -func (cf *ConnectionFactoryImpl) GetAccessAPIClient(address string) (access.AccessAPIClient, io.Closer, error) { - return cf.GetAccessAPIClientWithPort(address, cf.CollectionGRPCPort) -} - -// GetAccessAPIClientWithPort gets an access API client for the specified address and port. -func (cf *ConnectionFactoryImpl) GetAccessAPIClientWithPort(address string, port uint) (access.AccessAPIClient, io.Closer, error) { - grpcAddress, err := getGRPCAddress(address, port) +// The networkPubKey is the public key used for secure gRPC connection. Can be nil for an unsecured connection. +// The returned io.Closer should close the connection after the call if no error occurred during client creation. +func (cf *ConnectionFactoryImpl) GetAccessAPIClient(address string, networkPubKey crypto.PublicKey) (access.AccessAPIClient, io.Closer, error) { + address, err := getGRPCAddress(address, cf.CollectionGRPCPort) if err != nil { return nil, nil, err } + return cf.GetAccessAPIClientWithPort(address, networkPubKey) +} - conn, closer, err := cf.Manager.GetConnection(grpcAddress, cf.CollectionNodeGRPCTimeout, AccessClient) +// GetAccessAPIClientWithPort gets an access API client for the specified address with port. +// The networkPubKey is the public key used for secure gRPC connection. Can be nil for an unsecured connection. +// The returned io.Closer should close the connection after the call if no error occurred during client creation. +func (cf *ConnectionFactoryImpl) GetAccessAPIClientWithPort(address string, networkPubKey crypto.PublicKey) (access.AccessAPIClient, io.Closer, error) { + conn, closer, err := cf.Manager.GetConnection(address, cf.CollectionNodeGRPCTimeout, AccessClient, networkPubKey) if err != nil { return nil, nil, err } @@ -66,13 +85,14 @@ func (cf *ConnectionFactoryImpl) GetAccessAPIClientWithPort(address string, port } // GetExecutionAPIClient gets an execution API client for the specified address using the default ExecutionGRPCPort. +// The returned io.Closer should close the connection after the call if no error occurred during client creation. func (cf *ConnectionFactoryImpl) GetExecutionAPIClient(address string) (execution.ExecutionAPIClient, io.Closer, error) { grpcAddress, err := getGRPCAddress(address, cf.ExecutionGRPCPort) if err != nil { return nil, nil, err } - conn, closer, err := cf.Manager.GetConnection(grpcAddress, cf.ExecutionNodeGRPCTimeout, ExecutionClient) + conn, closer, err := cf.Manager.GetConnection(grpcAddress, cf.ExecutionNodeGRPCTimeout, ExecutionClient, nil) if err != nil { return nil, nil, err } diff --git a/engine/access/rpc/connection/connection_test.go b/engine/access/rpc/connection/connection_test.go index a3e1ee3988c..4f024105a95 100644 --- a/engine/access/rpc/connection/connection_test.go +++ b/engine/access/rpc/connection/connection_test.go @@ -59,7 +59,7 @@ func TestProxyAccessAPI(t *testing.T) { } // get a collection API client - client, conn, err := proxyConnectionFactory.GetAccessAPIClient("foo") + client, conn, err := proxyConnectionFactory.GetAccessAPIClient("foo", nil) defer conn.Close() assert.NoError(t, err) @@ -155,7 +155,7 @@ func TestProxyAccessAPIConnectionReuse(t *testing.T) { } // get a collection API client - _, closer, err := proxyConnectionFactory.GetAccessAPIClient("foo") + _, closer, err := proxyConnectionFactory.GetAccessAPIClient("foo", nil) assert.Equal(t, connectionCache.Len(), 1) assert.NoError(t, err) assert.Nil(t, closer.Close()) @@ -308,7 +308,7 @@ func TestCollectionNodeClientTimeout(t *testing.T) { ) // create the collection API client - client, _, err := connectionFactory.GetAccessAPIClient(cn.listener.Addr().String()) + client, _, err := connectionFactory.GetAccessAPIClient(cn.listener.Addr().String(), nil) assert.NoError(t, err) ctx := context.Background() @@ -361,22 +361,22 @@ func TestConnectionPoolFull(t *testing.T) { // get a collection API client // Create and add first client to cache - _, _, err := connectionFactory.GetAccessAPIClient(cn1Address) + _, _, err := connectionFactory.GetAccessAPIClient(cn1Address, nil) assert.Equal(t, connectionCache.Len(), 1) assert.NoError(t, err) // Create and add second client to cache - _, _, err = connectionFactory.GetAccessAPIClient(cn2Address) + _, _, err = connectionFactory.GetAccessAPIClient(cn2Address, nil) assert.Equal(t, connectionCache.Len(), 2) assert.NoError(t, err) // Peek first client from cache. "recently used"-ness will not be updated, so it will be wiped out first. - _, _, err = connectionFactory.GetAccessAPIClient(cn1Address) + _, _, err = connectionFactory.GetAccessAPIClient(cn1Address, nil) assert.Equal(t, connectionCache.Len(), 2) assert.NoError(t, err) // Create and add third client to cache, firs client will be removed from cache - _, _, err = connectionFactory.GetAccessAPIClient(cn3Address) + _, _, err = connectionFactory.GetAccessAPIClient(cn3Address, nil) assert.Equal(t, connectionCache.Len(), 2) assert.NoError(t, err) @@ -436,7 +436,7 @@ func TestConnectionPoolStale(t *testing.T) { } // get a collection API client - client, _, err := proxyConnectionFactory.GetAccessAPIClient("foo") + client, _, err := proxyConnectionFactory.GetAccessAPIClient("foo", nil) assert.Equal(t, connectionCache.Len(), 1) assert.NoError(t, err) // close connection to simulate something "going wrong" with our stored connection @@ -451,7 +451,7 @@ func TestConnectionPoolStale(t *testing.T) { assert.Error(t, err) // re-access, should replace stale connection in cache with new one - _, _, _ = proxyConnectionFactory.GetAccessAPIClient("foo") + _, _, _ = proxyConnectionFactory.GetAccessAPIClient("foo", nil) assert.Equal(t, connectionCache.Len(), 1) var conn *grpc.ClientConn @@ -620,7 +620,7 @@ func TestEvictingCacheClients(t *testing.T) { clientAddress := cn.listener.Addr().String() // Create the execution API client - client, _, err := connectionFactory.GetAccessAPIClient(clientAddress) + client, _, err := connectionFactory.GetAccessAPIClient(clientAddress, nil) require.NoError(t, err) ctx := context.Background() @@ -802,7 +802,15 @@ func setupGRPCServer(t *testing.T) *grpc.ClientConn { return conn } -// TestCircuitBreakerExecutionNode tests the circuit breaker state changes for execution nodes. +var successCodes = []codes.Code{ + codes.Canceled, + codes.InvalidArgument, + codes.NotFound, + codes.Unimplemented, + codes.OutOfRange, +} + +// TestCircuitBreakerExecutionNode tests the circuit breaker for execution nodes. func TestCircuitBreakerExecutionNode(t *testing.T) { requestTimeout := 500 * time.Millisecond circuitBreakerRestoreTimeout := 1500 * time.Millisecond @@ -812,11 +820,6 @@ func TestCircuitBreakerExecutionNode(t *testing.T) { en.start(t) defer en.stop(t) - // Set up the handler mock to not respond within the requestTimeout. - req := &execution.PingRequest{} - resp := &execution.PingResponse{} - en.handler.On("Ping", testifymock.Anything, req).After(2*requestTimeout).Return(resp, nil) - // Create the connection factory. connectionFactory := new(ConnectionFactoryImpl) @@ -852,10 +855,11 @@ func TestCircuitBreakerExecutionNode(t *testing.T) { client, _, err := connectionFactory.GetExecutionAPIClient(en.listener.Addr().String()) require.NoError(t, err) - ctx := context.Background() + req := &execution.PingRequest{} + resp := &execution.PingResponse{} // Helper function to make the Ping call to the execution node and measure the duration. - callAndMeasurePingDuration := func() (time.Duration, error) { + callAndMeasurePingDuration := func(ctx context.Context) (time.Duration, error) { start := time.Now() // Make the call to the execution node. @@ -865,30 +869,51 @@ func TestCircuitBreakerExecutionNode(t *testing.T) { return time.Since(start), err } - // Call and measure the duration for the first invocation. - duration, err := callAndMeasurePingDuration() - assert.Equal(t, codes.DeadlineExceeded, status.Code(err)) - assert.LessOrEqual(t, requestTimeout, duration) + t.Run("test different states of the circuit breaker", func(t *testing.T) { + ctx := context.Background() + + // Set up the handler mock to not respond within the requestTimeout. + en.handler.On("Ping", testifymock.Anything, req).After(2*requestTimeout).Return(resp, nil) + + // Call and measure the duration for the first invocation. + duration, err := callAndMeasurePingDuration(ctx) + assert.Equal(t, codes.DeadlineExceeded, status.Code(err)) + assert.LessOrEqual(t, requestTimeout, duration) - // Call and measure the duration for the second invocation (circuit breaker state is now "Open"). - duration, err = callAndMeasurePingDuration() - assert.Equal(t, gobreaker.ErrOpenState, err) - assert.Greater(t, requestTimeout, duration) + // Call and measure the duration for the second invocation (circuit breaker state is now "Open"). + duration, err = callAndMeasurePingDuration(ctx) + assert.Equal(t, gobreaker.ErrOpenState, err) + assert.Greater(t, requestTimeout, duration) + + // Reset the mock Ping for the next invocation to return response without delay + en.handler.On("Ping", testifymock.Anything, req).Unset() + en.handler.On("Ping", testifymock.Anything, req).Return(resp, nil) + + // Wait until the circuit breaker transitions to the "HalfOpen" state. + time.Sleep(circuitBreakerRestoreTimeout + (500 * time.Millisecond)) + + // Call and measure the duration for the third invocation (circuit breaker state is now "HalfOpen"). + duration, err = callAndMeasurePingDuration(ctx) + assert.Greater(t, requestTimeout, duration) + assert.Equal(t, nil, err) + }) - // Reset the mock Ping for the next invocation to return response without delay - en.handler.On("Ping", testifymock.Anything, req).Unset() - en.handler.On("Ping", testifymock.Anything, req).Return(resp, nil) + for _, code := range successCodes { + t.Run(fmt.Sprintf("test error %s treated as a success for circuit breaker ", code.String()), func(t *testing.T) { + ctx := context.Background() - // Wait until the circuit breaker transitions to the "HalfOpen" state. - time.Sleep(circuitBreakerRestoreTimeout + (500 * time.Millisecond)) + en.handler.On("Ping", testifymock.Anything, req).Unset() + en.handler.On("Ping", testifymock.Anything, req).Return(nil, status.Error(code, code.String())) - // Call and measure the duration for the third invocation (circuit breaker state is now "HalfOpen"). - duration, err = callAndMeasurePingDuration() - assert.Greater(t, requestTimeout, duration) - assert.Equal(t, nil, err) + duration, err := callAndMeasurePingDuration(ctx) + require.Error(t, err) + require.Equal(t, code, status.Code(err)) + require.Greater(t, requestTimeout, duration) + }) + } } -// TestCircuitBreakerCollectionNode tests the circuit breaker state changes for collection nodes. +// TestCircuitBreakerCollectionNode tests the circuit breaker for collection nodes. func TestCircuitBreakerCollectionNode(t *testing.T) { requestTimeout := 500 * time.Millisecond circuitBreakerRestoreTimeout := 1500 * time.Millisecond @@ -898,11 +923,6 @@ func TestCircuitBreakerCollectionNode(t *testing.T) { cn.start(t) defer cn.stop(t) - // Set up the handler mock to not respond within the requestTimeout. - req := &access.PingRequest{} - resp := &access.PingResponse{} - cn.handler.On("Ping", testifymock.Anything, req).After(2*requestTimeout).Return(resp, nil) - // Create the connection factory. connectionFactory := new(ConnectionFactoryImpl) @@ -935,13 +955,14 @@ func TestCircuitBreakerCollectionNode(t *testing.T) { connectionFactory.AccessMetrics = metrics.NewNoopCollector() // Create the collection API client. - client, _, err := connectionFactory.GetAccessAPIClient(cn.listener.Addr().String()) + client, _, err := connectionFactory.GetAccessAPIClient(cn.listener.Addr().String(), nil) assert.NoError(t, err) - ctx := context.Background() + req := &access.PingRequest{} + resp := &access.PingResponse{} // Helper function to make the Ping call to the collection node and measure the duration. - callAndMeasurePingDuration := func() (time.Duration, error) { + callAndMeasurePingDuration := func(ctx context.Context) (time.Duration, error) { start := time.Now() // Make the call to the collection node. @@ -951,25 +972,46 @@ func TestCircuitBreakerCollectionNode(t *testing.T) { return time.Since(start), err } - // Call and measure the duration for the first invocation. - duration, err := callAndMeasurePingDuration() - assert.Equal(t, codes.DeadlineExceeded, status.Code(err)) - assert.LessOrEqual(t, requestTimeout, duration) + t.Run("test different states of the circuit breaker", func(t *testing.T) { + ctx := context.Background() + + // Set up the handler mock to not respond within the requestTimeout. + cn.handler.On("Ping", testifymock.Anything, req).After(2*requestTimeout).Return(resp, nil) + + // Call and measure the duration for the first invocation. + duration, err := callAndMeasurePingDuration(ctx) + assert.Equal(t, codes.DeadlineExceeded, status.Code(err)) + assert.LessOrEqual(t, requestTimeout, duration) + + // Call and measure the duration for the second invocation (circuit breaker state is now "Open"). + duration, err = callAndMeasurePingDuration(ctx) + assert.Equal(t, gobreaker.ErrOpenState, err) + assert.Greater(t, requestTimeout, duration) - // Call and measure the duration for the second invocation (circuit breaker state is now "Open"). - duration, err = callAndMeasurePingDuration() - assert.Equal(t, gobreaker.ErrOpenState, err) - assert.Greater(t, requestTimeout, duration) + // Reset the mock Ping for the next invocation to return response without delay + cn.handler.On("Ping", testifymock.Anything, req).Unset() + cn.handler.On("Ping", testifymock.Anything, req).Return(resp, nil) - // Reset the mock Ping for the next invocation to return response without delay - cn.handler.On("Ping", testifymock.Anything, req).Unset() - cn.handler.On("Ping", testifymock.Anything, req).Return(resp, nil) + // Wait until the circuit breaker transitions to the "HalfOpen" state. + time.Sleep(circuitBreakerRestoreTimeout + (500 * time.Millisecond)) - // Wait until the circuit breaker transitions to the "HalfOpen" state. - time.Sleep(circuitBreakerRestoreTimeout + (500 * time.Millisecond)) + // Call and measure the duration for the third invocation (circuit breaker state is now "HalfOpen"). + duration, err = callAndMeasurePingDuration(ctx) + assert.Greater(t, requestTimeout, duration) + assert.Equal(t, nil, err) + }) + + for _, code := range successCodes { + t.Run(fmt.Sprintf("test error %s treated as a success for circuit breaker ", code.String()), func(t *testing.T) { + ctx := context.Background() - // Call and measure the duration for the third invocation (circuit breaker state is now "HalfOpen"). - duration, err = callAndMeasurePingDuration() - assert.Greater(t, requestTimeout, duration) - assert.Equal(t, nil, err) + cn.handler.On("Ping", testifymock.Anything, req).Unset() + cn.handler.On("Ping", testifymock.Anything, req).Return(nil, status.Error(code, code.String())) + + duration, err := callAndMeasurePingDuration(ctx) + require.Error(t, err) + require.Equal(t, code, status.Code(err)) + require.Greater(t, requestTimeout, duration) + }) + } } diff --git a/engine/access/rpc/connection/manager.go b/engine/access/rpc/connection/manager.go index c50a9026748..3bb8ffc7286 100644 --- a/engine/access/rpc/connection/manager.go +++ b/engine/access/rpc/connection/manager.go @@ -6,17 +6,18 @@ import ( "io" "time" - "github.com/sony/gobreaker" - "github.com/rs/zerolog" + "github.com/sony/gobreaker" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" _ "google.golang.org/grpc/encoding/gzip" //required for gRPC compression "google.golang.org/grpc/keepalive" "google.golang.org/grpc/status" + "github.com/onflow/flow-go/crypto" _ "github.com/onflow/flow-go/engine/common/grpc/compressor/deflate" //required for gRPC compression _ "github.com/onflow/flow-go/engine/common/grpc/compressor/snappy" //required for gRPC compression "github.com/onflow/flow-go/module" @@ -86,16 +87,22 @@ func NewManager( // GetConnection returns a gRPC client connection for the given grpcAddress and timeout. // If a cache is used, it retrieves a cached connection, otherwise creates a new connection. // It returns the client connection and an io.Closer to close the connection when done. -func (m *Manager) GetConnection(grpcAddress string, timeout time.Duration, clientType clientType) (*grpc.ClientConn, io.Closer, error) { +// The networkPubKey is the public key used for creating secure gRPC connection. Can be nil for an unsecured connection. +func (m *Manager) GetConnection( + grpcAddress string, + timeout time.Duration, + clientType clientType, + networkPubKey crypto.PublicKey, +) (*grpc.ClientConn, io.Closer, error) { if m.cache != nil { - conn, err := m.retrieveConnection(grpcAddress, timeout, clientType) + conn, err := m.retrieveConnection(grpcAddress, timeout, clientType, networkPubKey) if err != nil { return nil, nil, err } return conn, &noopCloser{}, nil } - conn, err := m.createConnection(grpcAddress, timeout, nil, clientType) + conn, err := m.createConnection(grpcAddress, timeout, nil, clientType, networkPubKey) if err != nil { return nil, nil, err } @@ -135,7 +142,13 @@ func (m *Manager) HasCache() bool { // retrieveConnection retrieves the CachedClient for the given grpcAddress from the cache or adds a new one if not present. // If the connection is already cached, it waits for the lock and returns the connection from the cache. // Otherwise, it creates a new connection and caches it. -func (m *Manager) retrieveConnection(grpcAddress string, timeout time.Duration, clientType clientType) (*grpc.ClientConn, error) { +// The networkPubKey is the public key used for retrieving secure gRPC connection. Can be nil for an unsecured connection. +func (m *Manager) retrieveConnection( + grpcAddress string, + timeout time.Duration, + clientType clientType, + networkPubKey crypto.PublicKey, +) (*grpc.ClientConn, error) { client, ok := m.cache.GetOrAdd(grpcAddress, timeout) if ok { // The client was retrieved from the cache, wait for the lock @@ -157,7 +170,7 @@ func (m *Manager) retrieveConnection(grpcAddress string, timeout time.Duration, } // The connection is not cached or is closed, create a new connection and cache it - conn, err := m.createConnection(grpcAddress, timeout, client, clientType) + conn, err := m.createConnection(grpcAddress, timeout, client, clientType, networkPubKey) if err != nil { return nil, err } @@ -174,7 +187,15 @@ func (m *Manager) retrieveConnection(grpcAddress string, timeout time.Duration, // createConnection creates a new gRPC connection to the remote node at the given address with the specified timeout. // If the cachedClient is not nil, it means a new entry in the cache is being created, so it's locked to give priority // to the caller working with the new client, allowing it to create the underlying connection. -func (m *Manager) createConnection(address string, timeout time.Duration, cachedClient *CachedClient, clientType clientType) (*grpc.ClientConn, error) { +// The networkPubKey is optional and configures a connection level security for gRPC connection. If it is not nil, +// it means that it used for creating secure gRPC connection. If it is nil, it means unsecure gRPC connection is being created. +func (m *Manager) createConnection( + address string, + timeout time.Duration, + cachedClient *CachedClient, + clientType clientType, + networkPubKey crypto.PublicKey, +) (*grpc.ClientConn, error) { if timeout == 0 { timeout = DefaultClientTimeout } @@ -212,7 +233,6 @@ func (m *Manager) createConnection(address string, timeout time.Duration, cached // https://grpc.io/blog/grpc-on-http2/#keeping-connections-alive var opts []grpc.DialOption opts = append(opts, grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(int(m.maxMsgSize)))) - opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) opts = append(opts, grpc.WithKeepaliveParams(keepaliveParams)) opts = append(opts, grpc.WithChainUnaryInterceptor(connInterceptors...)) @@ -220,6 +240,16 @@ func (m *Manager) createConnection(address string, timeout time.Duration, cached opts = append(opts, grpc.WithDefaultCallOptions(grpc.UseCompressor(m.compressorName))) } + if networkPubKey != nil { + tlsConfig, err := grpcutils.DefaultClientTLSConfig(networkPubKey) + if err != nil { + return nil, fmt.Errorf("failed to get default TLS client config using public flow networking key %s %w", networkPubKey.String(), err) + } + opts = append(opts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))) + } else { + opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) + } + conn, err := grpc.Dial( address, opts..., @@ -368,6 +398,25 @@ func (m *Manager) createCircuitBreakerInterceptor() grpc.UnaryClientInterceptor // MaxRequests defines the max number of concurrent requests while the circuit breaker is in the HalfClosed // state. MaxRequests: m.circuitBreakerConfig.MaxRequests, + // IsSuccessful defines gRPC status codes that should be treated as a successful result for the circuit breaker. + IsSuccessful: func(err error) bool { + if se, ok := status.FromError(err); ok { + if se == nil { + return true + } + + // There are several error cases that may occur during normal operation and should be considered + // as "successful" from the perspective of the circuit breaker. + switch se.Code() { + case codes.OK, codes.Canceled, codes.InvalidArgument, codes.NotFound, codes.Unimplemented, codes.OutOfRange: + return true + default: + return false + } + } + + return false + }, }) circuitBreakerInterceptor := func( diff --git a/engine/access/rpc/connection/mock/connection_factory.go b/engine/access/rpc/connection/mock/connection_factory.go index 76205d59915..32da8057be2 100644 --- a/engine/access/rpc/connection/mock/connection_factory.go +++ b/engine/access/rpc/connection/mock/connection_factory.go @@ -5,6 +5,8 @@ package mock import ( access "github.com/onflow/flow/protobuf/go/flow/access" + crypto "github.com/onflow/flow-go/crypto" + execution "github.com/onflow/flow/protobuf/go/flow/execution" io "io" @@ -17,34 +19,34 @@ type ConnectionFactory struct { mock.Mock } -// GetAccessAPIClient provides a mock function with given fields: address -func (_m *ConnectionFactory) GetAccessAPIClient(address string) (access.AccessAPIClient, io.Closer, error) { - ret := _m.Called(address) +// GetAccessAPIClient provides a mock function with given fields: address, networkPubKey +func (_m *ConnectionFactory) GetAccessAPIClient(address string, networkPubKey crypto.PublicKey) (access.AccessAPIClient, io.Closer, error) { + ret := _m.Called(address, networkPubKey) var r0 access.AccessAPIClient var r1 io.Closer var r2 error - if rf, ok := ret.Get(0).(func(string) (access.AccessAPIClient, io.Closer, error)); ok { - return rf(address) + if rf, ok := ret.Get(0).(func(string, crypto.PublicKey) (access.AccessAPIClient, io.Closer, error)); ok { + return rf(address, networkPubKey) } - if rf, ok := ret.Get(0).(func(string) access.AccessAPIClient); ok { - r0 = rf(address) + if rf, ok := ret.Get(0).(func(string, crypto.PublicKey) access.AccessAPIClient); ok { + r0 = rf(address, networkPubKey) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(access.AccessAPIClient) } } - if rf, ok := ret.Get(1).(func(string) io.Closer); ok { - r1 = rf(address) + if rf, ok := ret.Get(1).(func(string, crypto.PublicKey) io.Closer); ok { + r1 = rf(address, networkPubKey) } else { if ret.Get(1) != nil { r1 = ret.Get(1).(io.Closer) } } - if rf, ok := ret.Get(2).(func(string) error); ok { - r2 = rf(address) + if rf, ok := ret.Get(2).(func(string, crypto.PublicKey) error); ok { + r2 = rf(address, networkPubKey) } else { r2 = ret.Error(2) } @@ -52,34 +54,34 @@ func (_m *ConnectionFactory) GetAccessAPIClient(address string) (access.AccessAP return r0, r1, r2 } -// GetAccessAPIClientWithPort provides a mock function with given fields: address, port -func (_m *ConnectionFactory) GetAccessAPIClientWithPort(address string, port uint) (access.AccessAPIClient, io.Closer, error) { - ret := _m.Called(address, port) +// GetAccessAPIClientWithPort provides a mock function with given fields: address, networkPubKey +func (_m *ConnectionFactory) GetAccessAPIClientWithPort(address string, networkPubKey crypto.PublicKey) (access.AccessAPIClient, io.Closer, error) { + ret := _m.Called(address, networkPubKey) var r0 access.AccessAPIClient var r1 io.Closer var r2 error - if rf, ok := ret.Get(0).(func(string, uint) (access.AccessAPIClient, io.Closer, error)); ok { - return rf(address, port) + if rf, ok := ret.Get(0).(func(string, crypto.PublicKey) (access.AccessAPIClient, io.Closer, error)); ok { + return rf(address, networkPubKey) } - if rf, ok := ret.Get(0).(func(string, uint) access.AccessAPIClient); ok { - r0 = rf(address, port) + if rf, ok := ret.Get(0).(func(string, crypto.PublicKey) access.AccessAPIClient); ok { + r0 = rf(address, networkPubKey) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(access.AccessAPIClient) } } - if rf, ok := ret.Get(1).(func(string, uint) io.Closer); ok { - r1 = rf(address, port) + if rf, ok := ret.Get(1).(func(string, crypto.PublicKey) io.Closer); ok { + r1 = rf(address, networkPubKey) } else { if ret.Get(1) != nil { r1 = ret.Get(1).(io.Closer) } } - if rf, ok := ret.Get(2).(func(string, uint) error); ok { - r2 = rf(address, port) + if rf, ok := ret.Get(2).(func(string, crypto.PublicKey) error); ok { + r2 = rf(address, networkPubKey) } else { r2 = ret.Error(2) } diff --git a/engine/access/rpc/engine.go b/engine/access/rpc/engine.go index 8bf8acd72c3..4137a1ad976 100644 --- a/engine/access/rpc/engine.go +++ b/engine/access/rpc/engine.go @@ -113,7 +113,7 @@ func NewBuilder(log zerolog.Logger, stateStreamBackend: stateStreamBackend, stateStreamConfig: stateStreamConfig, } - backendNotifierActor, backendNotifierWorker := events.NewFinalizationActor(eng.notifyBackendOnBlockFinalized) + backendNotifierActor, backendNotifierWorker := events.NewFinalizationActor(eng.processOnFinalizedBlock) eng.backendNotifierActor = backendNotifierActor eng.Component = component.NewComponentManagerBuilder(). @@ -171,12 +171,13 @@ func (e *Engine) OnFinalizedBlock(block *model.Block) { e.backendNotifierActor.OnFinalizedBlock(block) } -// notifyBackendOnBlockFinalized is invoked by the FinalizationActor when a new block is finalized. -// It notifies the backend of the newly finalized block. -func (e *Engine) notifyBackendOnBlockFinalized(_ *model.Block) error { +// processOnFinalizedBlock is invoked by the FinalizationActor when a new block is finalized. +// It informs the backend of the newly finalized block. +// The input to this callback is treated as trusted. +// No errors expected during normal operations. +func (e *Engine) processOnFinalizedBlock(_ *model.Block) error { finalizedHeader := e.finalizedHeaderCache.Get() - e.backend.NotifyFinalizedBlockHeight(finalizedHeader.Height) - return nil + return e.backend.ProcessFinalizedBlockHeight(finalizedHeader.Height) } // RestApiAddress returns the listen address of the REST API server. @@ -212,6 +213,7 @@ func (e *Engine) serveGRPCWebProxyWorker(ctx irrecoverable.SignalerContext, read // serveREST is a worker routine which starts the HTTP REST server. // The ready callback is called after the server address is bound and set. +// Note: The original REST BaseContext is discarded, and the irrecoverable.SignalerContext is used for error handling. func (e *Engine) serveREST(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { if e.config.RestConfig.ListenAddress == "" { e.log.Debug().Msg("no REST API address specified - not starting the server") @@ -230,6 +232,11 @@ func (e *Engine) serveREST(ctx irrecoverable.SignalerContext, ready component.Re } e.restServer = r + // Set up the irrecoverable.SignalerContext for error handling in the REST server. + e.restServer.BaseContext = func(_ net.Listener) context.Context { + return irrecoverable.WithSignalerContext(ctx, ctx) + } + l, err := net.Listen("tcp", e.config.RestConfig.ListenAddress) if err != nil { e.log.Err(err).Msg("failed to start the REST server") diff --git a/engine/access/state_stream/backend/backend.go b/engine/access/state_stream/backend/backend.go index 49e1ab966f3..185afc32666 100644 --- a/engine/access/state_stream/backend/backend.go +++ b/engine/access/state_stream/backend/backend.go @@ -12,8 +12,10 @@ import ( "github.com/onflow/flow-go/engine" "github.com/onflow/flow-go/engine/access/state_stream" "github.com/onflow/flow-go/engine/common/rpc" + "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/counters" + "github.com/onflow/flow-go/module/execution" "github.com/onflow/flow-go/module/executiondatasync/execution_data" "github.com/onflow/flow-go/module/executiondatasync/execution_data/cache" "github.com/onflow/flow-go/state/protocol" @@ -36,6 +38,9 @@ type Config struct { // MaxGlobalStreams defines the global max number of streams that can be open at the same time. MaxGlobalStreams uint32 + // RegisterIDsRequestLimit defines the max number of register IDs that can be received in a single request. + RegisterIDsRequestLimit uint32 + // ExecutionDataCacheSize is the max number of objects for the execution data cache. ExecutionDataCacheSize uint32 @@ -62,16 +67,18 @@ type StateStreamBackend struct { ExecutionDataBackend EventsBackend - log zerolog.Logger - state protocol.State - headers storage.Headers - seals storage.Seals - results storage.ExecutionResults - execDataStore execution_data.ExecutionDataStore - execDataCache *cache.ExecutionDataCache - broadcaster *engine.Broadcaster - rootBlockHeight uint64 - rootBlockID flow.Identifier + log zerolog.Logger + state protocol.State + headers storage.Headers + seals storage.Seals + results storage.ExecutionResults + execDataStore execution_data.ExecutionDataStore + execDataCache *cache.ExecutionDataCache + broadcaster *engine.Broadcaster + rootBlockHeight uint64 + rootBlockID flow.Identifier + registers *execution.RegistersAsyncStore + registerRequestLimit int // highestHeight contains the highest consecutive block height for which we have received a // new Execution Data notification. @@ -90,6 +97,7 @@ func New( broadcaster *engine.Broadcaster, rootHeight uint64, highestAvailableHeight uint64, + registers *execution.RegistersAsyncStore, ) (*StateStreamBackend, error) { logger := log.With().Str("module", "state_stream_api").Logger() @@ -100,17 +108,19 @@ func New( } b := &StateStreamBackend{ - log: logger, - state: state, - headers: headers, - seals: seals, - results: results, - execDataStore: execDataStore, - execDataCache: execDataCache, - broadcaster: broadcaster, - rootBlockHeight: rootHeight, - rootBlockID: rootBlockID, - highestHeight: counters.NewMonotonousCounter(highestAvailableHeight), + log: logger, + state: state, + headers: headers, + seals: seals, + results: results, + execDataStore: execDataStore, + execDataCache: execDataCache, + broadcaster: broadcaster, + rootBlockHeight: rootHeight, + rootBlockID: rootBlockID, + registers: registers, + registerRequestLimit: int(config.RegisterIDsRequestLimit), + highestHeight: counters.NewMonotonousCounter(highestAvailableHeight), } b.ExecutionDataBackend = ExecutionDataBackend{ @@ -212,3 +222,18 @@ func (b *StateStreamBackend) getStartHeight(startBlockID flow.Identifier, startH func (b *StateStreamBackend) setHighestHeight(height uint64) bool { return b.highestHeight.Set(height) } + +// GetRegisterValues returns the register values for the given register IDs at the given block height. +func (b *StateStreamBackend) GetRegisterValues(ids flow.RegisterIDs, height uint64) ([]flow.RegisterValue, error) { + if len(ids) > b.registerRequestLimit { + return nil, status.Errorf(codes.InvalidArgument, "number of register IDs exceeds limit of %d", b.registerRequestLimit) + } + values, err := b.registers.RegisterValues(ids, height) + if errors.Is(err, storage.ErrHeightNotIndexed) { + return nil, status.Errorf(codes.OutOfRange, "register values for block %d is not available", height) + } + if errors.Is(err, storage.ErrNotFound) { + return nil, status.Errorf(codes.NotFound, "register values for block %d is not available", height) + } + return values, err +} diff --git a/engine/access/state_stream/backend/backend_executiondata_test.go b/engine/access/state_stream/backend/backend_executiondata_test.go index e78b4923906..51a9da291f3 100644 --- a/engine/access/state_stream/backend/backend_executiondata_test.go +++ b/engine/access/state_stream/backend/backend_executiondata_test.go @@ -19,6 +19,7 @@ import ( "github.com/onflow/flow-go/engine/access/state_stream" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/blobs" + "github.com/onflow/flow-go/module/execution" "github.com/onflow/flow-go/module/executiondatasync/execution_data" "github.com/onflow/flow-go/module/executiondatasync/execution_data/cache" "github.com/onflow/flow-go/module/mempool/herocache" @@ -38,12 +39,14 @@ var testEventTypes = []flow.EventType{ type BackendExecutionDataSuite struct { suite.Suite - state *protocolmock.State - params *protocolmock.Params - snapshot *protocolmock.Snapshot - headers *storagemock.Headers - seals *storagemock.Seals - results *storagemock.ExecutionResults + state *protocolmock.State + params *protocolmock.Params + snapshot *protocolmock.Snapshot + headers *storagemock.Headers + seals *storagemock.Seals + results *storagemock.ExecutionResults + registers *storagemock.RegisterIndex + registersAsync *execution.RegistersAsyncStore bs blobs.Blobstore eds execution_data.ExecutionDataStore @@ -83,8 +86,9 @@ func (s *BackendExecutionDataSuite) SetupTest() { s.execDataCache = cache.NewExecutionDataCache(s.eds, s.headers, s.seals, s.results, s.execDataHeroCache) conf := Config{ - ClientSendTimeout: state_stream.DefaultSendTimeout, - ClientSendBufferSize: state_stream.DefaultSendBufferSize, + ClientSendTimeout: state_stream.DefaultSendTimeout, + ClientSendBufferSize: state_stream.DefaultSendBufferSize, + RegisterIDsRequestLimit: state_stream.DefaultRegisterIDsRequestLimit, } var err error @@ -145,6 +149,20 @@ func (s *BackendExecutionDataSuite) SetupTest() { s.T().Logf("adding exec data for block %d %d %v => %v", i, block.Header.Height, block.ID(), result.ExecutionDataID) } + s.registersAsync = execution.NewRegistersAsyncStore() + s.registers = storagemock.NewRegisterIndex(s.T()) + err = s.registersAsync.InitDataAvailable(s.registers) + require.NoError(s.T(), err) + s.registers.On("LatestHeight").Return(rootBlock.Header.Height).Maybe() + s.registers.On("FirstHeight").Return(rootBlock.Header.Height).Maybe() + s.registers.On("Get", mock.AnythingOfType("RegisterID"), mock.AnythingOfType("uint64")).Return( + func(id flow.RegisterID, height uint64) (flow.RegisterValue, error) { + if id == unittest.RegisterIDFixture() { + return flow.RegisterValue{}, nil + } + return nil, storage.ErrNotFound + }).Maybe() + s.state.On("Sealed").Return(s.snapshot, nil).Maybe() s.snapshot.On("Head").Return(s.blocks[0].Header, nil).Maybe() @@ -239,6 +257,7 @@ func (s *BackendExecutionDataSuite) SetupTest() { s.broadcaster, rootBlock.Header.Height, rootBlock.Header.Height, // initialize with no downloaded data + s.registersAsync, ) require.NoError(s.T(), err) } @@ -418,3 +437,27 @@ func (s *BackendExecutionDataSuite) TestSubscribeExecutionDataHandlesErrors() { assert.Equal(s.T(), codes.NotFound, status.Code(sub.Err())) }) } + +func (s *BackendExecutionDataSuite) TestGetRegisterValuesErrors() { + s.Run("normal case", func() { + res, err := s.backend.GetRegisterValues(flow.RegisterIDs{unittest.RegisterIDFixture()}, s.backend.rootBlockHeight) + require.NoError(s.T(), err) + require.NotEmpty(s.T(), res) + }) + + s.Run("returns error if block height is out of range", func() { + _, err := s.backend.GetRegisterValues(flow.RegisterIDs{unittest.RegisterIDFixture()}, s.backend.rootBlockHeight+1) + require.Equal(s.T(), codes.OutOfRange, status.Code(err)) + }) + + s.Run("returns error if register path is not indexed", func() { + falseID := flow.RegisterIDs{flow.RegisterID{Owner: "ha", Key: "ha"}} + _, err := s.backend.GetRegisterValues(falseID, s.backend.rootBlockHeight) + require.Equal(s.T(), codes.NotFound, status.Code(err)) + }) + + s.Run("returns error if too many registers are requested", func() { + _, err := s.backend.GetRegisterValues(make(flow.RegisterIDs, s.backend.registerRequestLimit+1), s.backend.rootBlockHeight) + require.Equal(s.T(), codes.InvalidArgument, status.Code(err)) + }) +} diff --git a/engine/access/state_stream/backend/handler.go b/engine/access/state_stream/backend/handler.go index 5216059b053..9537ce94d4c 100644 --- a/engine/access/state_stream/backend/handler.go +++ b/engine/access/state_stream/backend/handler.go @@ -4,11 +4,12 @@ import ( "context" "sync/atomic" - "github.com/onflow/flow/protobuf/go/flow/entities" - "github.com/onflow/flow/protobuf/go/flow/executiondata" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + "github.com/onflow/flow/protobuf/go/flow/entities" + "github.com/onflow/flow/protobuf/go/flow/executiondata" + "github.com/onflow/flow-go/engine/access/state_stream" "github.com/onflow/flow-go/engine/common/rpc" "github.com/onflow/flow-go/engine/common/rpc/convert" @@ -199,6 +200,16 @@ func (h *Handler) SubscribeEvents(request *executiondata.SubscribeEventsRequest, } } -func (h *Handler) GetRegisterValues(ctx context.Context, request *executiondata.GetRegisterValuesRequest) (*executiondata.GetRegisterValuesResponse, error) { - return nil, status.Error(codes.Unimplemented, "not implemented") +func (h *Handler) GetRegisterValues(_ context.Context, request *executiondata.GetRegisterValuesRequest) (*executiondata.GetRegisterValuesResponse, error) { + // Convert data + registerIDs, err := convert.MessagesToRegisterIDs(request.GetRegisterIds()) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "could not convert register IDs: %v", err) + } + // get payload from store + values, err := h.api.GetRegisterValues(registerIDs, request.GetBlockHeight()) + if err != nil { + return nil, rpc.ConvertError(err, "could not get register values", codes.Internal) + } + return &executiondata.GetRegisterValuesResponse{Values: values}, nil } diff --git a/engine/access/state_stream/backend/handler_test.go b/engine/access/state_stream/backend/handler_test.go index 6c53c1b25f8..a28f58d147d 100644 --- a/engine/access/state_stream/backend/handler_test.go +++ b/engine/access/state_stream/backend/handler_test.go @@ -14,6 +14,8 @@ import ( "github.com/stretchr/testify/suite" pb "google.golang.org/genproto/googleapis/bytestream" "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" "github.com/onflow/cadence/encoding/ccf" jsoncdc "github.com/onflow/cadence/encoding/json" @@ -24,6 +26,7 @@ import ( ssmock "github.com/onflow/flow-go/engine/access/state_stream/mock" "github.com/onflow/flow-go/engine/common/rpc/convert" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage" "github.com/onflow/flow-go/utils/unittest" "github.com/onflow/flow-go/utils/unittest/generator" ) @@ -508,6 +511,96 @@ func TestEventStream(t *testing.T) { } } +// TestGetRegisterValues tests the register values. +func TestGetRegisterValues(t *testing.T) { + t.Parallel() + testHeight := uint64(1) + + // test register IDs + values + testIds := flow.RegisterIDs{ + flow.UUIDRegisterID(0), + flow.AccountStatusRegisterID(unittest.AddressFixture()), + unittest.RegisterIDFixture(), + } + testValues := []flow.RegisterValue{ + []byte("uno"), + []byte("dos"), + []byte("tres"), + } + invalidIDs := append(testIds, flow.RegisterID{}) // valid + invalid IDs + + t.Run("invalid message", func(t *testing.T) { + api := ssmock.NewAPI(t) + h := NewHandler(api, flow.Localnet.Chain(), makeConfig(1)) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + invalidMessage := &executiondata.GetRegisterValuesRequest{ + RegisterIds: nil, + } + _, err := h.GetRegisterValues(ctx, invalidMessage) + require.Equal(t, status.Code(err), codes.InvalidArgument) + }) + + t.Run("valid registers", func(t *testing.T) { + api := ssmock.NewAPI(t) + api.On("GetRegisterValues", testIds, testHeight).Return(testValues, nil) + h := NewHandler(api, flow.Localnet.Chain(), makeConfig(1)) + validRegisters := make([]*entities.RegisterID, len(testIds)) + for i, id := range testIds { + validRegisters[i] = convert.RegisterIDToMessage(id) + } + req := &executiondata.GetRegisterValuesRequest{ + RegisterIds: validRegisters, + BlockHeight: testHeight, + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + resp, err := h.GetRegisterValues(ctx, req) + require.NoError(t, err) + require.Equal(t, resp.GetValues(), testValues) + + }) + + t.Run("unavailable registers", func(t *testing.T) { + api := ssmock.NewAPI(t) + expectedErr := status.Errorf(codes.NotFound, "could not get register values: %v", storage.ErrNotFound) + api.On("GetRegisterValues", invalidIDs, testHeight).Return(nil, expectedErr) + h := NewHandler(api, flow.Localnet.Chain(), makeConfig(1)) + unavailableRegisters := make([]*entities.RegisterID, len(invalidIDs)) + for i, id := range invalidIDs { + unavailableRegisters[i] = convert.RegisterIDToMessage(id) + } + req := &executiondata.GetRegisterValuesRequest{ + RegisterIds: unavailableRegisters, + BlockHeight: testHeight, + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + _, err := h.GetRegisterValues(ctx, req) + require.Equal(t, status.Code(err), codes.NotFound) + + }) + + t.Run("wrong height", func(t *testing.T) { + api := ssmock.NewAPI(t) + expectedErr := status.Errorf(codes.OutOfRange, "could not get register values: %v", storage.ErrHeightNotIndexed) + api.On("GetRegisterValues", testIds, testHeight+1).Return(nil, expectedErr) + h := NewHandler(api, flow.Localnet.Chain(), makeConfig(1)) + validRegisters := make([]*entities.RegisterID, len(testIds)) + for i, id := range testIds { + validRegisters[i] = convert.RegisterIDToMessage(id) + } + req := &executiondata.GetRegisterValuesRequest{ + RegisterIds: validRegisters, + BlockHeight: testHeight + 1, + } + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + _, err := h.GetRegisterValues(ctx, req) + require.Equal(t, status.Code(err), codes.OutOfRange) + }) +} + func makeConfig(maxGlobalStreams uint32) Config { return Config{ EventFilterConfig: state_stream.DefaultEventFilterConfig, diff --git a/engine/access/state_stream/mock/api.go b/engine/access/state_stream/mock/api.go index 5b57efc917f..99203a9f487 100644 --- a/engine/access/state_stream/mock/api.go +++ b/engine/access/state_stream/mock/api.go @@ -44,6 +44,32 @@ func (_m *API) GetExecutionDataByBlockID(ctx context.Context, blockID flow.Ident return r0, r1 } +// GetRegisterValues provides a mock function with given fields: registerIDs, height +func (_m *API) GetRegisterValues(registerIDs flow.RegisterIDs, height uint64) ([][]byte, error) { + ret := _m.Called(registerIDs, height) + + var r0 [][]byte + var r1 error + if rf, ok := ret.Get(0).(func(flow.RegisterIDs, uint64) ([][]byte, error)); ok { + return rf(registerIDs, height) + } + if rf, ok := ret.Get(0).(func(flow.RegisterIDs, uint64) [][]byte); ok { + r0 = rf(registerIDs, height) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([][]byte) + } + } + + if rf, ok := ret.Get(1).(func(flow.RegisterIDs, uint64) error); ok { + r1 = rf(registerIDs, height) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // SubscribeEvents provides a mock function with given fields: ctx, startBlockID, startHeight, filter func (_m *API) SubscribeEvents(ctx context.Context, startBlockID flow.Identifier, startHeight uint64, filter state_stream.EventFilter) state_stream.Subscription { ret := _m.Called(ctx, startBlockID, startHeight, filter) diff --git a/engine/access/state_stream/state_stream.go b/engine/access/state_stream/state_stream.go index 93004fd9f87..2d2cca1bbbf 100644 --- a/engine/access/state_stream/state_stream.go +++ b/engine/access/state_stream/state_stream.go @@ -30,6 +30,9 @@ const ( // DefaultHeartbeatInterval specifies the block interval at which heartbeat messages should be sent. DefaultHeartbeatInterval = 1 + + // DefaultRegisterIDsRequestLimit defines the default limit of register IDs for a single request to the get register endpoint + DefaultRegisterIDsRequestLimit = 100 ) // API represents an interface that defines methods for interacting with a blockchain's execution data and events. @@ -40,6 +43,8 @@ type API interface { SubscribeExecutionData(ctx context.Context, startBlockID flow.Identifier, startBlockHeight uint64) Subscription // SubscribeEvents subscribes to events starting from a specific block ID and block height, with an optional event filter. SubscribeEvents(ctx context.Context, startBlockID flow.Identifier, startHeight uint64, filter EventFilter) Subscription + // GetRegisterValues returns register values for a set of register IDs at the provided block height. + GetRegisterValues(registerIDs flow.RegisterIDs, height uint64) ([]flow.RegisterValue, error) } // Subscription represents a streaming request, and handles the communication between the grpc handler diff --git a/engine/collection/test/cluster_switchover_test.go b/engine/collection/test/cluster_switchover_test.go index 618fc327bc9..0c89e9afbaf 100644 --- a/engine/collection/test/cluster_switchover_test.go +++ b/engine/collection/test/cluster_switchover_test.go @@ -235,7 +235,7 @@ func (tc *ClusterSwitchoverTestCase) StartNodes() { nodes = append(nodes, node) } - unittest.RequireCloseBefore(tc.T(), util.AllReady(nodes...), time.Second, "could not start nodes") + unittest.RequireCloseBefore(tc.T(), util.AllReady(nodes...), 3*time.Second, "could not start nodes") // start continuous delivery for all nodes for _, node := range tc.nodes { diff --git a/engine/common/grpc/forwarder/forwarder.go b/engine/common/grpc/forwarder/forwarder.go index 8241f1ead14..a0af264b55a 100644 --- a/engine/common/grpc/forwarder/forwarder.go +++ b/engine/common/grpc/forwarder/forwarder.go @@ -2,37 +2,36 @@ package forwarder import ( "fmt" + "io" "sync" - "time" - "google.golang.org/grpc" "google.golang.org/grpc/codes" - "google.golang.org/grpc/connectivity" - "google.golang.org/grpc/credentials" - "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/status" - rpcConnection "github.com/onflow/flow-go/engine/access/rpc/connection" + "github.com/onflow/flow-go/engine/access/rpc/connection" "github.com/onflow/flow-go/model/flow" - "github.com/onflow/flow-go/utils/grpcutils" "github.com/onflow/flow/protobuf/go/flow/access" ) +// Upstream is a container for an individual upstream containing the id, client and closer for it +type Upstream struct { + id *flow.IdentitySkeleton // the public identity of one network participant (node) + client access.AccessAPIClient // client with gRPC connection + closer io.Closer // closer for client connection, should use to close the connection when done +} + // Forwarder forwards all requests to a set of upstream access nodes or observers type Forwarder struct { lock sync.Mutex roundRobin int - ids flow.IdentitySkeletonList - upstream []access.AccessAPIClient - connections []*grpc.ClientConn - timeout time.Duration - maxMsgSize uint + upstream []Upstream + connFactory connection.ConnectionFactory } -func NewForwarder(identities flow.IdentitySkeletonList, timeout time.Duration, maxMsgSize uint) (*Forwarder, error) { - forwarder := &Forwarder{maxMsgSize: maxMsgSize} - err := forwarder.setFlowAccessAPI(identities, timeout) +func NewForwarder(identities flow.IdentitySkeletonList, connectionFactory connection.ConnectionFactory) (*Forwarder, error) { + forwarder := &Forwarder{connFactory: connectionFactory} + err := forwarder.setFlowAccessAPI(identities) return forwarder, err } @@ -40,15 +39,12 @@ func NewForwarder(identities flow.IdentitySkeletonList, timeout time.Duration, m // It is used by Observer services, Blockchain Data Service, etc. // Make sure that this is just for observation and not a staked participant in the flow network. // This means that observers see a copy of the data but there is no interaction to ensure integrity from the root block. -func (f *Forwarder) setFlowAccessAPI(accessNodeAddressAndPort flow.IdentitySkeletonList, timeout time.Duration) error { - f.timeout = timeout - f.ids = accessNodeAddressAndPort - f.upstream = make([]access.AccessAPIClient, len(accessNodeAddressAndPort)) - f.connections = make([]*grpc.ClientConn, len(accessNodeAddressAndPort)) +func (f *Forwarder) setFlowAccessAPI(accessNodeAddressAndPort flow.IdentitySkeletonList) error { + f.upstream = make([]Upstream, accessNodeAddressAndPort.Count()) for i, identity := range accessNodeAddressAndPort { // Store the faultTolerantClient setup parameters such as address, public, key and timeout, so that // we can refresh the API on connection loss - f.ids[i] = identity + f.upstream[i].id = identity // We fail on any single error on startup, so that // we identify bootstrapping errors early @@ -62,57 +58,25 @@ func (f *Forwarder) setFlowAccessAPI(accessNodeAddressAndPort flow.IdentitySkele return nil } -// reconnectingClient returns an active client, or -// creates one, if the last one is not ready anymore. +// reconnectingClient returns an active client, or creates a new connection. func (f *Forwarder) reconnectingClient(i int) error { - timeout := f.timeout - - if f.connections[i] == nil || f.connections[i].GetState() != connectivity.Ready { - identity := f.ids[i] - var connection *grpc.ClientConn - var err error - if identity.NetworkPubKey == nil { - connection, err = grpc.Dial( - identity.Address, - grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(int(f.maxMsgSize))), - grpc.WithTransportCredentials(insecure.NewCredentials()), - rpcConnection.WithClientTimeoutOption(timeout)) - if err != nil { - return err - } - } else { - tlsConfig, err := grpcutils.DefaultClientTLSConfig(identity.NetworkPubKey) - if err != nil { - return fmt.Errorf("failed to get default TLS client config using public flow networking key %s %w", identity.NetworkPubKey.String(), err) - } - - connection, err = grpc.Dial( - identity.Address, - grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(int(f.maxMsgSize))), - grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)), - rpcConnection.WithClientTimeoutOption(timeout)) - if err != nil { - return fmt.Errorf("cannot connect to %s %w", identity.Address, err) - } - } - connection.Connect() - time.Sleep(1 * time.Second) - state := connection.GetState() - if state != connectivity.Ready && state != connectivity.Connecting { - return fmt.Errorf("%v", state) - } - f.connections[i] = connection - f.upstream[i] = access.NewAccessAPIClient(connection) - } + identity := f.upstream[i].id + accessApiClient, closer, err := f.connFactory.GetAccessAPIClientWithPort(identity.Address, identity.NetworkPubKey) + if err != nil { + return fmt.Errorf("failed to connect to access node at %s: %w", accessApiClient, err) + } + // closer is not nil iff err is nil, should use to close the connection when done + f.upstream[i].closer = closer + f.upstream[i].client = accessApiClient return nil } // FaultTolerantClient implements an upstream connection that reconnects on errors // a reasonable amount of time. -func (f *Forwarder) FaultTolerantClient() (access.AccessAPIClient, error) { +func (f *Forwarder) FaultTolerantClient() (access.AccessAPIClient, io.Closer, error) { if f.upstream == nil || len(f.upstream) == 0 { - return nil, status.Errorf(codes.Unimplemented, "method not implemented") + return nil, nil, status.Errorf(codes.Unimplemented, "method not implemented") } // Reasoning: A retry count of three gives an acceptable 5% failure ratio from a 37% failure ratio. @@ -134,12 +98,8 @@ func (f *Forwarder) FaultTolerantClient() (access.AccessAPIClient, error) { if err != nil { continue } - state := f.connections[f.roundRobin].GetState() - if state != connectivity.Ready && state != connectivity.Connecting { - continue - } - return f.upstream[f.roundRobin], nil + return f.upstream[f.roundRobin].client, f.upstream[f.roundRobin].closer, nil } - return nil, status.Errorf(codes.Unavailable, err.Error()) + return nil, nil, status.Errorf(codes.Unavailable, err.Error()) } diff --git a/engine/common/rpc/convert/execution_data.go b/engine/common/rpc/convert/execution_data.go index 560c4887ade..0c1699c0d90 100644 --- a/engine/common/rpc/convert/execution_data.go +++ b/engine/common/rpc/convert/execution_data.go @@ -316,6 +316,39 @@ func messageToTrustedTransaction( return *t, nil } +func MessageToRegisterID(m *entities.RegisterID) (flow.RegisterID, error) { + if m == nil { + return flow.RegisterID{}, ErrEmptyMessage + } + return flow.RegisterID{ + Owner: m.GetOwner(), + Key: m.GetKey(), + }, nil +} + +// MessagesToRegisterIDs converts a protobuf message to RegisterIDs +func MessagesToRegisterIDs(m []*entities.RegisterID) (flow.RegisterIDs, error) { + if m == nil { + return nil, ErrEmptyMessage + } + result := make(flow.RegisterIDs, len(m)) + for i, entry := range m { + regId, err := MessageToRegisterID(entry) + if err != nil { + return nil, fmt.Errorf("failed to convert register id %d: %w", i, err) + } + result[i] = regId + } + return result, nil +} + +func RegisterIDToMessage(id flow.RegisterID) *entities.RegisterID { + return &entities.RegisterID{ + Owner: id.Owner, + Key: id.Key, + } +} + // insecureAddress converts a raw address to a flow.Address, skipping validation // This is useful when converting transactions from trusted state like BlockExecutionData. // This should only be used for trusted inputs diff --git a/engine/common/rpc/convert/execution_data_test.go b/engine/common/rpc/convert/execution_data_test.go index 99744f9d8cc..4ad06b820d5 100644 --- a/engine/common/rpc/convert/execution_data_test.go +++ b/engine/common/rpc/convert/execution_data_test.go @@ -171,3 +171,34 @@ func TestConvertChunkExecutionData(t *testing.T) { }) } } + +func TestMessageToRegisterIDs(t *testing.T) { + tests := []struct { + name string + regID flow.RegisterID + }{ + { + name: "service level register id", + regID: flow.UUIDRegisterID(0), + }, + { + name: "account level register id", + regID: flow.AccountStatusRegisterID(unittest.AddressFixture()), + }, + { + name: "regular register id", + regID: unittest.RegisterIDFixture(), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + registerIDMessage := convert.RegisterIDToMessage(test.regID) + reConvertedRegisterID, err := convert.MessageToRegisterID(registerIDMessage) + require.NoError(t, err) + assert.Equal(t, test.regID, reConvertedRegisterID) + }) + } + _, err := convert.MessageToRegisterID(nil) + require.ErrorIs(t, err, convert.ErrEmptyMessage) +} diff --git a/engine/consensus/dkg/reactor_engine.go b/engine/consensus/dkg/reactor_engine.go index b1055b9ff89..7b0a355789e 100644 --- a/engine/consensus/dkg/reactor_engine.go +++ b/engine/consensus/dkg/reactor_engine.go @@ -348,7 +348,7 @@ func (e *ReactorEngine) getDKGInfo(firstBlockID flow.Identifier) (*dkgInfo, erro if err != nil { return nil, fmt.Errorf("could not retrieve epoch dkg final views: %w", err) } - seed := make([]byte, crypto.SeedMinLenDKG) + seed := make([]byte, crypto.KeyGenSeedMinLen) _, err = rand.Read(seed) if err != nil { return nil, fmt.Errorf("could not generate random seed: %w", err) diff --git a/engine/execution/block_result.go b/engine/execution/block_result.go index cdb6e3d54f2..e10a6aeea42 100644 --- a/engine/execution/block_result.go +++ b/engine/execution/block_result.go @@ -94,6 +94,25 @@ func (er *BlockExecutionResult) AllConvertedServiceEvents() flow.ServiceEventLis return res } +// AllUpdatedRegisters returns all updated unique register entries +// Note: order is not determinstic +func (er *BlockExecutionResult) AllUpdatedRegisters() []flow.RegisterEntry { + updates := make(map[flow.RegisterID]flow.RegisterValue) + for _, ce := range er.collectionExecutionResults { + for regID, regVal := range ce.executionSnapshot.WriteSet { + updates[regID] = regVal + } + } + res := make([]flow.RegisterEntry, 0, len(updates)) + for regID, regVal := range updates { + res = append(res, flow.RegisterEntry{ + Key: regID, + Value: regVal, + }) + } + return res +} + // BlockAttestationResult holds collection attestation results type BlockAttestationResult struct { *BlockExecutionResult diff --git a/engine/execution/checker/engine.go b/engine/execution/checker/engine.go index dcf330bd2c7..a1a96184105 100644 --- a/engine/execution/checker/engine.go +++ b/engine/execution/checker/engine.go @@ -82,7 +82,7 @@ func (e *Engine) checkLastSealed(finalizedID flow.Identifier) error { blockID := seal.BlockID sealedCommit := seal.FinalState - mycommit, err := e.execState.StateCommitmentByBlockID(e.unit.Ctx(), blockID) + mycommit, err := e.execState.StateCommitmentByBlockID(blockID) if errors.Is(err, storage.ErrNotFound) { // have not executed the sealed block yet // in other words, this can't detect execution fork, if the execution is behind diff --git a/engine/execution/computation/committer/committer.go b/engine/execution/computation/committer/committer.go index df2ebb035c5..86d72db1ead 100644 --- a/engine/execution/computation/committer/committer.go +++ b/engine/execution/computation/committer/committer.go @@ -6,6 +6,7 @@ import ( "github.com/hashicorp/go-multierror" + "github.com/onflow/flow-go/engine/execution" execState "github.com/onflow/flow-go/engine/execution/state" "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/ledger" @@ -31,25 +32,26 @@ func NewLedgerViewCommitter( func (committer *LedgerViewCommitter) CommitView( snapshot *snapshot.ExecutionSnapshot, - baseState flow.StateCommitment, + baseStorageSnapshot execution.ExtendableStorageSnapshot, ) ( newCommit flow.StateCommitment, proof []byte, trieUpdate *ledger.TrieUpdate, + newStorageSnapshot execution.ExtendableStorageSnapshot, err error, ) { var err1, err2 error var wg sync.WaitGroup wg.Add(1) go func() { - proof, err2 = committer.collectProofs(snapshot, baseState) + proof, err2 = committer.collectProofs(snapshot, baseStorageSnapshot) wg.Done() }() - newCommit, trieUpdate, err1 = execState.CommitDelta( + newCommit, trieUpdate, newStorageSnapshot, err1 = execState.CommitDelta( committer.ledger, snapshot, - baseState) + baseStorageSnapshot) wg.Wait() if err1 != nil { @@ -63,11 +65,12 @@ func (committer *LedgerViewCommitter) CommitView( func (committer *LedgerViewCommitter) collectProofs( snapshot *snapshot.ExecutionSnapshot, - baseState flow.StateCommitment, + baseStorageSnapshot execution.ExtendableStorageSnapshot, ) ( proof []byte, err error, ) { + baseState := baseStorageSnapshot.Commitment() // Reason for including AllRegisterIDs (read and written registers) instead of ReadRegisterIDs (only read registers): // AllRegisterIDs returns deduplicated register IDs that were touched by both // reads and writes during the block execution. diff --git a/engine/execution/computation/committer/committer_test.go b/engine/execution/computation/committer/committer_test.go index 18657a67f13..b0f927c2807 100644 --- a/engine/execution/computation/committer/committer_test.go +++ b/engine/execution/computation/committer/committer_test.go @@ -1,48 +1,105 @@ package committer_test import ( + "fmt" "testing" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "github.com/onflow/flow-go/engine/execution/computation/committer" + "github.com/onflow/flow-go/engine/execution/storehouse" "github.com/onflow/flow-go/fvm/storage/snapshot" - led "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/common/convert" + "github.com/onflow/flow-go/ledger/common/pathfinder" + "github.com/onflow/flow-go/ledger/complete" ledgermock "github.com/onflow/flow-go/ledger/mock" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/trace" - utils "github.com/onflow/flow-go/utils/unittest" + "github.com/onflow/flow-go/utils/unittest" ) func TestLedgerViewCommitter(t *testing.T) { - t.Run("calls to set and prove", func(t *testing.T) { + // verify after committing a snapshot, proof will be generated, + // and changes are saved in storage snapshot + t.Run("CommitView should return proof and statecommitment", func(t *testing.T) { - ledger := new(ledgermock.Ledger) - com := committer.NewLedgerViewCommitter(ledger, trace.NewNoopTracer()) + l := ledgermock.NewLedger(t) + committer := committer.NewLedgerViewCommitter(l, trace.NewNoopTracer()) - var expectedStateCommitment led.State - copy(expectedStateCommitment[:], []byte{1, 2, 3}) - ledger.On("Set", mock.Anything). - Return(expectedStateCommitment, nil, nil). + // CommitDelta will call ledger.Set and ledger.Prove + + reg := unittest.MakeOwnerReg("key1", "val1") + startState := unittest.StateCommitmentFixture() + + update, err := ledger.NewUpdate(ledger.State(startState), []ledger.Key{convert.RegisterIDToLedgerKey(reg.Key)}, []ledger.Value{reg.Value}) + require.NoError(t, err) + + expectedTrieUpdate, err := pathfinder.UpdateToTrieUpdate(update, complete.DefaultPathFinderVersion) + require.NoError(t, err) + + endState := unittest.StateCommitmentFixture() + require.NotEqual(t, startState, endState) + + // mock ledger.Set + l.On("Set", mock.Anything). + Return(func(update *ledger.Update) (newState ledger.State, trieUpdate *ledger.TrieUpdate, err error) { + if update.State().Equals(ledger.State(startState)) { + return ledger.State(endState), expectedTrieUpdate, nil + } + return ledger.DummyState, nil, fmt.Errorf("wrong update") + }). Once() - expectedProof := led.Proof([]byte{2, 3, 4}) - ledger.On("Prove", mock.Anything). - Return(expectedProof, nil). + // mock ledger.Prove + expectedProof := ledger.Proof([]byte{2, 3, 4}) + l.On("Prove", mock.Anything). + Return(func(query *ledger.Query) (proof ledger.Proof, err error) { + if query.Size() != 1 { + return nil, fmt.Errorf("wrong query size: %v", query.Size()) + } + + k := convert.RegisterIDToLedgerKey(reg.Key) + if !query.Keys()[0].Equals(&k) { + return nil, fmt.Errorf("in correct query key for prove: %v", query.Keys()[0]) + } + + return expectedProof, nil + }). Once() - newState, proof, _, err := com.CommitView( - &snapshot.ExecutionSnapshot{ - WriteSet: map[flow.RegisterID]flow.RegisterValue{ - flow.NewRegisterID("owner", "key"): []byte{1}, - }, + // previous block's storage snapshot + oldReg := unittest.MakeOwnerReg("key1", "oldvalue") + previousBlockSnapshot := storehouse.NewExecutingBlockSnapshot( + snapshot.MapStorageSnapshot{ + oldReg.Key: oldReg.Value, + }, + flow.StateCommitment(update.State()), + ) + + // this block's register updates + blockUpdates := &snapshot.ExecutionSnapshot{ + WriteSet: map[flow.RegisterID]flow.RegisterValue{ + reg.Key: oldReg.Value, }, - utils.StateCommitmentFixture()) + } + + newCommit, proof, trieUpdate, newStorageSnapshot, err := committer.CommitView( + blockUpdates, + previousBlockSnapshot, + ) + require.NoError(t, err) - require.Equal(t, flow.StateCommitment(expectedStateCommitment), newState) + + // verify CommitView returns expected proof and statecommitment + require.Equal(t, previousBlockSnapshot.Commitment(), flow.StateCommitment(trieUpdate.RootHash)) + require.Equal(t, newCommit, newStorageSnapshot.Commitment()) + require.Equal(t, endState, newCommit) require.Equal(t, []uint8(expectedProof), proof) + require.True(t, expectedTrieUpdate.Equals(trieUpdate)) + }) } diff --git a/engine/execution/computation/committer/noop.go b/engine/execution/computation/committer/noop.go index dcdefbac634..b4549a78c15 100644 --- a/engine/execution/computation/committer/noop.go +++ b/engine/execution/computation/committer/noop.go @@ -1,6 +1,7 @@ package committer import ( + "github.com/onflow/flow-go/engine/execution" "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/model/flow" @@ -15,12 +16,17 @@ func NewNoopViewCommitter() *NoopViewCommitter { func (NoopViewCommitter) CommitView( _ *snapshot.ExecutionSnapshot, - s flow.StateCommitment, + baseStorageSnapshot execution.ExtendableStorageSnapshot, ) ( flow.StateCommitment, []byte, *ledger.TrieUpdate, + execution.ExtendableStorageSnapshot, error, ) { - return s, nil, nil, nil + + trieUpdate := &ledger.TrieUpdate{ + RootHash: ledger.RootHash(baseStorageSnapshot.Commitment()), + } + return baseStorageSnapshot.Commitment(), []byte{}, trieUpdate, baseStorageSnapshot, nil } diff --git a/engine/execution/computation/computer/computer.go b/engine/execution/computation/computer/computer.go index 6049ed3d3e8..25345aba997 100644 --- a/engine/execution/computation/computer/computer.go +++ b/engine/execution/computation/computer/computer.go @@ -346,7 +346,9 @@ func (e *blockComputer) executeBlock( parentBlockExecutionResultID, block, numTxns, - e.colResCons) + e.colResCons, + baseSnapshot, + ) defer collector.Stop() requestQueue := make(chan TransactionRequest, numTxns) diff --git a/engine/execution/computation/computer/computer_test.go b/engine/execution/computation/computer/computer_test.go index 9affb301140..1f7e5429761 100644 --- a/engine/execution/computation/computer/computer_test.go +++ b/engine/execution/computation/computer/computer_test.go @@ -27,6 +27,7 @@ import ( "github.com/onflow/flow-go/engine/execution/computation/committer" "github.com/onflow/flow-go/engine/execution/computation/computer" computermock "github.com/onflow/flow-go/engine/execution/computation/computer/mock" + "github.com/onflow/flow-go/engine/execution/storehouse" "github.com/onflow/flow-go/engine/execution/testutil" "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/fvm/environment" @@ -40,6 +41,9 @@ import ( "github.com/onflow/flow-go/fvm/storage/state" "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/common/convert" + "github.com/onflow/flow-go/ledger/common/pathfinder" + "github.com/onflow/flow-go/ledger/complete" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/epochs" "github.com/onflow/flow-go/module/executiondatasync/execution_data" @@ -69,22 +73,46 @@ type fakeCommitter struct { func (committer *fakeCommitter) CommitView( view *snapshot.ExecutionSnapshot, - startState flow.StateCommitment, + baseStorageSnapshot execution.ExtendableStorageSnapshot, ) ( flow.StateCommitment, []byte, *ledger.TrieUpdate, + execution.ExtendableStorageSnapshot, error, ) { committer.callCount++ + startState := baseStorageSnapshot.Commitment() endState := incStateCommitment(startState) - trieUpdate := &ledger.TrieUpdate{} - trieUpdate.RootHash[0] = byte(committer.callCount) - return endState, + reg := unittest.MakeOwnerReg("key", fmt.Sprintf("%v", committer.callCount)) + regKey := convert.RegisterIDToLedgerKey(reg.Key) + path, err := pathfinder.KeyToPath( + regKey, + complete.DefaultPathFinderVersion, + ) + if err != nil { + return flow.DummyStateCommitment, nil, nil, nil, err + } + trieUpdate := &ledger.TrieUpdate{ + RootHash: ledger.RootHash(startState), + Paths: []ledger.Path{ + path, + }, + Payloads: []*ledger.Payload{ + ledger.NewPayload(regKey, reg.Value), + }, + } + + newStorageSnapshot := baseStorageSnapshot.Extend(endState, map[flow.RegisterID]flow.RegisterValue{ + reg.Key: reg.Value, + }) + + return newStorageSnapshot.Commitment(), []byte{byte(committer.callCount)}, trieUpdate, + newStorageSnapshot, nil } @@ -269,12 +297,12 @@ func TestBlockExecutor_ExecuteBlock(t *testing.T) { chunkDataPack1.Collection, chunkExecutionData1.Collection) assert.NotNil(t, chunkExecutionData1.TrieUpdate) - assert.Equal(t, byte(1), chunkExecutionData1.TrieUpdate.RootHash[0]) + assert.Equal(t, ledger.RootHash(chunk1.StartState), chunkExecutionData1.TrieUpdate.RootHash) chunkExecutionData2 := result.ChunkExecutionDatas[1] assert.NotNil(t, chunkExecutionData2.Collection) assert.NotNil(t, chunkExecutionData2.TrieUpdate) - assert.Equal(t, byte(2), chunkExecutionData2.TrieUpdate.RootHash[0]) + assert.Equal(t, ledger.RootHash(chunk2.StartState), chunkExecutionData2.TrieUpdate.RootHash) assert.GreaterOrEqual(t, vm.CallCount(), 3) // if every transaction is retried once, then the call count should be @@ -322,8 +350,13 @@ func TestBlockExecutor_ExecuteBlock(t *testing.T) { Return(noOpExecutor{}). Once() // just system chunk + snapshot := storehouse.NewExecutingBlockSnapshot( + snapshot.MapStorageSnapshot{}, + unittest.StateCommitmentFixture(), + ) + committer.On("CommitView", mock.Anything, mock.Anything). - Return(nil, nil, nil, nil). + Return(nil, nil, nil, snapshot, nil). Once() // just system chunk result, err := exe.ExecuteBlock( @@ -415,8 +448,13 @@ func TestBlockExecutor_ExecuteBlock(t *testing.T) { // create an empty block block := generateBlock(0, 0, rag) + snapshot := storehouse.NewExecutingBlockSnapshot( + snapshot.MapStorageSnapshot{}, + unittest.StateCommitmentFixture(), + ) + comm.On("CommitView", mock.Anything, mock.Anything). - Return(nil, nil, nil, nil). + Return(nil, nil, nil, snapshot, nil). Once() // just system chunk result, err := exe.ExecuteBlock( @@ -482,8 +520,13 @@ func TestBlockExecutor_ExecuteBlock(t *testing.T) { block := generateBlock(collectionCount, transactionsPerCollection, rag) derivedBlockData := derived.NewEmptyDerivedBlockData(0) + snapshot := storehouse.NewExecutingBlockSnapshot( + snapshot.MapStorageSnapshot{}, + unittest.StateCommitmentFixture(), + ) + committer.On("CommitView", mock.Anything, mock.Anything). - Return(nil, nil, nil, nil). + Return(nil, nil, nil, snapshot, nil). Times(collectionCount + 1) result, err := exe.ExecuteBlock( @@ -556,8 +599,7 @@ func TestBlockExecutor_ExecuteBlock(t *testing.T) { // create a block with 2 collections with 2 transactions each block := generateBlock(collectionCount, transactionsPerCollection, rag) - serviceEvents, err := systemcontracts.ServiceEventsForChain(execCtx.Chain.ChainID()) - require.NoError(t, err) + serviceEvents := systemcontracts.ServiceEventsForChain(execCtx.Chain.ChainID()) payload, err := ccf.Decode(nil, unittest.EpochSetupFixtureCCF) require.NoError(t, err) @@ -600,17 +642,17 @@ func TestBlockExecutor_ExecuteBlock(t *testing.T) { // events to emit for each iteration/transaction events := map[common.Location][]cadence.Event{ common.TransactionLocation(transactions[0].ID()): nil, - common.TransactionLocation(transactions[1].ID()): []cadence.Event{ + common.TransactionLocation(transactions[1].ID()): { serviceEventA, - cadence.Event{ + { EventType: &cadence.EventType{ Location: stdlib.FlowLocation{}, QualifiedIdentifier: "what.ever", }, }, }, - common.TransactionLocation(transactions[2].ID()): []cadence.Event{ - cadence.Event{ + common.TransactionLocation(transactions[2].ID()): { + { EventType: &cadence.EventType{ Location: stdlib.FlowLocation{}, QualifiedIdentifier: "what.ever", @@ -965,8 +1007,13 @@ func TestBlockExecutor_ExecuteBlock(t *testing.T) { transactionsPerCollection := 3 block := generateBlock(collectionCount, transactionsPerCollection, rag) + snapshot := storehouse.NewExecutingBlockSnapshot( + snapshot.MapStorageSnapshot{}, + unittest.StateCommitmentFixture(), + ) + committer.On("CommitView", mock.Anything, mock.Anything). - Return(nil, nil, nil, nil). + Return(nil, nil, nil, snapshot, nil). Times(collectionCount + 1) _, err = exe.ExecuteBlock( @@ -1196,8 +1243,13 @@ func Test_ExecutingSystemCollection(t *testing.T) { ledger := testutil.RootBootstrappedLedger(vm, execCtx) committer := new(computermock.ViewCommitter) + snapshot := storehouse.NewExecutingBlockSnapshot( + snapshot.MapStorageSnapshot{}, + unittest.StateCommitmentFixture(), + ) + committer.On("CommitView", mock.Anything, mock.Anything). - Return(nil, nil, nil, nil). + Return(nil, nil, nil, snapshot, nil). Times(1) // only system chunk noopCollector := metrics.NewNoopCollector() diff --git a/engine/execution/computation/computer/mock/view_committer.go b/engine/execution/computation/computer/mock/view_committer.go index dfcacb97c83..5b635f9804a 100644 --- a/engine/execution/computation/computer/mock/view_committer.go +++ b/engine/execution/computation/computer/mock/view_committer.go @@ -3,9 +3,11 @@ package mock import ( - ledger "github.com/onflow/flow-go/ledger" + execution "github.com/onflow/flow-go/engine/execution" flow "github.com/onflow/flow-go/model/flow" + ledger "github.com/onflow/flow-go/ledger" + mock "github.com/stretchr/testify/mock" snapshot "github.com/onflow/flow-go/fvm/storage/snapshot" @@ -17,17 +19,18 @@ type ViewCommitter struct { } // CommitView provides a mock function with given fields: _a0, _a1 -func (_m *ViewCommitter) CommitView(_a0 *snapshot.ExecutionSnapshot, _a1 flow.StateCommitment) (flow.StateCommitment, []byte, *ledger.TrieUpdate, error) { +func (_m *ViewCommitter) CommitView(_a0 *snapshot.ExecutionSnapshot, _a1 execution.ExtendableStorageSnapshot) (flow.StateCommitment, []byte, *ledger.TrieUpdate, execution.ExtendableStorageSnapshot, error) { ret := _m.Called(_a0, _a1) var r0 flow.StateCommitment var r1 []byte var r2 *ledger.TrieUpdate - var r3 error - if rf, ok := ret.Get(0).(func(*snapshot.ExecutionSnapshot, flow.StateCommitment) (flow.StateCommitment, []byte, *ledger.TrieUpdate, error)); ok { + var r3 execution.ExtendableStorageSnapshot + var r4 error + if rf, ok := ret.Get(0).(func(*snapshot.ExecutionSnapshot, execution.ExtendableStorageSnapshot) (flow.StateCommitment, []byte, *ledger.TrieUpdate, execution.ExtendableStorageSnapshot, error)); ok { return rf(_a0, _a1) } - if rf, ok := ret.Get(0).(func(*snapshot.ExecutionSnapshot, flow.StateCommitment) flow.StateCommitment); ok { + if rf, ok := ret.Get(0).(func(*snapshot.ExecutionSnapshot, execution.ExtendableStorageSnapshot) flow.StateCommitment); ok { r0 = rf(_a0, _a1) } else { if ret.Get(0) != nil { @@ -35,7 +38,7 @@ func (_m *ViewCommitter) CommitView(_a0 *snapshot.ExecutionSnapshot, _a1 flow.St } } - if rf, ok := ret.Get(1).(func(*snapshot.ExecutionSnapshot, flow.StateCommitment) []byte); ok { + if rf, ok := ret.Get(1).(func(*snapshot.ExecutionSnapshot, execution.ExtendableStorageSnapshot) []byte); ok { r1 = rf(_a0, _a1) } else { if ret.Get(1) != nil { @@ -43,7 +46,7 @@ func (_m *ViewCommitter) CommitView(_a0 *snapshot.ExecutionSnapshot, _a1 flow.St } } - if rf, ok := ret.Get(2).(func(*snapshot.ExecutionSnapshot, flow.StateCommitment) *ledger.TrieUpdate); ok { + if rf, ok := ret.Get(2).(func(*snapshot.ExecutionSnapshot, execution.ExtendableStorageSnapshot) *ledger.TrieUpdate); ok { r2 = rf(_a0, _a1) } else { if ret.Get(2) != nil { @@ -51,13 +54,21 @@ func (_m *ViewCommitter) CommitView(_a0 *snapshot.ExecutionSnapshot, _a1 flow.St } } - if rf, ok := ret.Get(3).(func(*snapshot.ExecutionSnapshot, flow.StateCommitment) error); ok { + if rf, ok := ret.Get(3).(func(*snapshot.ExecutionSnapshot, execution.ExtendableStorageSnapshot) execution.ExtendableStorageSnapshot); ok { r3 = rf(_a0, _a1) } else { - r3 = ret.Error(3) + if ret.Get(3) != nil { + r3 = ret.Get(3).(execution.ExtendableStorageSnapshot) + } + } + + if rf, ok := ret.Get(4).(func(*snapshot.ExecutionSnapshot, execution.ExtendableStorageSnapshot) error); ok { + r4 = rf(_a0, _a1) + } else { + r4 = ret.Error(4) } - return r0, r1, r2, r3 + return r0, r1, r2, r3, r4 } type mockConstructorTestingTNewViewCommitter interface { diff --git a/engine/execution/computation/computer/result_collector.go b/engine/execution/computation/computer/result_collector.go index 37ef4540748..703fae44488 100644 --- a/engine/execution/computation/computer/result_collector.go +++ b/engine/execution/computation/computer/result_collector.go @@ -12,6 +12,7 @@ import ( "github.com/onflow/flow-go/crypto/hash" "github.com/onflow/flow-go/engine/execution" "github.com/onflow/flow-go/engine/execution/computation/result" + "github.com/onflow/flow-go/engine/execution/storehouse" "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/fvm/meter" "github.com/onflow/flow-go/fvm/storage/snapshot" @@ -31,11 +32,12 @@ type ViewCommitter interface { // CommitView commits an execution snapshot and collects proofs CommitView( *snapshot.ExecutionSnapshot, - flow.StateCommitment, + execution.ExtendableStorageSnapshot, ) ( - flow.StateCommitment, + flow.StateCommitment, // TODO(leo): deprecate. see storehouse.ExtendableStorageSnapshot.Commitment() []byte, *ledger.TrieUpdate, + execution.ExtendableStorageSnapshot, error, ) } @@ -79,9 +81,10 @@ type resultCollector struct { blockStats module.ExecutionResultStats blockMeter *meter.Meter - currentCollectionStartTime time.Time - currentCollectionState *state.ExecutionState - currentCollectionStats module.ExecutionResultStats + currentCollectionStartTime time.Time + currentCollectionState *state.ExecutionState + currentCollectionStats module.ExecutionResultStats + currentCollectionStorageSnapshot execution.ExtendableStorageSnapshot } func newResultCollector( @@ -97,6 +100,7 @@ func newResultCollector( block *entity.ExecutableBlock, numTransactions int, consumers []result.ExecutedCollectionConsumer, + previousBlockSnapshot snapshot.StorageSnapshot, ) *resultCollector { numCollections := len(block.Collections()) + 1 now := time.Now() @@ -122,6 +126,10 @@ func newResultCollector( currentCollectionStats: module.ExecutionResultStats{ NumberOfCollections: 1, }, + currentCollectionStorageSnapshot: storehouse.NewExecutingBlockSnapshot( + previousBlockSnapshot, + *block.StartState, + ), } go collector.runResultProcessor() @@ -138,14 +146,19 @@ func (collector *resultCollector) commitCollection( collector.blockSpan, trace.EXECommitDelta).End() - startState := collector.result.CurrentEndState() - endState, proof, trieUpdate, err := collector.committer.CommitView( + startState := collector.currentCollectionStorageSnapshot.Commitment() + + _, proof, trieUpdate, newSnapshot, err := collector.committer.CommitView( collectionExecutionSnapshot, - startState) + collector.currentCollectionStorageSnapshot, + ) if err != nil { return fmt.Errorf("commit view failed: %w", err) } + endState := newSnapshot.Commitment() + collector.currentCollectionStorageSnapshot = newSnapshot + execColRes := collector.result.CollectionExecutionResultAt(collection.collectionIndex) execColRes.UpdateExecutionSnapshot(collectionExecutionSnapshot) @@ -181,7 +194,7 @@ func (collector *resultCollector) commitCollection( spock, err := collector.signer.SignFunc( collectionExecutionSnapshot.SpockSecret, collector.spockHasher, - SPOCKProve) + crypto.SPOCKProve) if err != nil { return fmt.Errorf("signing spock hash failed: %w", err) } diff --git a/engine/execution/computation/computer/spock_norelic.go b/engine/execution/computation/computer/spock_norelic.go deleted file mode 100644 index 81678d94f33..00000000000 --- a/engine/execution/computation/computer/spock_norelic.go +++ /dev/null @@ -1,26 +0,0 @@ -//go:build !relic -// +build !relic - -package computer - -import ( - "github.com/onflow/flow-go/crypto" - "github.com/onflow/flow-go/crypto/hash" -) - -// This is a temporary wrapper that simulates a call to SPoCK prove, -// required for the emulator build. The function is never called by the -// emulator although it is required for a successful build. -// -// TODO(tarak): remove once the crypto module properly implements a non-relic -// version of SPOCKProve. -func SPOCKProve( - sk crypto.PrivateKey, - data []byte, - kmac hash.Hasher, -) ( - crypto.Signature, - error, -) { - panic("SPoCK prove not supported when flow-go is built without relic") -} diff --git a/engine/execution/computation/computer/spock_relic.go b/engine/execution/computation/computer/spock_relic.go deleted file mode 100644 index 89a8182ba8f..00000000000 --- a/engine/execution/computation/computer/spock_relic.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build relic -// +build relic - -package computer - -import ( - "github.com/onflow/flow-go/crypto" - "github.com/onflow/flow-go/crypto/hash" -) - -// This is a temporary wrapper that around the crypto library. -// -// TODO(tarak): remove once the crypto module properly implements a non-relic -// version of SPOCKProve. -func SPOCKProve( - sk crypto.PrivateKey, - data []byte, - kmac hash.Hasher, -) ( - crypto.Signature, - error, -) { - return crypto.SPOCKProve(sk, data, kmac) -} diff --git a/engine/execution/computation/execution_verification_test.go b/engine/execution/computation/execution_verification_test.go index 0618d79683e..3583c9e8796 100644 --- a/engine/execution/computation/execution_verification_test.go +++ b/engine/execution/computation/execution_verification_test.go @@ -31,6 +31,7 @@ import ( "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/fvm/storage/derived" + "github.com/onflow/flow-go/fvm/systemcontracts" completeLedger "github.com/onflow/flow-go/ledger/complete" "github.com/onflow/flow-go/ledger/complete/wal/fixtures" "github.com/onflow/flow-go/model/flow" @@ -330,6 +331,11 @@ func TestTransactionFeeDeduction(t *testing.T) { fundingAmount := uint64(100_000_000) transferAmount := uint64(123_456) + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + + depositedEvent := fmt.Sprintf("A.%s.FlowToken.TokensDeposited", sc.FlowToken.Address) + withdrawnEvent := fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", sc.FlowToken.Address) + testCases := []testCase{ { name: "Transaction fee deduction emits events", @@ -348,10 +354,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the first collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -377,10 +383,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the last collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -408,10 +414,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the last collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -437,10 +443,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the last collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -469,10 +475,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the last collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -498,10 +504,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the last collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -527,10 +533,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the last collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -556,10 +562,10 @@ func TestTransactionFeeDeduction(t *testing.T) { // events of the last collection events := cr.CollectionExecutionResultAt(2).Events() for _, e := range events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -611,7 +617,7 @@ func TestTransactionFeeDeduction(t *testing.T) { // Deposit the withdrawn tokens in the recipient's receiver receiverRef.deposit(from: <-self.sentVault) } - }`, fvm.FungibleTokenAddress(chain), fvm.FlowTokenAddress(chain))), + }`, sc.FungibleToken.Address, sc.FlowToken.Address)), ) } diff --git a/engine/execution/computation/manager_test.go b/engine/execution/computation/manager_test.go index 0b23a1c7429..b5b3452a769 100644 --- a/engine/execution/computation/manager_test.go +++ b/engine/execution/computation/manager_test.go @@ -33,6 +33,7 @@ import ( "github.com/onflow/flow-go/fvm/storage" "github.com/onflow/flow-go/fvm/storage/derived" "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/ledger/complete" "github.com/onflow/flow-go/ledger/complete/wal/fixtures" "github.com/onflow/flow-go/model/flow" @@ -240,13 +241,15 @@ func TestExecuteScript(t *testing.T) { ledger := testutil.RootBootstrappedLedger(vm, execCtx, fvm.WithExecutionMemoryLimit(math.MaxUint64)) + sc := systemcontracts.SystemContractsForChain(execCtx.Chain.ChainID()) + script := []byte(fmt.Sprintf( ` import FungibleToken from %s pub fun main() {} `, - fvm.FungibleTokenAddress(execCtx.Chain).HexWithPrefix(), + sc.FungibleToken.Address.HexWithPrefix(), )) bservice := requesterunit.MockBlobService(blockstore.NewBlockstore(dssync.MutexWrap(datastore.NewMapDatastore()))) @@ -305,13 +308,15 @@ func TestExecuteScript_BalanceScriptFailsIfViewIsEmpty(t *testing.T) { return nil, fmt.Errorf("error getting register") }) + sc := systemcontracts.SystemContractsForChain(execCtx.Chain.ChainID()) + script := []byte(fmt.Sprintf( ` pub fun main(): UFix64 { return getAccount(%s).balance } `, - fvm.FungibleTokenAddress(execCtx.Chain).HexWithPrefix(), + sc.FungibleToken.Address.HexWithPrefix(), )) bservice := requesterunit.MockBlobService(blockstore.NewBlockstore(dssync.MutexWrap(datastore.NewMapDatastore()))) diff --git a/engine/execution/computation/query/executor.go b/engine/execution/computation/query/executor.go index b129a4d3609..104fa2a9e77 100644 --- a/engine/execution/computation/query/executor.go +++ b/engine/execution/computation/query/executor.go @@ -54,6 +54,7 @@ type Executor interface { type QueryConfig struct { LogTimeThreshold time.Duration ExecutionTimeLimit time.Duration + ComputationLimit uint64 MaxErrorMessageSize int } @@ -61,6 +62,7 @@ func NewDefaultConfig() QueryConfig { return QueryConfig{ LogTimeThreshold: DefaultLogTimeThreshold, ExecutionTimeLimit: DefaultExecutionTimeLimit, + ComputationLimit: fvm.DefaultComputationLimit, MaxErrorMessageSize: DefaultMaxErrorMessageSize, } } @@ -87,6 +89,9 @@ func NewQueryExecutor( derivedChainData *derived.DerivedChainData, entropyPerBlock EntropyProviderPerBlock, ) *QueryExecutor { + if config.ComputationLimit > 0 { + vmCtx = fvm.NewContextFromParent(vmCtx, fvm.WithComputationLimit(config.ComputationLimit)) + } return &QueryExecutor{ config: config, logger: logger, diff --git a/engine/execution/execution_test.go b/engine/execution/execution_test.go index 1c5c98e8229..1690cdfe06c 100644 --- a/engine/execution/execution_test.go +++ b/engine/execution/execution_test.go @@ -236,7 +236,13 @@ func TestExecutionFlow(t *testing.T) { }, time.Second*10, time.Millisecond*500) // check that the block has been executed. - exeNode.AssertHighestExecutedBlock(t, block.Header) + exeNode.AssertBlockIsExecuted(t, block.Header) + + if exeNode.StorehouseEnabled { + exeNode.AssertHighestExecutedBlock(t, genesis.Header) + } else { + exeNode.AssertHighestExecutedBlock(t, block.Header) + } myReceipt, err := exeNode.MyExecutionReceipts.MyReceipt(block.ID()) require.NoError(t, err) @@ -453,12 +459,20 @@ func TestFailedTxWillNotChangeStateCommitment(t *testing.T) { hub.DeliverAllEventually(t, func() bool { return receiptsReceived.Load() == 1 }) - exe1Node.AssertHighestExecutedBlock(t, block1.Header) - scExe1Genesis, err := exe1Node.ExecutionState.StateCommitmentByBlockID(context.Background(), genesis.ID()) + if exe1Node.StorehouseEnabled { + exe1Node.AssertHighestExecutedBlock(t, genesis.Header) + } else { + exe1Node.AssertHighestExecutedBlock(t, block1.Header) + } + + exe1Node.AssertBlockIsExecuted(t, block1.Header) + exe1Node.AssertBlockNotExecuted(t, block2.Header) + + scExe1Genesis, err := exe1Node.ExecutionState.StateCommitmentByBlockID(genesis.ID()) assert.NoError(t, err) - scExe1Block1, err := exe1Node.ExecutionState.StateCommitmentByBlockID(context.Background(), block1.ID()) + scExe1Block1, err := exe1Node.ExecutionState.StateCommitmentByBlockID(block1.ID()) assert.NoError(t, err) assert.NotEqual(t, scExe1Genesis, scExe1Block1) @@ -475,11 +489,11 @@ func TestFailedTxWillNotChangeStateCommitment(t *testing.T) { }) // ensure state has been synced across both nodes - exe1Node.AssertHighestExecutedBlock(t, block3.Header) - // exe2Node.AssertHighestExecutedBlock(t, block3.Header) + exe1Node.AssertBlockIsExecuted(t, block2.Header) + exe1Node.AssertBlockIsExecuted(t, block3.Header) // verify state commitment of block 2 is the same as block 1, since tx failed on seq number verification - scExe1Block2, err := exe1Node.ExecutionState.StateCommitmentByBlockID(context.Background(), block2.ID()) + scExe1Block2, err := exe1Node.ExecutionState.StateCommitmentByBlockID(block2.ID()) assert.NoError(t, err) // TODO this is no longer valid because the system chunk can change the state //assert.Equal(t, scExe1Block1, scExe1Block2) diff --git a/engine/execution/ingestion/engine.go b/engine/execution/ingestion/engine.go index bf8b67ad85c..6f3fe614f89 100644 --- a/engine/execution/ingestion/engine.go +++ b/engine/execution/ingestion/engine.go @@ -284,7 +284,7 @@ func (e *Engine) handleBlock(ctx context.Context, block *flow.Block) error { span, _ := e.tracer.StartBlockSpan(ctx, blockID, trace.EXEHandleBlock) defer span.End() - executed, err := state.IsBlockExecuted(e.unit.Ctx(), e.execState, blockID) + executed, err := e.execState.IsBlockExecuted(block.Header.Height, blockID) if err != nil { return fmt.Errorf("could not check whether block is executed: %w", err) } @@ -357,7 +357,7 @@ func (e *Engine) enqueueBlockAndCheckExecutable( // check if the block's parent has been executed. (we can't execute the block if the parent has // not been executed yet) // check if there is a statecommitment for the parent block - parentCommitment, err := e.execState.StateCommitmentByBlockID(e.unit.Ctx(), block.Header.ParentID) + parentCommitment, err := e.execState.StateCommitmentByBlockID(block.Header.ParentID) // if we found the statecommitment for the parent block, then add it to the executable block. if err == nil { @@ -429,7 +429,10 @@ func (e *Engine) executeBlock( return } - snapshot := e.execState.NewStorageSnapshot(*executableBlock.StartState) + snapshot := e.execState.NewStorageSnapshot(*executableBlock.StartState, + executableBlock.Block.Header.ParentID, + executableBlock.Block.Header.Height-1, + ) computationResult, err := e.computationManager.ComputeBlock( ctx, diff --git a/engine/execution/ingestion/engine_test.go b/engine/execution/ingestion/engine_test.go index 66fd0c259cc..e2eb15c50b1 100644 --- a/engine/execution/ingestion/engine_test.go +++ b/engine/execution/ingestion/engine_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "github.com/onflow/flow-go/crypto" + "github.com/onflow/flow-go/fvm/storage/snapshot" enginePkg "github.com/onflow/flow-go/engine" "github.com/onflow/flow-go/engine/execution" @@ -112,7 +113,7 @@ func runWithEngine(t *testing.T, f func(testingContext)) { uploadMgr := uploader.NewManager(trace.NewNoopTracer()) fetcher := mocks.NewMockFetcher() - loader := loader.NewLoader(log, protocolState, headers, executionState) + loader := loader.NewUnexecutedLoader(log, protocolState, headers, executionState) engine, err = New( unit, @@ -171,9 +172,10 @@ func TestExecuteOneBlock(t *testing.T) { blockA := makeBlockWithCollection(store.RootBlock, &col) result := store.CreateBlockAndMockResult(t, blockA) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(result) // receive block err := ctx.engine.handleBlock(context.Background(), blockA.Block) @@ -223,12 +225,14 @@ func TestExecuteBlocks(t *testing.T) { // executable as soon as its parent block A is executed. blockA := makeBlockWithCollection(store.RootBlock, &col1) blockB := makeBlockWithCollection(blockA.Block.Header, &col2) - store.CreateBlockAndMockResult(t, blockA) - store.CreateBlockAndMockResult(t, blockB) + resultA := store.CreateBlockAndMockResult(t, blockA) + resultB := store.CreateBlockAndMockResult(t, blockB) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) + ctx.mockNewStorageSnapshot(resultB) ctx.providerEngine.On("BroadcastExecutionReceipt", mock.Anything, mock.Anything, mock.Anything).Return(false, nil) // receive block @@ -269,15 +273,17 @@ func TestExecuteNextBlockIfCollectionIsReady(t *testing.T) { // Root <- A[C1] <- B[C2] blockA := makeBlockWithCollection(store.RootBlock, &col1) blockB := makeBlockWithCollection(blockA.Block.Header, &col2) - store.CreateBlockAndMockResult(t, blockA) - store.CreateBlockAndMockResult(t, blockB) + resultA := store.CreateBlockAndMockResult(t, blockA) + resultB := store.CreateBlockAndMockResult(t, blockB) // C2 is available in storage require.NoError(t, ctx.collections.Store(&col2)) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) + ctx.mockNewStorageSnapshot(resultB) // receiving block A and B will not trigger any execution // because A is missing collection C1, B is waiting for A to be executed @@ -317,11 +323,12 @@ func TestExecuteBlockOnlyOnce(t *testing.T) { col := unittest.CollectionFixture(1) // Root <- A[C] blockA := makeBlockWithCollection(store.RootBlock, &col) - store.CreateBlockAndMockResult(t, blockA) + resultA := store.CreateBlockAndMockResult(t, blockA) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) // receive block err := ctx.engine.handleBlock(context.Background(), blockA.Block) @@ -372,12 +379,14 @@ func TestExecuteForkConcurrently(t *testing.T) { blockA := makeBlockWithCollection(store.RootBlock, &col1, &col2) blockB := makeBlockWithCollection(store.RootBlock, &col1, &col2) - store.CreateBlockAndMockResult(t, blockA) - store.CreateBlockAndMockResult(t, blockB) + resultA := store.CreateBlockAndMockResult(t, blockA) + resultB := store.CreateBlockAndMockResult(t, blockB) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) + ctx.mockNewStorageSnapshot(resultB) // receive blocks err := ctx.engine.handleBlock(context.Background(), blockA.Block) @@ -421,13 +430,16 @@ func TestExecuteBlockInOrder(t *testing.T) { blockA := makeBlockWithCollection(store.RootBlock, &col1, &col2) blockB := makeBlockWithCollection(store.RootBlock, &col2) blockC := makeBlockWithCollection(store.RootBlock, &col3) - store.CreateBlockAndMockResult(t, blockA) - store.CreateBlockAndMockResult(t, blockB) - store.CreateBlockAndMockResult(t, blockC) + resultA := store.CreateBlockAndMockResult(t, blockA) + resultB := store.CreateBlockAndMockResult(t, blockB) + resultC := store.CreateBlockAndMockResult(t, blockC) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) + ctx.mockNewStorageSnapshot(resultB) + ctx.mockNewStorageSnapshot(resultC) // receive blocks err := ctx.engine.handleBlock(context.Background(), blockA.Block) @@ -483,8 +495,8 @@ func TestStopAtHeightWhenFinalizedBeforeExecuted(t *testing.T) { blockC := makeBlockWithCollection(blockB.Block.Header) blockD := makeBlockWithCollection(blockC.Block.Header) - store.CreateBlockAndMockResult(t, blockA) - store.CreateBlockAndMockResult(t, blockB) + resultA := store.CreateBlockAndMockResult(t, blockA) + resultB := store.CreateBlockAndMockResult(t, blockB) store.CreateBlockAndMockResult(t, blockC) store.CreateBlockAndMockResult(t, blockD) @@ -495,9 +507,11 @@ func TestStopAtHeightWhenFinalizedBeforeExecuted(t *testing.T) { }) require.NoError(t, err) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) + ctx.mockNewStorageSnapshot(resultB) // receive blocks err = ctx.engine.handleBlock(context.Background(), blockA.Block) @@ -550,8 +564,8 @@ func TestStopAtHeightWhenExecutedBeforeFinalized(t *testing.T) { blockC := makeBlockWithCollection(blockB.Block.Header) blockD := makeBlockWithCollection(blockC.Block.Header) - store.CreateBlockAndMockResult(t, blockA) - store.CreateBlockAndMockResult(t, blockB) + resultA := store.CreateBlockAndMockResult(t, blockA) + resultB := store.CreateBlockAndMockResult(t, blockB) store.CreateBlockAndMockResult(t, blockC) store.CreateBlockAndMockResult(t, blockD) @@ -562,9 +576,11 @@ func TestStopAtHeightWhenExecutedBeforeFinalized(t *testing.T) { }) require.NoError(t, err) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) + ctx.mockNewStorageSnapshot(resultB) ctx.providerEngine.On("BroadcastExecutionReceipt", mock.Anything, mock.Anything, mock.Anything).Return(false, nil) ctx.mockComputeBlock(store) @@ -614,7 +630,7 @@ func TestStopAtHeightWhenExecutionFinalization(t *testing.T) { blockB := makeBlockWithCollection(blockA.Block.Header) blockC := makeBlockWithCollection(blockB.Block.Header) - store.CreateBlockAndMockResult(t, blockA) + resultA := store.CreateBlockAndMockResult(t, blockA) store.CreateBlockAndMockResult(t, blockB) store.CreateBlockAndMockResult(t, blockC) @@ -623,9 +639,10 @@ func TestStopAtHeightWhenExecutionFinalization(t *testing.T) { }) require.NoError(t, err) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(resultA) ctx.providerEngine.On("BroadcastExecutionReceipt", mock.Anything, mock.Anything, mock.Anything).Return(false, nil) ctx.mockComputeBlock(store) @@ -678,9 +695,10 @@ func TestExecutedBlockUploadedFailureDoesntBlock(t *testing.T) { blockA := makeBlockWithCollection(store.RootBlock, &col) result := store.CreateBlockAndMockResult(t, blockA) + ctx.mockIsBlockExecuted(store) ctx.mockStateCommitmentByBlockID(store) ctx.mockGetExecutionResultID(store) - ctx.executionState.On("NewStorageSnapshot", mock.Anything).Return(nil) + ctx.mockNewStorageSnapshot(result) // receive block err := ctx.engine.handleBlock(context.Background(), blockA.Block) @@ -741,63 +759,74 @@ func makeBlockWithCollection(parent *flow.Header, cols ...*flow.Collection) *ent return executableBlock } +func (ctx *testingContext) mockIsBlockExecuted(store *mocks.MockBlockStore) { + ctx.executionState.On("IsBlockExecuted", mock.Anything, mock.Anything). + Return(func(height uint64, blockID flow.Identifier) (bool, error) { + _, err := store.GetExecuted(blockID) + if err != nil { + return false, nil + } + return true, nil + }) +} + func (ctx *testingContext) mockStateCommitmentByBlockID(store *mocks.MockBlockStore) { - mocked := ctx.executionState.On("StateCommitmentByBlockID", mock.Anything, mock.Anything) - // https://github.com/stretchr/testify/issues/350#issuecomment-570478958 - mocked.RunFn = func(args mock.Arguments) { - blockID := args[1].(flow.Identifier) - result, err := store.GetExecuted(blockID) - if err != nil { - mocked.ReturnArguments = mock.Arguments{flow.StateCommitment{}, storageerr.ErrNotFound} - return - } - mocked.ReturnArguments = mock.Arguments{result.Result.CurrentEndState(), nil} - } + ctx.executionState.On("StateCommitmentByBlockID", mock.Anything). + Return(func(blockID flow.Identifier) (flow.StateCommitment, error) { + result, err := store.GetExecuted(blockID) + if err != nil { + return flow.StateCommitment{}, storageerr.ErrNotFound + } + return result.Result.CurrentEndState(), nil + }) } func (ctx *testingContext) mockGetExecutionResultID(store *mocks.MockBlockStore) { + ctx.executionState.On("GetExecutionResultID", mock.Anything, mock.Anything). + Return(func(ctx context.Context, blockID flow.Identifier) (flow.Identifier, error) { + blockResult, err := store.GetExecuted(blockID) + if err != nil { + return flow.ZeroID, storageerr.ErrNotFound + } + + return blockResult.Result.ExecutionReceipt.ExecutionResult.ID(), nil + }) +} - mocked := ctx.executionState.On("GetExecutionResultID", mock.Anything, mock.Anything) - mocked.RunFn = func(args mock.Arguments) { - blockID := args[1].(flow.Identifier) - blockResult, err := store.GetExecuted(blockID) - if err != nil { - mocked.ReturnArguments = mock.Arguments{nil, storageerr.ErrNotFound} - return - } - - mocked.ReturnArguments = mock.Arguments{ - blockResult.Result.ExecutionReceipt.ExecutionResult.ID(), nil} - } +func (ctx *testingContext) mockNewStorageSnapshot(result *execution.ComputationResult) { + // the result is the mocked result for the block, in other words, if the ingestion executes this block, + // the mocked computationManager will produce this result. + // so when mocking the StorageSnapshot method, it must be called with the StartState, as well as its + // parent block, which is used for retrieving the storage state at the end of the parent block. + ctx.executionState.On("NewStorageSnapshot", + *result.ExecutableBlock.StartState, + result.ExecutableBlock.Block.Header.ParentID, + result.ExecutableBlock.Block.Header.Height-1).Return(nil) } func (ctx *testingContext) mockComputeBlock(store *mocks.MockBlockStore) { - mocked := ctx.computationManager.On("ComputeBlock", mock.Anything, mock.Anything, mock.Anything, mock.Anything) - mocked.RunFn = func(args mock.Arguments) { - block := args[2].(*entity.ExecutableBlock) - blockResult, ok := store.ResultByBlock[block.ID()] - if !ok { - mocked.ReturnArguments = mock.Arguments{nil, fmt.Errorf("block %s not found", block.ID())} - return - } - mocked.ReturnArguments = mock.Arguments{blockResult.Result, nil} - } + ctx.computationManager.On("ComputeBlock", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(func(ctx context.Context, + parentBlockExecutionResultID flow.Identifier, + block *entity.ExecutableBlock, + snapshot snapshot.StorageSnapshot) ( + *execution.ComputationResult, error) { + blockResult, ok := store.ResultByBlock[block.ID()] + if !ok { + return nil, fmt.Errorf("block %s not found", block.ID()) + } + return blockResult.Result, nil + }) } func (ctx *testingContext) mockSaveExecutionResults(store *mocks.MockBlockStore, wg *sync.WaitGroup) { - mocked := ctx.executionState. - On("SaveExecutionResults", mock.Anything, mock.Anything) - - mocked.RunFn = func(args mock.Arguments) { - result := args[1].(*execution.ComputationResult) - - err := store.MarkExecuted(result) - if err != nil { - mocked.ReturnArguments = mock.Arguments{err} - wg.Done() - return - } - mocked.ReturnArguments = mock.Arguments{nil} - wg.Done() - } + ctx.executionState.On("SaveExecutionResults", mock.Anything, mock.Anything). + Return(func(ctx context.Context, result *execution.ComputationResult) error { + defer wg.Done() + err := store.MarkExecuted(result) + if err != nil { + return err + } + return nil + }) } diff --git a/engine/execution/ingestion/loader/loader.go b/engine/execution/ingestion/loader/unexecuted_loader.go similarity index 84% rename from engine/execution/ingestion/loader/loader.go rename to engine/execution/ingestion/loader/unexecuted_loader.go index ec69a94a3a7..09fcf56621c 100644 --- a/engine/execution/ingestion/loader/loader.go +++ b/engine/execution/ingestion/loader/unexecuted_loader.go @@ -13,28 +13,31 @@ import ( "github.com/onflow/flow-go/utils/logging" ) -type Loader struct { +// deprecated. Storehouse is going to use unfinalized loader instead +type UnexecutedLoader struct { log zerolog.Logger state protocol.State headers storage.Headers // see comments on getHeaderByHeight for why we need it execState state.ExecutionState } -func NewLoader( +func NewUnexecutedLoader( log zerolog.Logger, state protocol.State, headers storage.Headers, execState state.ExecutionState, -) *Loader { - return &Loader{ - log: log.With().Str("component", "ingestion_engine_block_loader").Logger(), +) *UnexecutedLoader { + return &UnexecutedLoader{ + log: log.With().Str("component", "ingestion_engine_unexecuted_loader").Logger(), state: state, headers: headers, execState: execState, } } -func (e *Loader) LoadUnexecuted(ctx context.Context) ([]flow.Identifier, error) { +// LoadUnexecuted loads all unexecuted and validated blocks +// any error returned are exceptions +func (e *UnexecutedLoader) LoadUnexecuted(ctx context.Context) ([]flow.Identifier, error) { // saving an executed block is currently not transactional, so it's possible // the block is marked as executed but the receipt might not be saved during a crash. // in order to mitigate this problem, we always re-execute the last executed and finalized @@ -60,7 +63,7 @@ func (e *Loader) LoadUnexecuted(ctx context.Context) ([]flow.Identifier, error) blockIDs := make([]flow.Identifier, 0) isRoot := rootBlock.ID() == last.ID() if !isRoot { - executed, err := state.IsBlockExecuted(ctx, e.execState, lastExecutedID) + executed, err := e.execState.IsBlockExecuted(lastExecutedHeight, lastExecutedID) if err != nil { return nil, fmt.Errorf("cannot check is last exeucted final block has been executed %v: %w", lastExecutedID, err) } @@ -101,7 +104,7 @@ func (e *Loader) LoadUnexecuted(ctx context.Context) ([]flow.Identifier, error) return blockIDs, nil } -func (e *Loader) unexecutedBlocks(ctx context.Context) ( +func (e *UnexecutedLoader) unexecutedBlocks(ctx context.Context) ( finalized []flow.Identifier, pending []flow.Identifier, err error, @@ -123,7 +126,7 @@ func (e *Loader) unexecutedBlocks(ctx context.Context) ( return finalized, pending, nil } -func (e *Loader) finalizedUnexecutedBlocks(ctx context.Context, finalized protocol.Snapshot) ( +func (e *UnexecutedLoader) finalizedUnexecutedBlocks(ctx context.Context, finalized protocol.Snapshot) ( []flow.Identifier, error, ) { @@ -153,7 +156,7 @@ func (e *Loader) finalizedUnexecutedBlocks(ctx context.Context, finalized protoc return nil, fmt.Errorf("could not get header at height: %v, %w", lastExecuted, err) } - executed, err := state.IsBlockExecuted(ctx, e.execState, header.ID()) + executed, err := e.execState.IsBlockExecuted(header.Height, header.ID()) if err != nil { return nil, fmt.Errorf("could not check whether block is executed: %w", err) } @@ -190,7 +193,7 @@ func (e *Loader) finalizedUnexecutedBlocks(ctx context.Context, finalized protoc return unexecuted, nil } -func (e *Loader) pendingUnexecutedBlocks(ctx context.Context, finalized protocol.Snapshot) ( +func (e *UnexecutedLoader) pendingUnexecutedBlocks(ctx context.Context, finalized protocol.Snapshot) ( []flow.Identifier, error, ) { @@ -202,7 +205,11 @@ func (e *Loader) pendingUnexecutedBlocks(ctx context.Context, finalized protocol unexecuted := make([]flow.Identifier, 0) for _, pending := range pendings { - executed, err := state.IsBlockExecuted(ctx, e.execState, pending) + p, err := e.headers.ByBlockID(pending) + if err != nil { + return nil, fmt.Errorf("could not get header by block id: %w", err) + } + executed, err := e.execState.IsBlockExecuted(p.Height, pending) if err != nil { return nil, fmt.Errorf("could not check block executed or not: %w", err) } @@ -218,7 +225,7 @@ func (e *Loader) pendingUnexecutedBlocks(ctx context.Context, finalized protocol // if the EN is dynamically bootstrapped, the finalized blocks at height range: // [ sealedRoot.Height, finalizedRoot.Height - 1] can not be retrieved from // protocol state, but only from headers -func (e *Loader) getHeaderByHeight(height uint64) (*flow.Header, error) { +func (e *UnexecutedLoader) getHeaderByHeight(height uint64) (*flow.Header, error) { // we don't use protocol state because for dynamic boostrapped execution node // the last executed and sealed block is below the finalized root block return e.headers.ByHeight(height) diff --git a/engine/execution/ingestion/loader/loader_test.go b/engine/execution/ingestion/loader/unexecuted_loader_test.go similarity index 83% rename from engine/execution/ingestion/loader/loader_test.go rename to engine/execution/ingestion/loader/unexecuted_loader_test.go index 5b61d155e8c..23779394c5b 100644 --- a/engine/execution/ingestion/loader/loader_test.go +++ b/engine/execution/ingestion/loader/unexecuted_loader_test.go @@ -11,7 +11,6 @@ import ( "github.com/onflow/flow-go/engine/execution/ingestion" "github.com/onflow/flow-go/engine/execution/ingestion/loader" - "github.com/onflow/flow-go/engine/execution/state" stateMock "github.com/onflow/flow-go/engine/execution/state/mock" "github.com/onflow/flow-go/model/flow" storageerr "github.com/onflow/flow-go/storage" @@ -20,7 +19,7 @@ import ( "github.com/onflow/flow-go/utils/unittest/mocks" ) -var _ ingestion.BlockLoader = (*loader.Loader)(nil) +var _ ingestion.BlockLoader = (*loader.UnexecutedLoader)(nil) // ExecutionState is a mocked version of execution state that // simulates some of its behavior for testing purpose @@ -41,7 +40,6 @@ func newMockExecutionState(seal *flow.Seal, genesis *flow.Header) *mockExecution } func (es *mockExecutionState) StateCommitmentByBlockID( - ctx context.Context, blockID flow.Identifier, ) ( flow.StateCommitment, @@ -57,10 +55,16 @@ func (es *mockExecutionState) StateCommitmentByBlockID( return commit, nil } +func (es *mockExecutionState) IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) { + es.Lock() + defer es.Unlock() + _, ok := es.commits[blockID] + return ok, nil +} + func (es *mockExecutionState) ExecuteBlock(t *testing.T, block *flow.Block) { - parentExecuted, err := state.IsBlockExecuted( - context.Background(), - es, + parentExecuted, err := es.IsBlockExecuted( + block.Header.Height, block.Header.ParentID) require.NoError(t, err) require.True(t, parentExecuted, "parent block not executed") @@ -93,7 +97,7 @@ func TestLoadingUnexecutedBlocks(t *testing.T) { headers := storage.NewMockHeaders(ctrl) headers.EXPECT().ByBlockID(genesis.ID()).Return(genesis.Header, nil) log := unittest.Logger() - loader := loader.NewLoader(log, ps, headers, es) + loader := loader.NewUnexecutedLoader(log, ps, headers, es) unexecuted, err := loader.LoadUnexecuted(context.Background()) require.NoError(t, err) @@ -120,8 +124,12 @@ func TestLoadingUnexecutedBlocks(t *testing.T) { ctrl := gomock.NewController(t) headers := storage.NewMockHeaders(ctrl) headers.EXPECT().ByBlockID(genesis.ID()).Return(genesis.Header, nil) + headers.EXPECT().ByBlockID(blockA.ID()).Return(blockA.Header, nil) + headers.EXPECT().ByBlockID(blockB.ID()).Return(blockB.Header, nil) + headers.EXPECT().ByBlockID(blockC.ID()).Return(blockC.Header, nil) + headers.EXPECT().ByBlockID(blockD.ID()).Return(blockD.Header, nil) log := unittest.Logger() - loader := loader.NewLoader(log, ps, headers, es) + loader := loader.NewUnexecutedLoader(log, ps, headers, es) unexecuted, err := loader.LoadUnexecuted(context.Background()) require.NoError(t, err) @@ -148,8 +156,13 @@ func TestLoadingUnexecutedBlocks(t *testing.T) { ctrl := gomock.NewController(t) headers := storage.NewMockHeaders(ctrl) headers.EXPECT().ByBlockID(genesis.ID()).Return(genesis.Header, nil) + headers.EXPECT().ByBlockID(blockA.ID()).Return(blockA.Header, nil) + headers.EXPECT().ByBlockID(blockB.ID()).Return(blockB.Header, nil) + headers.EXPECT().ByBlockID(blockC.ID()).Return(blockC.Header, nil) + headers.EXPECT().ByBlockID(blockD.ID()).Return(blockD.Header, nil) + log := unittest.Logger() - loader := loader.NewLoader(log, ps, headers, es) + loader := loader.NewUnexecutedLoader(log, ps, headers, es) es.ExecuteBlock(t, blockA) es.ExecuteBlock(t, blockB) @@ -181,8 +194,10 @@ func TestLoadingUnexecutedBlocks(t *testing.T) { ctrl := gomock.NewController(t) headers := storage.NewMockHeaders(ctrl) headers.EXPECT().ByBlockID(genesis.ID()).Return(genesis.Header, nil) + headers.EXPECT().ByBlockID(blockD.ID()).Return(blockD.Header, nil) + log := unittest.Logger() - loader := loader.NewLoader(log, ps, headers, es) + loader := loader.NewUnexecutedLoader(log, ps, headers, es) // block C is the only finalized block, index its header by its height headers.EXPECT().ByHeight(blockC.Header.Height).Return(blockC.Header, nil) @@ -218,8 +233,9 @@ func TestLoadingUnexecutedBlocks(t *testing.T) { ctrl := gomock.NewController(t) headers := storage.NewMockHeaders(ctrl) headers.EXPECT().ByBlockID(genesis.ID()).Return(genesis.Header, nil) + headers.EXPECT().ByBlockID(blockD.ID()).Return(blockD.Header, nil) log := unittest.Logger() - loader := loader.NewLoader(log, ps, headers, es) + loader := loader.NewUnexecutedLoader(log, ps, headers, es) // block C is finalized, index its header by its height headers.EXPECT().ByHeight(blockC.Header.Height).Return(blockC.Header, nil) @@ -254,8 +270,12 @@ func TestLoadingUnexecutedBlocks(t *testing.T) { ctrl := gomock.NewController(t) headers := storage.NewMockHeaders(ctrl) headers.EXPECT().ByBlockID(genesis.ID()).Return(genesis.Header, nil) + headers.EXPECT().ByBlockID(blockB.ID()).Return(blockB.Header, nil) + headers.EXPECT().ByBlockID(blockC.ID()).Return(blockC.Header, nil) + headers.EXPECT().ByBlockID(blockD.ID()).Return(blockD.Header, nil) + log := unittest.Logger() - loader := loader.NewLoader(log, ps, headers, es) + loader := loader.NewUnexecutedLoader(log, ps, headers, es) // block A is finalized, index its header by its height headers.EXPECT().ByHeight(blockA.Header.Height).Return(blockA.Header, nil) @@ -315,8 +335,15 @@ func TestLoadingUnexecutedBlocks(t *testing.T) { ctrl := gomock.NewController(t) headers := storage.NewMockHeaders(ctrl) headers.EXPECT().ByBlockID(genesis.ID()).Return(genesis.Header, nil) + headers.EXPECT().ByBlockID(blockD.ID()).Return(blockD.Header, nil) + headers.EXPECT().ByBlockID(blockE.ID()).Return(blockE.Header, nil) + headers.EXPECT().ByBlockID(blockF.ID()).Return(blockF.Header, nil) + headers.EXPECT().ByBlockID(blockG.ID()).Return(blockG.Header, nil) + headers.EXPECT().ByBlockID(blockH.ID()).Return(blockH.Header, nil) + headers.EXPECT().ByBlockID(blockI.ID()).Return(blockI.Header, nil) + log := unittest.Logger() - loader := loader.NewLoader(log, ps, headers, es) + loader := loader.NewUnexecutedLoader(log, ps, headers, es) // block C is finalized, index its header by its height headers.EXPECT().ByHeight(blockC.Header.Height).Return(blockC.Header, nil) diff --git a/engine/execution/ingestion/loader/unfinalized_loader.go b/engine/execution/ingestion/loader/unfinalized_loader.go new file mode 100644 index 00000000000..bcfc699074a --- /dev/null +++ b/engine/execution/ingestion/loader/unfinalized_loader.go @@ -0,0 +1,91 @@ +package loader + +import ( + "context" + "fmt" + + "github.com/rs/zerolog" + + "github.com/onflow/flow-go/engine/execution/state" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/state/protocol" + "github.com/onflow/flow-go/storage" +) + +type UnfinalizedLoader struct { + log zerolog.Logger + state protocol.State + headers storage.Headers // see comments on getHeaderByHeight for why we need it + execState state.FinalizedExecutionState +} + +// NewUnfinalizedLoader creates a new loader that loads all unfinalized and validated blocks +func NewUnfinalizedLoader( + log zerolog.Logger, + state protocol.State, + headers storage.Headers, + execState state.FinalizedExecutionState, +) *UnfinalizedLoader { + return &UnfinalizedLoader{ + log: log.With().Str("component", "ingestion_engine_unfinalized_loader").Logger(), + state: state, + headers: headers, + execState: execState, + } +} + +// LoadUnexecuted loads all unfinalized and validated blocks +// any error returned are exceptions +func (e *UnfinalizedLoader) LoadUnexecuted(ctx context.Context) ([]flow.Identifier, error) { + lastExecuted := e.execState.GetHighestFinalizedExecuted() + + // get finalized height + finalized := e.state.Final() + final, err := finalized.Head() + if err != nil { + return nil, fmt.Errorf("could not get finalized block: %w", err) + } + + // TODO: dynamically bootstrapped execution node will reload blocks from + unexecutedFinalized := make([]flow.Identifier, 0) + + // starting from the first unexecuted block, go through each unexecuted and finalized block + // reload its block to execution queues + // loading finalized blocks + for height := lastExecuted + 1; height <= final.Height; height++ { + header, err := e.getHeaderByHeight(height) + if err != nil { + return nil, fmt.Errorf("could not get header at height: %v, %w", height, err) + } + + unexecutedFinalized = append(unexecutedFinalized, header.ID()) + } + + // loaded all pending blocks + pendings, err := finalized.Descendants() + if err != nil { + return nil, fmt.Errorf("could not get descendants of finalized block: %w", err) + } + + unexecuted := append(unexecutedFinalized, pendings...) + + e.log.Info(). + Uint64("last_finalized", final.Height). + Uint64("last_finalized_executed", lastExecuted). + // Uint64("sealed_root_height", rootBlock.Height). + // Hex("sealed_root_id", logging.Entity(rootBlock)). + Int("total_finalized_unexecuted", len(unexecutedFinalized)). + Int("total_unexecuted", len(unexecuted)). + Msgf("finalized unexecuted blocks") + + return unexecuted, nil +} + +// if the EN is dynamically bootstrapped, the finalized blocks at height range: +// [ sealedRoot.Height, finalizedRoot.Height - 1] can not be retrieved from +// protocol state, but only from headers +func (e *UnfinalizedLoader) getHeaderByHeight(height uint64) (*flow.Header, error) { + // we don't use protocol state because for dynamic boostrapped execution node + // the last executed and sealed block is below the finalized root block + return e.headers.ByHeight(height) +} diff --git a/engine/execution/ingestion/loader/unfinalized_loader_test.go b/engine/execution/ingestion/loader/unfinalized_loader_test.go new file mode 100644 index 00000000000..3c8b84aed40 --- /dev/null +++ b/engine/execution/ingestion/loader/unfinalized_loader_test.go @@ -0,0 +1,55 @@ +package loader_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/engine/execution/ingestion" + "github.com/onflow/flow-go/engine/execution/ingestion/loader" + stateMock "github.com/onflow/flow-go/engine/execution/state/mock" + "github.com/onflow/flow-go/model/flow" + storage "github.com/onflow/flow-go/storage/mock" + "github.com/onflow/flow-go/utils/unittest" + "github.com/onflow/flow-go/utils/unittest/mocks" +) + +var _ ingestion.BlockLoader = (*loader.UnfinalizedLoader)(nil) + +func TestLoadingUnfinalizedBlocks(t *testing.T) { + ps := mocks.NewProtocolState() + + // Genesis <- A <- B <- C (finalized) <- D + chain, result, seal := unittest.ChainFixture(5) + genesis, blockA, blockB, blockC, blockD := + chain[0], chain[1], chain[2], chain[3], chain[4] + + logChain(chain) + + require.NoError(t, ps.Bootstrap(genesis, result, seal)) + require.NoError(t, ps.Extend(blockA)) + require.NoError(t, ps.Extend(blockB)) + require.NoError(t, ps.Extend(blockC)) + require.NoError(t, ps.Extend(blockD)) + require.NoError(t, ps.Finalize(blockC.ID())) + + es := new(stateMock.FinalizedExecutionState) + es.On("GetHighestFinalizedExecuted").Return(genesis.Header.Height) + headers := new(storage.Headers) + headers.On("ByHeight", blockA.Header.Height).Return(blockA.Header, nil) + headers.On("ByHeight", blockB.Header.Height).Return(blockB.Header, nil) + headers.On("ByHeight", blockC.Header.Height).Return(blockC.Header, nil) + + loader := loader.NewUnfinalizedLoader(unittest.Logger(), ps, headers, es) + + unexecuted, err := loader.LoadUnexecuted(context.Background()) + require.NoError(t, err) + + unittest.IDsEqual(t, []flow.Identifier{ + blockA.ID(), + blockB.ID(), + blockC.ID(), + blockD.ID(), + }, unexecuted) +} diff --git a/engine/execution/ingestion/stop/stop_control.go b/engine/execution/ingestion/stop/stop_control.go index 94ec0781191..bb14e8905d5 100644 --- a/engine/execution/ingestion/stop/stop_control.go +++ b/engine/execution/ingestion/stop/stop_control.go @@ -493,7 +493,7 @@ func (s *StopControl) blockFinalized( Msgf("Found ID of the block that should be executed last") // check if the parent block has been executed then stop right away - executed, err := state.IsBlockExecuted(ctx, s.exeState, h.ParentID) + executed, err := state.IsParentExecuted(s.exeState, h) if err != nil { handleErr(fmt.Errorf( "failed to check if the block has been executed: %w", diff --git a/engine/execution/ingestion/stop/stop_control_test.go b/engine/execution/ingestion/stop/stop_control_test.go index 829a1f65a0f..6698c3cc7b8 100644 --- a/engine/execution/ingestion/stop/stop_control_test.go +++ b/engine/execution/ingestion/stop/stop_control_test.go @@ -14,7 +14,6 @@ import ( "github.com/onflow/flow-go/engine/execution/state/mock" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/irrecoverable" - "github.com/onflow/flow-go/storage" storageMock "github.com/onflow/flow-go/storage/mock" "github.com/onflow/flow-go/utils/unittest" ) @@ -92,7 +91,7 @@ func TestCannotSetNewValuesAfterStoppingCommenced(t *testing.T) { require.Equal(t, stop, sc.GetStopParameters()) // make execution check pretends block has been executed - execState.On("StateCommitmentByBlockID", testifyMock.Anything, testifyMock.Anything).Return(nil, nil) + execState.On("IsBlockExecuted", testifyMock.Anything, testifyMock.Anything).Return(true, nil) // no stopping has started yet, block below stop height header := unittest.BlockHeaderFixture(unittest.WithHeaderHeight(20)) @@ -145,9 +144,7 @@ func TestExecutionFallingBehind(t *testing.T) { require.NoError(t, err) require.Equal(t, stop, sc.GetStopParameters()) - execState. - On("StateCommitmentByBlockID", testifyMock.Anything, headerC.ParentID). - Return(nil, storage.ErrNotFound) + execState.On("IsBlockExecuted", headerC.Height-1, headerC.ParentID).Return(false, nil) // finalize blocks first sc.BlockFinalizedForTesting(headerA) @@ -214,9 +211,7 @@ func TestAddStopForPastBlocks(t *testing.T) { sc.OnBlockExecuted(headerC) // block is executed - execState. - On("StateCommitmentByBlockID", testifyMock.Anything, headerD.ParentID). - Return(nil, nil) + execState.On("IsBlockExecuted", headerD.Height-1, headerD.ParentID).Return(true, nil) // set stop at 22, but finalization and execution is at 23 // so stop right away @@ -261,9 +256,7 @@ func TestAddStopForPastBlocksExecutionFallingBehind(t *testing.T) { false, ) - execState. - On("StateCommitmentByBlockID", testifyMock.Anything, headerD.ParentID). - Return(nil, storage.ErrNotFound) + execState.On("IsBlockExecuted", headerD.Height-1, headerD.ParentID).Return(false, nil) // finalize blocks first sc.BlockFinalizedForTesting(headerA) @@ -317,9 +310,7 @@ func TestStopControlWithVersionControl(t *testing.T) { ) // setting this means all finalized blocks are considered already executed - execState. - On("StateCommitmentByBlockID", testifyMock.Anything, headerC.ParentID). - Return(nil, nil) + execState.On("IsBlockExecuted", headerC.Height-1, headerC.ParentID).Return(true, nil) versionBeacons. On("Highest", testifyMock.Anything). @@ -741,12 +732,8 @@ func Test_StopControlWorkers(t *testing.T) { Once() execState := mock.NewExecutionState(t) - execState.On( - "StateCommitmentByBlockID", - testifyMock.Anything, - headerA.ID(), - ).Return(flow.StateCommitment{}, nil). - Once() + + execState.On("IsBlockExecuted", headerA.Height, headerA.ID()).Return(true, nil).Once() headers := &stopControlMockHeaders{ headers: map[uint64]*flow.Header{ @@ -817,12 +804,7 @@ func Test_StopControlWorkers(t *testing.T) { Once() execState := mock.NewExecutionState(t) - execState.On( - "StateCommitmentByBlockID", - testifyMock.Anything, - headerB.ID(), - ).Return(flow.StateCommitment{}, nil). - Once() + execState.On("IsBlockExecuted", headerB.Height, headerB.ID()).Return(true, nil).Once() headers := &stopControlMockHeaders{ headers: map[uint64]*flow.Header{ diff --git a/engine/execution/mock/executed_finalized_wal.go b/engine/execution/mock/executed_finalized_wal.go index faccfaec0cb..321467c9b49 100644 --- a/engine/execution/mock/executed_finalized_wal.go +++ b/engine/execution/mock/executed_finalized_wal.go @@ -15,11 +15,11 @@ type ExecutedFinalizedWAL struct { } // Append provides a mock function with given fields: height, registers -func (_m *ExecutedFinalizedWAL) Append(height uint64, registers []flow.RegisterEntry) error { +func (_m *ExecutedFinalizedWAL) Append(height uint64, registers flow.RegisterEntries) error { ret := _m.Called(height, registers) var r0 error - if rf, ok := ret.Get(0).(func(uint64, []flow.RegisterEntry) error); ok { + if rf, ok := ret.Get(0).(func(uint64, flow.RegisterEntries) error); ok { r0 = rf(height, registers) } else { r0 = ret.Error(0) diff --git a/engine/execution/mock/extendable_storage_snapshot.go b/engine/execution/mock/extendable_storage_snapshot.go new file mode 100644 index 00000000000..6b65c7ca52f --- /dev/null +++ b/engine/execution/mock/extendable_storage_snapshot.go @@ -0,0 +1,88 @@ +// Code generated by mockery v2.21.4. DO NOT EDIT. + +package mock + +import ( + execution "github.com/onflow/flow-go/engine/execution" + flow "github.com/onflow/flow-go/model/flow" + + mock "github.com/stretchr/testify/mock" +) + +// ExtendableStorageSnapshot is an autogenerated mock type for the ExtendableStorageSnapshot type +type ExtendableStorageSnapshot struct { + mock.Mock +} + +// Commitment provides a mock function with given fields: +func (_m *ExtendableStorageSnapshot) Commitment() flow.StateCommitment { + ret := _m.Called() + + var r0 flow.StateCommitment + if rf, ok := ret.Get(0).(func() flow.StateCommitment); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(flow.StateCommitment) + } + } + + return r0 +} + +// Extend provides a mock function with given fields: newCommit, updatedRegisters +func (_m *ExtendableStorageSnapshot) Extend(newCommit flow.StateCommitment, updatedRegisters map[flow.RegisterID][]byte) execution.ExtendableStorageSnapshot { + ret := _m.Called(newCommit, updatedRegisters) + + var r0 execution.ExtendableStorageSnapshot + if rf, ok := ret.Get(0).(func(flow.StateCommitment, map[flow.RegisterID][]byte) execution.ExtendableStorageSnapshot); ok { + r0 = rf(newCommit, updatedRegisters) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(execution.ExtendableStorageSnapshot) + } + } + + return r0 +} + +// Get provides a mock function with given fields: id +func (_m *ExtendableStorageSnapshot) Get(id flow.RegisterID) ([]byte, error) { + ret := _m.Called(id) + + var r0 []byte + var r1 error + if rf, ok := ret.Get(0).(func(flow.RegisterID) ([]byte, error)); ok { + return rf(id) + } + if rf, ok := ret.Get(0).(func(flow.RegisterID) []byte); ok { + r0 = rf(id) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]byte) + } + } + + if rf, ok := ret.Get(1).(func(flow.RegisterID) error); ok { + r1 = rf(id) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type mockConstructorTestingTNewExtendableStorageSnapshot interface { + mock.TestingT + Cleanup(func()) +} + +// NewExtendableStorageSnapshot creates a new instance of ExtendableStorageSnapshot. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewExtendableStorageSnapshot(t mockConstructorTestingTNewExtendableStorageSnapshot) *ExtendableStorageSnapshot { + mock := &ExtendableStorageSnapshot{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/engine/execution/mock/in_memory_register_store.go b/engine/execution/mock/in_memory_register_store.go index 606bbe32b05..561f39c8a4d 100644 --- a/engine/execution/mock/in_memory_register_store.go +++ b/engine/execution/mock/in_memory_register_store.go @@ -117,11 +117,11 @@ func (_m *InMemoryRegisterStore) PrunedHeight() uint64 { } // SaveRegisters provides a mock function with given fields: height, blockID, parentID, registers -func (_m *InMemoryRegisterStore) SaveRegisters(height uint64, blockID flow.Identifier, parentID flow.Identifier, registers []flow.RegisterEntry) error { +func (_m *InMemoryRegisterStore) SaveRegisters(height uint64, blockID flow.Identifier, parentID flow.Identifier, registers flow.RegisterEntries) error { ret := _m.Called(height, blockID, parentID, registers) var r0 error - if rf, ok := ret.Get(0).(func(uint64, flow.Identifier, flow.Identifier, []flow.RegisterEntry) error); ok { + if rf, ok := ret.Get(0).(func(uint64, flow.Identifier, flow.Identifier, flow.RegisterEntries) error); ok { r0 = rf(height, blockID, parentID, registers) } else { r0 = ret.Error(0) diff --git a/engine/execution/mock/register_store.go b/engine/execution/mock/register_store.go index 1e73de34a02..e2bd3dba400 100644 --- a/engine/execution/mock/register_store.go +++ b/engine/execution/mock/register_store.go @@ -91,11 +91,11 @@ func (_m *RegisterStore) OnBlockFinalized() error { } // SaveRegisters provides a mock function with given fields: header, registers -func (_m *RegisterStore) SaveRegisters(header *flow.Header, registers []flow.RegisterEntry) error { +func (_m *RegisterStore) SaveRegisters(header *flow.Header, registers flow.RegisterEntries) error { ret := _m.Called(header, registers) var r0 error - if rf, ok := ret.Get(0).(func(*flow.Header, []flow.RegisterEntry) error); ok { + if rf, ok := ret.Get(0).(func(*flow.Header, flow.RegisterEntries) error); ok { r0 = rf(header, registers) } else { r0 = ret.Error(0) diff --git a/engine/execution/mock/wal_reader.go b/engine/execution/mock/wal_reader.go index eb00b4643ef..f9917c8b520 100644 --- a/engine/execution/mock/wal_reader.go +++ b/engine/execution/mock/wal_reader.go @@ -13,13 +13,13 @@ type WALReader struct { } // Next provides a mock function with given fields: -func (_m *WALReader) Next() (uint64, []flow.RegisterEntry, error) { +func (_m *WALReader) Next() (uint64, flow.RegisterEntries, error) { ret := _m.Called() var r0 uint64 - var r1 []flow.RegisterEntry + var r1 flow.RegisterEntries var r2 error - if rf, ok := ret.Get(0).(func() (uint64, []flow.RegisterEntry, error)); ok { + if rf, ok := ret.Get(0).(func() (uint64, flow.RegisterEntries, error)); ok { return rf() } if rf, ok := ret.Get(0).(func() uint64); ok { @@ -28,11 +28,11 @@ func (_m *WALReader) Next() (uint64, []flow.RegisterEntry, error) { r0 = ret.Get(0).(uint64) } - if rf, ok := ret.Get(1).(func() []flow.RegisterEntry); ok { + if rf, ok := ret.Get(1).(func() flow.RegisterEntries); ok { r1 = rf() } else { if ret.Get(1) != nil { - r1 = ret.Get(1).([]flow.RegisterEntry) + r1 = ret.Get(1).(flow.RegisterEntries) } } diff --git a/engine/execution/rpc/engine.go b/engine/execution/rpc/engine.go index 4a0745fc3e1..a1015cc18e6 100644 --- a/engine/execution/rpc/engine.go +++ b/engine/execution/rpc/engine.go @@ -26,7 +26,7 @@ import ( "github.com/onflow/flow-go/engine/common/rpc" "github.com/onflow/flow-go/engine/common/rpc/convert" exeEng "github.com/onflow/flow-go/engine/execution" - "github.com/onflow/flow-go/engine/execution/scripts" + "github.com/onflow/flow-go/engine/execution/state" fvmerrors "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/state/protocol" @@ -169,7 +169,7 @@ type handler struct { maxBlockRange int } -var _ execution.ExecutionAPIServer = &handler{} +var _ execution.ExecutionAPIServer = (*handler)(nil) // Ping responds to requests when the server is up. func (h *handler) Ping( @@ -497,6 +497,164 @@ func (h *handler) GetTransactionResultsByBlockID( }, nil } +// GetTransactionErrorMessage implements a grpc handler for getting a transaction error message by block ID and tx ID. +// Expected error codes during normal operations: +// - codes.InvalidArgument - invalid blockID, tx ID. +// - codes.NotFound - transaction result by tx ID not found. +func (h *handler) GetTransactionErrorMessage( + _ context.Context, + req *execution.GetTransactionErrorMessageRequest, +) (*execution.GetTransactionErrorMessageResponse, error) { + reqBlockID := req.GetBlockId() + blockID, err := convert.BlockID(reqBlockID) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "invalid blockID: %v", err) + } + + reqTxID := req.GetTransactionId() + txID, err := convert.TransactionID(reqTxID) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "invalid transactionID: %v", err) + } + + // lookup any transaction error that might have occurred + txResult, err := h.transactionResults.ByBlockIDTransactionID(blockID, txID) + if err != nil { + if errors.Is(err, storage.ErrNotFound) { + return nil, status.Error(codes.NotFound, "transaction result not found") + } + + return nil, status.Errorf(codes.Internal, "failed to get transaction result: %v", err) + } + + result := &execution.GetTransactionErrorMessageResponse{ + TransactionId: convert.IdentifierToMessage(txResult.TransactionID), + } + + if len(txResult.ErrorMessage) > 0 { + cadenceErrMessage := txResult.ErrorMessage + if !utf8.ValidString(cadenceErrMessage) { + h.log.Warn(). + Str("block_id", blockID.String()). + Str("transaction_id", txID.String()). + Str("error_mgs", fmt.Sprintf("%q", cadenceErrMessage)). + Msg("invalid character in Cadence error message") + // convert non UTF-8 string to a UTF-8 string for safe GRPC marshaling + cadenceErrMessage = strings.ToValidUTF8(txResult.ErrorMessage, "?") + } + result.ErrorMessage = cadenceErrMessage + } + return result, nil +} + +// GetTransactionErrorMessageByIndex implements a grpc handler for getting a transaction error message by block ID and tx index. +// Expected error codes during normal operations: +// - codes.InvalidArgument - invalid blockID. +// - codes.NotFound - transaction result at index not found. +func (h *handler) GetTransactionErrorMessageByIndex( + _ context.Context, + req *execution.GetTransactionErrorMessageByIndexRequest, +) (*execution.GetTransactionErrorMessageResponse, error) { + reqBlockID := req.GetBlockId() + blockID, err := convert.BlockID(reqBlockID) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "invalid blockID: %v", err) + } + + index := req.GetIndex() + + // lookup any transaction error that might have occurred + txResult, err := h.transactionResults.ByBlockIDTransactionIndex(blockID, index) + if err != nil { + if errors.Is(err, storage.ErrNotFound) { + return nil, status.Error(codes.NotFound, "transaction result not found") + } + + return nil, status.Errorf(codes.Internal, "failed to get transaction result: %v", err) + } + + result := &execution.GetTransactionErrorMessageResponse{ + TransactionId: convert.IdentifierToMessage(txResult.TransactionID), + } + + if len(txResult.ErrorMessage) > 0 { + cadenceErrMessage := txResult.ErrorMessage + if !utf8.ValidString(cadenceErrMessage) { + h.log.Warn(). + Str("block_id", blockID.String()). + Str("transaction_id", txResult.TransactionID.String()). + Str("error_mgs", fmt.Sprintf("%q", cadenceErrMessage)). + Msg("invalid character in Cadence error message") + // convert non UTF-8 string to a UTF-8 string for safe GRPC marshaling + cadenceErrMessage = strings.ToValidUTF8(txResult.ErrorMessage, "?") + } + result.ErrorMessage = cadenceErrMessage + } + return result, nil +} + +// GetTransactionErrorMessagesByBlockID implements a grpc handler for getting transaction error messages by block ID. +// Only failed transactions will be returned. +// Expected error codes during normal operations: +// - codes.InvalidArgument - invalid blockID. +// - codes.NotFound - block was not executed or was pruned. +func (h *handler) GetTransactionErrorMessagesByBlockID( + _ context.Context, + req *execution.GetTransactionErrorMessagesByBlockIDRequest, +) (*execution.GetTransactionErrorMessagesResponse, error) { + reqBlockID := req.GetBlockId() + blockID, err := convert.BlockID(reqBlockID) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "invalid blockID: %v", err) + } + + // must verify block was locally executed first since transactionResults.ByBlockID will return + // an empty slice if block does not exist + if _, err = h.commits.ByBlockID(blockID); err != nil { + if errors.Is(err, storage.ErrNotFound) { + return nil, status.Errorf(codes.NotFound, "block %s has not been executed by node or was pruned", blockID) + } + return nil, status.Errorf(codes.Internal, "state commitment for block ID %s could not be retrieved", blockID) + } + + // Get all tx results + txResults, err := h.transactionResults.ByBlockID(blockID) + if err != nil { + if errors.Is(err, storage.ErrNotFound) { + return nil, status.Error(codes.NotFound, "transaction results not found") + } + + return nil, status.Errorf(codes.Internal, "failed to get transaction results: %v", err) + } + + var results []*execution.GetTransactionErrorMessagesResponse_Result + for index, txResult := range txResults { + if len(txResult.ErrorMessage) == 0 { + continue + } + txIndex := uint32(index) + cadenceErrMessage := txResult.ErrorMessage + if !utf8.ValidString(cadenceErrMessage) { + h.log.Warn(). + Str("block_id", blockID.String()). + Uint32("index", txIndex). + Str("error_mgs", fmt.Sprintf("%q", cadenceErrMessage)). + Msg("invalid character in Cadence error message") + // convert non UTF-8 string to a UTF-8 string for safe GRPC marshaling + cadenceErrMessage = strings.ToValidUTF8(txResult.ErrorMessage, "?") + } + results = append(results, &execution.GetTransactionErrorMessagesResponse_Result{ + TransactionId: convert.IdentifierToMessage(txResult.TransactionID), + Index: txIndex, + ErrorMessage: cadenceErrMessage, + }) + } + + return &execution.GetTransactionErrorMessagesResponse{ + Results: results, + }, nil +} + // eventResult creates EventsResponse_Result from flow.Event for the given blockID func (h *handler) eventResult( blockID flow.Identifier, @@ -545,11 +703,14 @@ func (h *handler) GetAccountAtBlockID( value, err := h.engine.GetAccount(ctx, flowAddress, blockFlowID) if err != nil { - if errors.Is(err, scripts.ErrStateCommitmentPruned) { + if errors.Is(err, state.ErrExecutionStatePruned) { return nil, status.Errorf(codes.OutOfRange, "state for block ID %s not available", blockFlowID) } + if errors.Is(err, state.ErrNotExecuted) { + return nil, status.Errorf(codes.NotFound, "block %s has not been executed by node or was pruned", blockFlowID) + } if errors.Is(err, storage.ErrNotFound) { - return nil, status.Errorf(codes.NotFound, "account with address %s not found", flowAddress) + return nil, status.Errorf(codes.NotFound, "block %s not found", blockFlowID) } if fvmerrors.IsAccountNotFoundError(err) { return nil, status.Errorf(codes.NotFound, "account not found") diff --git a/engine/execution/rpc/engine_test.go b/engine/execution/rpc/engine_test.go index 6aec7283fb4..d2f3913123a 100644 --- a/engine/execution/rpc/engine_test.go +++ b/engine/execution/rpc/engine_test.go @@ -20,7 +20,7 @@ import ( "github.com/onflow/flow-go/engine/common/rpc/convert" mockEng "github.com/onflow/flow-go/engine/execution/mock" - "github.com/onflow/flow-go/engine/execution/scripts" + "github.com/onflow/flow-go/engine/execution/state" "github.com/onflow/flow-go/model/flow" realstorage "github.com/onflow/flow-go/storage" storage "github.com/onflow/flow-go/storage/mock" @@ -335,7 +335,7 @@ func (suite *Suite) TestGetAccountAtBlockID() { "this error usually happens if the reference "+ "block for this script is not set to a recent block.", id, - scripts.ErrStateCommitmentPruned, + state.ErrExecutionStatePruned, unittest.IdentifierFixture(), ) @@ -651,7 +651,28 @@ func (suite *Suite) TestGetTransactionResult() { // expect a storage call for the invalid tx ID but return an error txResults := storage.NewTransactionResults(suite.T()) - txResults.On("ByBlockIDTransactionID", bID, wrongTxID).Return(nil, status.Error(codes.Internal, "")).Once() + txResults.On("ByBlockIDTransactionID", bID, wrongTxID).Return(nil, realstorage.ErrNotFound).Once() + + handler := createHandler(txResults) + + _, err := handler.GetTransactionResult(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.NotFound, "")) + }) + + // failure path - non-existent transaction ID in request results in an exception + suite.Run("request with non-existent transaction ID, exception", func() { + + wrongTxID := unittest.IdentifierFixture() + + // create an API request with the invalid transaction ID + req := concoctReq(bID[:], wrongTxID[:]) + + // expect a storage call for the invalid tx ID but return an exception + txResults := storage.NewTransactionResults(suite.T()) + txResults.On("ByBlockIDTransactionID", bID, wrongTxID).Return(nil, errors.New("internal-error")).Once() handler := createHandler(txResults) @@ -672,7 +693,28 @@ func (suite *Suite) TestGetTransactionResult() { // expect a storage call for the invalid tx ID but return an error txResults := storage.NewTransactionResults(suite.T()) - txResults.On("ByBlockIDTransactionIndex", bID, wrongTxIndex).Return(nil, status.Error(codes.Internal, "")).Once() + txResults.On("ByBlockIDTransactionIndex", bID, wrongTxIndex).Return(nil, realstorage.ErrNotFound).Once() + + handler := createHandler(txResults) + + _, err := handler.GetTransactionResultByIndex(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.NotFound, "")) + }) + + // failure path - non-existent transaction index in request results in an exception + suite.Run("request with non-existent transaction index, exception", func() { + + wrongTxIndex := txIndex + 1 + + // create an API request with the invalid transaction ID + req := concoctIndexReq(bID[:], wrongTxIndex) + + // expect a storage call for the invalid tx ID but return an exception + txResults := storage.NewTransactionResults(suite.T()) + txResults.On("ByBlockIDTransactionIndex", bID, wrongTxIndex).Return(nil, errors.New("internal-error")).Once() handler := createHandler(txResults) @@ -888,3 +930,453 @@ func (suite *Suite) TestGetTransactionResultsByBlockID() { errors.Is(err, status.Error(codes.NotFound, "")) }) } + +// TestGetTransactionErrorMessage tests the GetTransactionErrorMessage and GetTransactionErrorMessageByIndex API calls +func (suite *Suite) TestGetTransactionErrorMessage() { + block := unittest.BlockFixture() + tx := unittest.TransactionFixture() + bID := block.ID() + txID := tx.ID() + txIndex := rand.Uint32() + + // create the handler + createHandler := func(txResults *storage.TransactionResults) *handler { + handler := &handler{ + headers: suite.headers, + events: suite.events, + transactionResults: txResults, + commits: suite.commits, + chain: flow.Mainnet, + } + return handler + } + + // concoctReq creates a GetTransactionErrorMessageRequest + concoctReq := func(bID []byte, tID []byte) *execution.GetTransactionErrorMessageRequest { + return &execution.GetTransactionErrorMessageRequest{ + BlockId: bID, + TransactionId: tID, + } + } + + // concoctIndexReq creates a GetTransactionErrorMessageByIndexRequest + concoctIndexReq := func(bID []byte, tIndex uint32) *execution.GetTransactionErrorMessageByIndexRequest { + return &execution.GetTransactionErrorMessageByIndexRequest{ + BlockId: bID, + Index: tIndex, + } + } + + suite.Run("happy path - by tx id - no transaction error", func() { + + // create the expected result + expectedResult := &execution.GetTransactionErrorMessageResponse{ + TransactionId: convert.IdentifierToMessage(txID), + ErrorMessage: "", + } + + // expect a call to lookup transaction result by block ID and transaction ID, return a result with no error + txResults := storage.NewTransactionResults(suite.T()) + txResult := flow.TransactionResult{ + TransactionID: txID, + ErrorMessage: "", + } + txResults.On("ByBlockIDTransactionID", bID, txID).Return(&txResult, nil).Once() + + handler := createHandler(txResults) + + // create a valid API request + req := concoctReq(bID[:], txID[:]) + + // execute the GetTransactionErrorMessage call + actualResult, err := handler.GetTransactionErrorMessage(context.Background(), req) + + // check that a successful response is received + suite.Require().NoError(err) + + // check that all fields in response are as expected + suite.Equal(expectedResult, actualResult) + }) + + suite.Run("happy path - at index - no transaction error", func() { + + // create the expected result + expectedResult := &execution.GetTransactionErrorMessageResponse{ + TransactionId: convert.IdentifierToMessage(txID), + ErrorMessage: "", + } + + // expect a call to lookup transaction result by block ID and transaction ID, return a result with no error + txResults := storage.NewTransactionResults(suite.T()) + txResult := flow.TransactionResult{ + TransactionID: txID, + ErrorMessage: "", + } + txResults.On("ByBlockIDTransactionIndex", bID, txIndex).Return(&txResult, nil).Once() + + handler := createHandler(txResults) + + // create a valid API request + req := concoctIndexReq(bID[:], txIndex) + + // execute the GetTransactionResult call + actualResult, err := handler.GetTransactionErrorMessageByIndex(context.Background(), req) + + // check that a successful response is received + suite.Require().NoError(err) + + // check that all fields in response are as expected + suite.Equal(expectedResult, actualResult) + }) + + suite.Run("happy path - by tx id - transaction error", func() { + + // create the expected result + expectedResult := &execution.GetTransactionErrorMessageResponse{ + TransactionId: convert.IdentifierToMessage(txID), + ErrorMessage: "runtime error", + } + + // setup the storage to return a transaction error + txResults := storage.NewTransactionResults(suite.T()) + txResult := flow.TransactionResult{ + TransactionID: txID, + ErrorMessage: "runtime error", + } + txResults.On("ByBlockIDTransactionID", bID, txID).Return(&txResult, nil).Once() + + handler := createHandler(txResults) + + // create a valid API request + req := concoctReq(bID[:], txID[:]) + + // execute the GetTransactionErrorMessage call + actualResult, err := handler.GetTransactionErrorMessage(context.Background(), req) + + // check that a successful response is received + suite.Require().NoError(err) + + // check that all fields in response are as expected + suite.Equal(expectedResult, actualResult) + }) + + suite.Run("happy path - at index - transaction error", func() { + + // create the expected result + expectedResult := &execution.GetTransactionErrorMessageResponse{ + TransactionId: convert.IdentifierToMessage(txID), + ErrorMessage: "runtime error", + } + + // setup the storage to return a transaction error + txResults := storage.NewTransactionResults(suite.T()) + txResult := flow.TransactionResult{ + TransactionID: txID, + ErrorMessage: "runtime error", + } + txResults.On("ByBlockIDTransactionIndex", bID, txIndex).Return(&txResult, nil).Once() + + handler := createHandler(txResults) + + // create a valid API request + req := concoctIndexReq(bID[:], txIndex) + + // execute the GetTransactionErrorMessageByIndex call + actualResult, err := handler.GetTransactionErrorMessageByIndex(context.Background(), req) + + // check that a successful response is received + suite.Require().NoError(err) + + // check that all fields in response are as expected + suite.Equal(expectedResult, actualResult) + }) + + // failure path - nil transaction ID in the request results in an error + suite.Run("request with nil tx ID", func() { + + // create an API request with transaction ID as nil + req := concoctReq(bID[:], nil) + + txResults := storage.NewTransactionResults(suite.T()) + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessage(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.InvalidArgument, "")) + }) + + // failure path - nil block id in the request results in an error + suite.Run("request with nil block ID", func() { + + // create an API request with a nil block id + req := concoctReq(nil, txID[:]) + + txResults := storage.NewTransactionResults(suite.T()) + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessage(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.InvalidArgument, "")) + }) + + // failure path - nil block id in the index request results in an error + suite.Run("index request with nil block ID", func() { + + // create an API request with a nil block id + req := concoctIndexReq(nil, txIndex) + + txResults := storage.NewTransactionResults(suite.T()) + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessageByIndex(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.InvalidArgument, "")) + }) + + // failure path - non-existent transaction ID in request results in an error + suite.Run("request with non-existent transaction ID", func() { + + wrongTxID := unittest.IdentifierFixture() + + // create an API request with the invalid transaction ID + req := concoctReq(bID[:], wrongTxID[:]) + + // expect a storage call for the invalid tx ID but return an error + txResults := storage.NewTransactionResults(suite.T()) + txResults.On("ByBlockIDTransactionID", bID, wrongTxID).Return(nil, realstorage.ErrNotFound).Once() + + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessage(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.NotFound, "")) + }) + + // failure path - non-existent transaction ID in request results in an exception + suite.Run("request with non-existent transaction ID, exception", func() { + + wrongTxID := unittest.IdentifierFixture() + + // create an API request with the invalid transaction ID + req := concoctReq(bID[:], wrongTxID[:]) + + // expect a storage call for the invalid tx ID but return an exception + txResults := storage.NewTransactionResults(suite.T()) + txResults.On("ByBlockIDTransactionID", bID, wrongTxID).Return(nil, errors.New("internal-error")).Once() + + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessage(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.Internal, "")) + }) + + // failure path - non-existent transaction index in request results in an error + suite.Run("request with non-existent transaction index", func() { + + wrongTxIndex := txIndex + 1 + + // create an API request with the invalid transaction ID + req := concoctIndexReq(bID[:], wrongTxIndex) + + // expect a storage call for the invalid tx ID but return an error + txResults := storage.NewTransactionResults(suite.T()) + txResults.On("ByBlockIDTransactionIndex", bID, wrongTxIndex).Return(nil, realstorage.ErrNotFound).Once() + + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessageByIndex(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.NotFound, "")) + }) + + // failure path - non-existent transaction index in request results in an exception + suite.Run("request with non-existent transaction index, exception", func() { + + wrongTxIndex := txIndex + 1 + + // create an API request with the invalid transaction ID + req := concoctIndexReq(bID[:], wrongTxIndex) + + // expect a storage call for the invalid tx ID but return an exception + txResults := storage.NewTransactionResults(suite.T()) + txResults.On("ByBlockIDTransactionIndex", bID, wrongTxIndex).Return(nil, errors.New("internal-error")).Once() + + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessageByIndex(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.Internal, "")) + }) +} + +// TestGetTransactionErrorMessagesByBlockID tests GetTransactionErrorMessagesByBlockID API calls +func (suite *Suite) TestGetTransactionErrorMessagesByBlockID() { + block := unittest.BlockFixture() + tx := unittest.TransactionFixture() + bID := block.ID() + nonexistingBlockID := unittest.IdentifierFixture() + tx1ID := tx.ID() + tx2ID := tx.ID() + tx3ID := tx.ID() + + // create the handler + createHandler := func(txResults *storage.TransactionResults) *handler { + handler := &handler{ + headers: suite.headers, + events: suite.events, + transactionResults: txResults, + commits: suite.commits, + chain: flow.Mainnet, + } + return handler + } + + // concoctReq creates a GetTransactionErrorMessagesByBlockIDRequest + concoctReq := func(bID []byte) *execution.GetTransactionErrorMessagesByBlockIDRequest { + return &execution.GetTransactionErrorMessagesByBlockIDRequest{ + BlockId: bID, + } + } + + // happy path - if no transaction errors are found, an empty list is returned + suite.Run("happy path with no transaction error", func() { + suite.commits.On("ByBlockID", bID).Return(nil, nil).Once() + + // create the expected result + expectedResult := &execution.GetTransactionErrorMessagesResponse{ + Results: []*execution.GetTransactionErrorMessagesResponse_Result{}, + } + + // expect a call to lookup transaction result by block ID return a result with no error + txResultsMock := storage.NewTransactionResults(suite.T()) + txResults := []flow.TransactionResult{ + { + TransactionID: tx1ID, + ErrorMessage: "", + }, + { + TransactionID: tx2ID, + ErrorMessage: "", + }, + } + txResultsMock.On("ByBlockID", bID).Return(txResults, nil).Once() + + handler := createHandler(txResultsMock) + + // create a valid API request + req := concoctReq(bID[:]) + + // execute the GetTransactionErrorMessagesByBlockID call + actualResult, err := handler.GetTransactionErrorMessagesByBlockID(context.Background(), req) + + // check that a successful response is received + suite.Require().NoError(err) + + // check that all fields in response are as expected + suite.Assert().ElementsMatch(expectedResult.Results, actualResult.Results) + }) + + // happy path - valid requests receives error messages for all failed transactions. + suite.Run("happy path with transaction errors", func() { + + suite.commits.On("ByBlockID", bID).Return(nil, nil).Once() + + // create the expected result + expectedResult := &execution.GetTransactionErrorMessagesResponse{ + Results: []*execution.GetTransactionErrorMessagesResponse_Result{ + { + TransactionId: convert.IdentifierToMessage(tx2ID), + Index: 1, + ErrorMessage: "runtime error", + }, + { + TransactionId: convert.IdentifierToMessage(tx3ID), + Index: 2, + ErrorMessage: "runtime error", + }, + }, + } + + // expect a call to lookup transaction result by block ID return a result with no error + txResultsMock := storage.NewTransactionResults(suite.T()) + txResults := []flow.TransactionResult{ + { + TransactionID: tx1ID, + ErrorMessage: "", + }, + { + TransactionID: tx2ID, + ErrorMessage: "runtime error", + }, + { + TransactionID: tx3ID, + ErrorMessage: "runtime error", + }, + } + txResultsMock.On("ByBlockID", bID).Return(txResults, nil).Once() + + handler := createHandler(txResultsMock) + + // create a valid API request + req := concoctReq(bID[:]) + + // execute the GetTransactionErrorMessagesByBlockID call + actualResult, err := handler.GetTransactionErrorMessagesByBlockID(context.Background(), req) + + // check that a successful response is received + suite.Require().NoError(err) + + // check that all fields in response are as expected + suite.Assert().ElementsMatch(expectedResult.Results, actualResult.Results) + }) + + // failure path - nil block id in the request results in an error + suite.Run("request with nil block ID", func() { + + // create an API request with a nil block id + req := concoctReq(nil) + + txResults := storage.NewTransactionResults(suite.T()) + handler := createHandler(txResults) + + _, err := handler.GetTransactionErrorMessagesByBlockID(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.InvalidArgument, "")) + }) + + // failure path - nonexisting block id in the request results in not found error + suite.Run("request with nonexisting block ID", func() { + + suite.commits.On("ByBlockID", nonexistingBlockID).Return(nil, realstorage.ErrNotFound).Once() + + txResultsMock := storage.NewTransactionResults(suite.T()) + handler := createHandler(txResultsMock) + + // create a valid API request + req := concoctReq(nonexistingBlockID[:]) + + // execute the GetTransactionResult call + _, err := handler.GetTransactionErrorMessagesByBlockID(context.Background(), req) + + // check that an error was received + suite.Require().Error(err) + errors.Is(err, status.Error(codes.NotFound, "")) + }) +} diff --git a/engine/execution/scripts/engine.go b/engine/execution/scripts/engine.go index 0f25cf409ab..689d7858223 100644 --- a/engine/execution/scripts/engine.go +++ b/engine/execution/scripts/engine.go @@ -2,7 +2,6 @@ package scripts import ( "context" - "encoding/hex" "fmt" "github.com/rs/zerolog" @@ -12,15 +11,11 @@ import ( "github.com/onflow/flow-go/engine/execution/computation/query" "github.com/onflow/flow-go/engine/execution/state" "github.com/onflow/flow-go/model/flow" - "github.com/onflow/flow-go/state/protocol" ) -var ErrStateCommitmentPruned = fmt.Errorf("state commitment not found") - type Engine struct { unit *engine.Unit log zerolog.Logger - state protocol.State queryExecutor query.Executor execState state.ScriptExecutionState } @@ -29,14 +24,12 @@ var _ execution.ScriptExecutor = (*Engine)(nil) func New( logger zerolog.Logger, - state protocol.State, queryExecutor query.Executor, execState state.ScriptExecutionState, ) *Engine { return &Engine{ unit: engine.NewUnit(), log: logger.With().Str("engine", "scripts").Logger(), - state: state, execState: execState, queryExecutor: queryExecutor, } @@ -57,31 +50,11 @@ func (e *Engine) ExecuteScriptAtBlockID( blockID flow.Identifier, ) ([]byte, error) { - stateCommit, err := e.execState.StateCommitmentByBlockID(ctx, blockID) + blockSnapshot, header, err := e.execState.CreateStorageSnapshot(blockID) if err != nil { - return nil, fmt.Errorf("failed to get state commitment for block (%s): %w", blockID, err) - } - - // return early if state with the given state commitment is not in memory - // and already purged. This reduces allocations for scripts targeting old blocks. - if !e.execState.HasState(stateCommit) { - return nil, fmt.Errorf( - "failed to execute script at block (%s): %w (%s). "+ - "this error usually happens if the reference "+ - "block for this script is not set to a recent block.", - blockID.String(), - ErrStateCommitmentPruned, - hex.EncodeToString(stateCommit[:]), - ) + return nil, fmt.Errorf("failed to create storage snapshot: %w", err) } - header, err := e.state.AtBlockID(blockID).Head() - if err != nil { - return nil, fmt.Errorf("failed to get header (%s): %w", blockID, err) - } - - blockSnapshot := e.execState.NewStorageSnapshot(stateCommit) - return e.queryExecutor.ExecuteScript( ctx, script, @@ -96,13 +69,11 @@ func (e *Engine) GetRegisterAtBlockID( blockID flow.Identifier, ) ([]byte, error) { - stateCommit, err := e.execState.StateCommitmentByBlockID(ctx, blockID) + blockSnapshot, _, err := e.execState.CreateStorageSnapshot(blockID) if err != nil { - return nil, fmt.Errorf("failed to get state commitment for block (%s): %w", blockID, err) + return nil, fmt.Errorf("failed to create storage snapshot: %w", err) } - blockSnapshot := e.execState.NewStorageSnapshot(stateCommit) - id := flow.NewRegisterID(string(owner), string(key)) data, err := blockSnapshot.Get(id) if err != nil { @@ -117,29 +88,10 @@ func (e *Engine) GetAccount( addr flow.Address, blockID flow.Identifier, ) (*flow.Account, error) { - stateCommit, err := e.execState.StateCommitmentByBlockID(ctx, blockID) + blockSnapshot, header, err := e.execState.CreateStorageSnapshot(blockID) if err != nil { - return nil, fmt.Errorf("failed to get state commitment for block (%s): %w", blockID, err) - } - - // return early if state with the given state commitment is not in memory - // and already purged. This reduces allocations for get accounts targeting old blocks. - if !e.execState.HasState(stateCommit) { - return nil, fmt.Errorf( - "failed to get account at block (%s): %w (%s). "+ - "this error usually happens if the reference "+ - "block for this script is not set to a recent block.", - blockID.String(), - ErrStateCommitmentPruned, - hex.EncodeToString(stateCommit[:])) + return nil, fmt.Errorf("failed to create storage snapshot: %w", err) } - block, err := e.state.AtBlockID(blockID).Head() - if err != nil { - return nil, fmt.Errorf("failed to get block (%s): %w", blockID, err) - } - - blockSnapshot := e.execState.NewStorageSnapshot(stateCommit) - - return e.queryExecutor.GetAccount(ctx, addr, block, blockSnapshot) + return e.queryExecutor.GetAccount(ctx, addr, header, blockSnapshot) } diff --git a/engine/execution/scripts/engine_test.go b/engine/execution/scripts/engine_test.go deleted file mode 100644 index 5b5c116830f..00000000000 --- a/engine/execution/scripts/engine_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package scripts - -import ( - "context" - "strings" - "sync" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - - queryMock "github.com/onflow/flow-go/engine/execution/computation/query/mock" - stateMock "github.com/onflow/flow-go/engine/execution/state/mock" - "github.com/onflow/flow-go/model/flow" - protocol "github.com/onflow/flow-go/state/protocol/mock" - "github.com/onflow/flow-go/utils/unittest" -) - -type testingContext struct { - t *testing.T - engine *Engine - state *protocol.State - executionState *stateMock.ExecutionState - queryExecutor *queryMock.Executor - mu *sync.Mutex -} - -func (ctx *testingContext) stateCommitmentExist(blockID flow.Identifier, commit flow.StateCommitment) { - ctx.executionState.On("StateCommitmentByBlockID", mock.Anything, blockID).Return(commit, nil) -} - -func runWithEngine(t *testing.T, fn func(ctx testingContext)) { - log := unittest.Logger() - - queryExecutor := new(queryMock.Executor) - protocolState := new(protocol.State) - execState := new(stateMock.ExecutionState) - - engine := New(log, protocolState, queryExecutor, execState) - fn(testingContext{ - t: t, - engine: engine, - queryExecutor: queryExecutor, - executionState: execState, - state: protocolState, - }) -} - -func TestExecuteScriptAtBlockID(t *testing.T) { - t.Run("happy path", func(t *testing.T) { - runWithEngine(t, func(ctx testingContext) { - // Meaningless script - script := []byte{1, 1, 2, 3, 5, 8, 11} - scriptResult := []byte{1} - - // Ensure block we're about to query against is executable - blockA := unittest.ExecutableBlockFixture(nil, unittest.StateCommitmentPointerFixture()) - - snapshot := new(protocol.Snapshot) - snapshot.On("Head").Return(blockA.Block.Header, nil) - - commits := make(map[flow.Identifier]flow.StateCommitment) - commits[blockA.ID()] = *blockA.StartState - - ctx.stateCommitmentExist(blockA.ID(), *blockA.StartState) - - ctx.state.On("AtBlockID", blockA.Block.ID()).Return(snapshot) - ctx.executionState.On("NewStorageSnapshot", *blockA.StartState).Return(nil) - - ctx.executionState.On("HasState", *blockA.StartState).Return(true) - - // Successful call to computation manager - ctx.queryExecutor. - On("ExecuteScript", mock.Anything, script, [][]byte(nil), blockA.Block.Header, nil). - Return(scriptResult, nil) - - // Execute our script and expect no error - res, err := ctx.engine.ExecuteScriptAtBlockID(context.Background(), script, nil, blockA.Block.ID()) - assert.NoError(t, err) - assert.Equal(t, scriptResult, res) - - // Assert other components were called as expected - ctx.queryExecutor.AssertExpectations(t) - ctx.executionState.AssertExpectations(t) - ctx.state.AssertExpectations(t) - }) - }) - - t.Run("return early when state commitment not exist", func(t *testing.T) { - runWithEngine(t, func(ctx testingContext) { - // Meaningless script - script := []byte{1, 1, 2, 3, 5, 8, 11} - - // Ensure block we're about to query against is executable - blockA := unittest.ExecutableBlockFixture(nil, unittest.StateCommitmentPointerFixture()) - - // make sure blockID to state commitment mapping exist - ctx.executionState.On("StateCommitmentByBlockID", mock.Anything, blockA.ID()).Return(*blockA.StartState, nil) - - // but the state commitment does not exist (e.g. purged) - ctx.executionState.On("HasState", *blockA.StartState).Return(false) - - // Execute our script and expect no error - _, err := ctx.engine.ExecuteScriptAtBlockID(context.Background(), script, nil, blockA.Block.ID()) - assert.Error(t, err) - assert.True(t, strings.Contains(err.Error(), "state commitment not found")) - - // Assert other components were called as expected - ctx.executionState.AssertExpectations(t) - ctx.state.AssertExpectations(t) - }) - }) - -} diff --git a/engine/execution/state/bootstrap/bootstrap.go b/engine/execution/state/bootstrap/bootstrap.go index 0addc1665d0..7bc96dddb6c 100644 --- a/engine/execution/state/bootstrap/bootstrap.go +++ b/engine/execution/state/bootstrap/bootstrap.go @@ -1,19 +1,23 @@ package bootstrap import ( + "context" "errors" "fmt" + "github.com/cockroachdb/pebble" "github.com/dgraph-io/badger/v2" "github.com/rs/zerolog" "github.com/onflow/flow-go/engine/execution/state" + "github.com/onflow/flow-go/engine/execution/storehouse" "github.com/onflow/flow-go/fvm" "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/storage" "github.com/onflow/flow-go/storage/badger/operation" + pStorage "github.com/onflow/flow-go/storage/pebble" ) // an increased limit for bootstrapping @@ -36,9 +40,10 @@ func (b *Bootstrapper) BootstrapLedger( chain flow.Chain, opts ...fvm.BootstrapProcedureOption, ) (flow.StateCommitment, error) { + startCommit := flow.StateCommitment(ledger.InitialState()) storageSnapshot := state.NewLedgerStorageSnapshot( ledger, - flow.StateCommitment(ledger.InitialState())) + startCommit) vm := fvm.NewVirtualMachine() @@ -58,10 +63,11 @@ func (b *Bootstrapper) BootstrapLedger( return flow.DummyStateCommitment, err } - newStateCommitment, _, err := state.CommitDelta( + newStateCommitment, _, _, err := state.CommitDelta( ledger, executionSnapshot, - flow.StateCommitment(ledger.InitialState())) + storehouse.NewExecutingBlockSnapshot(storageSnapshot, startCommit), + ) if err != nil { return flow.DummyStateCommitment, err } @@ -137,3 +143,21 @@ func (b *Bootstrapper) BootstrapExecutionDatabase( return nil } + +func ImportRegistersFromCheckpoint(logger zerolog.Logger, checkpointFile string, checkpointHeight uint64, pdb *pebble.DB, workerCount int) error { + logger.Info().Msgf("importing registers from checkpoint file %s at height %d", checkpointFile, checkpointHeight) + + bootstrap, err := pStorage.NewRegisterBootstrap(pdb, checkpointFile, checkpointHeight, logger) + if err != nil { + return fmt.Errorf("could not create registers bootstrapper: %w", err) + } + + // TODO: find a way to hook a context up to this to allow a graceful shutdown + err = bootstrap.IndexCheckpointFile(context.Background(), workerCount) + if err != nil { + return fmt.Errorf("could not load checkpoint file: %w", err) + } + + logger.Info().Msgf("finish importing registers from checkpoint file %s at height %d", checkpointFile, checkpointHeight) + return nil +} diff --git a/engine/execution/state/mock/execution_state.go b/engine/execution/state/mock/execution_state.go index f847632cd94..88080cf2f50 100644 --- a/engine/execution/state/mock/execution_state.go +++ b/engine/execution/state/mock/execution_state.go @@ -44,6 +44,41 @@ func (_m *ExecutionState) ChunkDataPackByChunkID(_a0 flow.Identifier) (*flow.Chu return r0, r1 } +// CreateStorageSnapshot provides a mock function with given fields: blockID +func (_m *ExecutionState) CreateStorageSnapshot(blockID flow.Identifier) (snapshot.StorageSnapshot, *flow.Header, error) { + ret := _m.Called(blockID) + + var r0 snapshot.StorageSnapshot + var r1 *flow.Header + var r2 error + if rf, ok := ret.Get(0).(func(flow.Identifier) (snapshot.StorageSnapshot, *flow.Header, error)); ok { + return rf(blockID) + } + if rf, ok := ret.Get(0).(func(flow.Identifier) snapshot.StorageSnapshot); ok { + r0 = rf(blockID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(snapshot.StorageSnapshot) + } + } + + if rf, ok := ret.Get(1).(func(flow.Identifier) *flow.Header); ok { + r1 = rf(blockID) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*flow.Header) + } + } + + if rf, ok := ret.Get(2).(func(flow.Identifier) error); ok { + r2 = rf(blockID) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + // GetExecutionResultID provides a mock function with given fields: _a0, _a1 func (_m *ExecutionState) GetExecutionResultID(_a0 context.Context, _a1 flow.Identifier) (flow.Identifier, error) { ret := _m.Called(_a0, _a1) @@ -103,27 +138,37 @@ func (_m *ExecutionState) GetHighestExecutedBlockID(_a0 context.Context) (uint64 return r0, r1, r2 } -// HasState provides a mock function with given fields: _a0 -func (_m *ExecutionState) HasState(_a0 flow.StateCommitment) bool { - ret := _m.Called(_a0) +// IsBlockExecuted provides a mock function with given fields: height, blockID +func (_m *ExecutionState) IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) { + ret := _m.Called(height, blockID) var r0 bool - if rf, ok := ret.Get(0).(func(flow.StateCommitment) bool); ok { - r0 = rf(_a0) + var r1 error + if rf, ok := ret.Get(0).(func(uint64, flow.Identifier) (bool, error)); ok { + return rf(height, blockID) + } + if rf, ok := ret.Get(0).(func(uint64, flow.Identifier) bool); ok { + r0 = rf(height, blockID) } else { r0 = ret.Get(0).(bool) } - return r0 + if rf, ok := ret.Get(1).(func(uint64, flow.Identifier) error); ok { + r1 = rf(height, blockID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 } -// NewStorageSnapshot provides a mock function with given fields: _a0 -func (_m *ExecutionState) NewStorageSnapshot(_a0 flow.StateCommitment) snapshot.StorageSnapshot { - ret := _m.Called(_a0) +// NewStorageSnapshot provides a mock function with given fields: commit, blockID, height +func (_m *ExecutionState) NewStorageSnapshot(commit flow.StateCommitment, blockID flow.Identifier, height uint64) snapshot.StorageSnapshot { + ret := _m.Called(commit, blockID, height) var r0 snapshot.StorageSnapshot - if rf, ok := ret.Get(0).(func(flow.StateCommitment) snapshot.StorageSnapshot); ok { - r0 = rf(_a0) + if rf, ok := ret.Get(0).(func(flow.StateCommitment, flow.Identifier, uint64) snapshot.StorageSnapshot); ok { + r0 = rf(commit, blockID, height) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(snapshot.StorageSnapshot) @@ -147,25 +192,25 @@ func (_m *ExecutionState) SaveExecutionResults(ctx context.Context, result *exec return r0 } -// StateCommitmentByBlockID provides a mock function with given fields: _a0, _a1 -func (_m *ExecutionState) StateCommitmentByBlockID(_a0 context.Context, _a1 flow.Identifier) (flow.StateCommitment, error) { - ret := _m.Called(_a0, _a1) +// StateCommitmentByBlockID provides a mock function with given fields: _a0 +func (_m *ExecutionState) StateCommitmentByBlockID(_a0 flow.Identifier) (flow.StateCommitment, error) { + ret := _m.Called(_a0) var r0 flow.StateCommitment var r1 error - if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) (flow.StateCommitment, error)); ok { - return rf(_a0, _a1) + if rf, ok := ret.Get(0).(func(flow.Identifier) (flow.StateCommitment, error)); ok { + return rf(_a0) } - if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) flow.StateCommitment); ok { - r0 = rf(_a0, _a1) + if rf, ok := ret.Get(0).(func(flow.Identifier) flow.StateCommitment); ok { + r0 = rf(_a0) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(flow.StateCommitment) } } - if rf, ok := ret.Get(1).(func(context.Context, flow.Identifier) error); ok { - r1 = rf(_a0, _a1) + if rf, ok := ret.Get(1).(func(flow.Identifier) error); ok { + r1 = rf(_a0) } else { r1 = ret.Error(1) } diff --git a/engine/execution/state/mock/finalized_execution_state.go b/engine/execution/state/mock/finalized_execution_state.go new file mode 100644 index 00000000000..ae878be58e0 --- /dev/null +++ b/engine/execution/state/mock/finalized_execution_state.go @@ -0,0 +1,39 @@ +// Code generated by mockery v2.21.4. DO NOT EDIT. + +package mock + +import mock "github.com/stretchr/testify/mock" + +// FinalizedExecutionState is an autogenerated mock type for the FinalizedExecutionState type +type FinalizedExecutionState struct { + mock.Mock +} + +// GetHighestFinalizedExecuted provides a mock function with given fields: +func (_m *FinalizedExecutionState) GetHighestFinalizedExecuted() uint64 { + ret := _m.Called() + + var r0 uint64 + if rf, ok := ret.Get(0).(func() uint64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(uint64) + } + + return r0 +} + +type mockConstructorTestingTNewFinalizedExecutionState interface { + mock.TestingT + Cleanup(func()) +} + +// NewFinalizedExecutionState creates a new instance of FinalizedExecutionState. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewFinalizedExecutionState(t mockConstructorTestingTNewFinalizedExecutionState) *FinalizedExecutionState { + mock := &FinalizedExecutionState{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/engine/execution/state/mock/read_only_execution_state.go b/engine/execution/state/mock/read_only_execution_state.go index 24f230ed316..cab9c110b1c 100644 --- a/engine/execution/state/mock/read_only_execution_state.go +++ b/engine/execution/state/mock/read_only_execution_state.go @@ -42,6 +42,41 @@ func (_m *ReadOnlyExecutionState) ChunkDataPackByChunkID(_a0 flow.Identifier) (* return r0, r1 } +// CreateStorageSnapshot provides a mock function with given fields: blockID +func (_m *ReadOnlyExecutionState) CreateStorageSnapshot(blockID flow.Identifier) (snapshot.StorageSnapshot, *flow.Header, error) { + ret := _m.Called(blockID) + + var r0 snapshot.StorageSnapshot + var r1 *flow.Header + var r2 error + if rf, ok := ret.Get(0).(func(flow.Identifier) (snapshot.StorageSnapshot, *flow.Header, error)); ok { + return rf(blockID) + } + if rf, ok := ret.Get(0).(func(flow.Identifier) snapshot.StorageSnapshot); ok { + r0 = rf(blockID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(snapshot.StorageSnapshot) + } + } + + if rf, ok := ret.Get(1).(func(flow.Identifier) *flow.Header); ok { + r1 = rf(blockID) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*flow.Header) + } + } + + if rf, ok := ret.Get(2).(func(flow.Identifier) error); ok { + r2 = rf(blockID) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + // GetExecutionResultID provides a mock function with given fields: _a0, _a1 func (_m *ReadOnlyExecutionState) GetExecutionResultID(_a0 context.Context, _a1 flow.Identifier) (flow.Identifier, error) { ret := _m.Called(_a0, _a1) @@ -101,27 +136,37 @@ func (_m *ReadOnlyExecutionState) GetHighestExecutedBlockID(_a0 context.Context) return r0, r1, r2 } -// HasState provides a mock function with given fields: _a0 -func (_m *ReadOnlyExecutionState) HasState(_a0 flow.StateCommitment) bool { - ret := _m.Called(_a0) +// IsBlockExecuted provides a mock function with given fields: height, blockID +func (_m *ReadOnlyExecutionState) IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) { + ret := _m.Called(height, blockID) var r0 bool - if rf, ok := ret.Get(0).(func(flow.StateCommitment) bool); ok { - r0 = rf(_a0) + var r1 error + if rf, ok := ret.Get(0).(func(uint64, flow.Identifier) (bool, error)); ok { + return rf(height, blockID) + } + if rf, ok := ret.Get(0).(func(uint64, flow.Identifier) bool); ok { + r0 = rf(height, blockID) } else { r0 = ret.Get(0).(bool) } - return r0 + if rf, ok := ret.Get(1).(func(uint64, flow.Identifier) error); ok { + r1 = rf(height, blockID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 } -// NewStorageSnapshot provides a mock function with given fields: _a0 -func (_m *ReadOnlyExecutionState) NewStorageSnapshot(_a0 flow.StateCommitment) snapshot.StorageSnapshot { - ret := _m.Called(_a0) +// NewStorageSnapshot provides a mock function with given fields: commit, blockID, height +func (_m *ReadOnlyExecutionState) NewStorageSnapshot(commit flow.StateCommitment, blockID flow.Identifier, height uint64) snapshot.StorageSnapshot { + ret := _m.Called(commit, blockID, height) var r0 snapshot.StorageSnapshot - if rf, ok := ret.Get(0).(func(flow.StateCommitment) snapshot.StorageSnapshot); ok { - r0 = rf(_a0) + if rf, ok := ret.Get(0).(func(flow.StateCommitment, flow.Identifier, uint64) snapshot.StorageSnapshot); ok { + r0 = rf(commit, blockID, height) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(snapshot.StorageSnapshot) @@ -131,25 +176,25 @@ func (_m *ReadOnlyExecutionState) NewStorageSnapshot(_a0 flow.StateCommitment) s return r0 } -// StateCommitmentByBlockID provides a mock function with given fields: _a0, _a1 -func (_m *ReadOnlyExecutionState) StateCommitmentByBlockID(_a0 context.Context, _a1 flow.Identifier) (flow.StateCommitment, error) { - ret := _m.Called(_a0, _a1) +// StateCommitmentByBlockID provides a mock function with given fields: _a0 +func (_m *ReadOnlyExecutionState) StateCommitmentByBlockID(_a0 flow.Identifier) (flow.StateCommitment, error) { + ret := _m.Called(_a0) var r0 flow.StateCommitment var r1 error - if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) (flow.StateCommitment, error)); ok { - return rf(_a0, _a1) + if rf, ok := ret.Get(0).(func(flow.Identifier) (flow.StateCommitment, error)); ok { + return rf(_a0) } - if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) flow.StateCommitment); ok { - r0 = rf(_a0, _a1) + if rf, ok := ret.Get(0).(func(flow.Identifier) flow.StateCommitment); ok { + r0 = rf(_a0) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(flow.StateCommitment) } } - if rf, ok := ret.Get(1).(func(context.Context, flow.Identifier) error); ok { - r1 = rf(_a0, _a1) + if rf, ok := ret.Get(1).(func(flow.Identifier) error); ok { + r1 = rf(_a0) } else { r1 = ret.Error(1) } diff --git a/engine/execution/state/mock/register_updates_holder.go b/engine/execution/state/mock/register_updates_holder.go index 69c58edf06f..dd4239d2f6d 100644 --- a/engine/execution/state/mock/register_updates_holder.go +++ b/engine/execution/state/mock/register_updates_holder.go @@ -12,6 +12,22 @@ type RegisterUpdatesHolder struct { mock.Mock } +// UpdatedRegisterSet provides a mock function with given fields: +func (_m *RegisterUpdatesHolder) UpdatedRegisterSet() map[flow.RegisterID][]byte { + ret := _m.Called() + + var r0 map[flow.RegisterID][]byte + if rf, ok := ret.Get(0).(func() map[flow.RegisterID][]byte); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(map[flow.RegisterID][]byte) + } + } + + return r0 +} + // UpdatedRegisters provides a mock function with given fields: func (_m *RegisterUpdatesHolder) UpdatedRegisters() flow.RegisterEntries { ret := _m.Called() diff --git a/engine/execution/state/mock/script_execution_state.go b/engine/execution/state/mock/script_execution_state.go index 904defab7fa..54b21fab462 100644 --- a/engine/execution/state/mock/script_execution_state.go +++ b/engine/execution/state/mock/script_execution_state.go @@ -3,8 +3,6 @@ package mock import ( - context "context" - flow "github.com/onflow/flow-go/model/flow" mock "github.com/stretchr/testify/mock" @@ -16,27 +14,72 @@ type ScriptExecutionState struct { mock.Mock } -// HasState provides a mock function with given fields: _a0 -func (_m *ScriptExecutionState) HasState(_a0 flow.StateCommitment) bool { - ret := _m.Called(_a0) +// CreateStorageSnapshot provides a mock function with given fields: blockID +func (_m *ScriptExecutionState) CreateStorageSnapshot(blockID flow.Identifier) (snapshot.StorageSnapshot, *flow.Header, error) { + ret := _m.Called(blockID) + + var r0 snapshot.StorageSnapshot + var r1 *flow.Header + var r2 error + if rf, ok := ret.Get(0).(func(flow.Identifier) (snapshot.StorageSnapshot, *flow.Header, error)); ok { + return rf(blockID) + } + if rf, ok := ret.Get(0).(func(flow.Identifier) snapshot.StorageSnapshot); ok { + r0 = rf(blockID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(snapshot.StorageSnapshot) + } + } + + if rf, ok := ret.Get(1).(func(flow.Identifier) *flow.Header); ok { + r1 = rf(blockID) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(*flow.Header) + } + } + + if rf, ok := ret.Get(2).(func(flow.Identifier) error); ok { + r2 = rf(blockID) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + +// IsBlockExecuted provides a mock function with given fields: height, blockID +func (_m *ScriptExecutionState) IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) { + ret := _m.Called(height, blockID) var r0 bool - if rf, ok := ret.Get(0).(func(flow.StateCommitment) bool); ok { - r0 = rf(_a0) + var r1 error + if rf, ok := ret.Get(0).(func(uint64, flow.Identifier) (bool, error)); ok { + return rf(height, blockID) + } + if rf, ok := ret.Get(0).(func(uint64, flow.Identifier) bool); ok { + r0 = rf(height, blockID) } else { r0 = ret.Get(0).(bool) } - return r0 + if rf, ok := ret.Get(1).(func(uint64, flow.Identifier) error); ok { + r1 = rf(height, blockID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 } -// NewStorageSnapshot provides a mock function with given fields: _a0 -func (_m *ScriptExecutionState) NewStorageSnapshot(_a0 flow.StateCommitment) snapshot.StorageSnapshot { - ret := _m.Called(_a0) +// NewStorageSnapshot provides a mock function with given fields: commit, blockID, height +func (_m *ScriptExecutionState) NewStorageSnapshot(commit flow.StateCommitment, blockID flow.Identifier, height uint64) snapshot.StorageSnapshot { + ret := _m.Called(commit, blockID, height) var r0 snapshot.StorageSnapshot - if rf, ok := ret.Get(0).(func(flow.StateCommitment) snapshot.StorageSnapshot); ok { - r0 = rf(_a0) + if rf, ok := ret.Get(0).(func(flow.StateCommitment, flow.Identifier, uint64) snapshot.StorageSnapshot); ok { + r0 = rf(commit, blockID, height) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(snapshot.StorageSnapshot) @@ -46,25 +89,25 @@ func (_m *ScriptExecutionState) NewStorageSnapshot(_a0 flow.StateCommitment) sna return r0 } -// StateCommitmentByBlockID provides a mock function with given fields: _a0, _a1 -func (_m *ScriptExecutionState) StateCommitmentByBlockID(_a0 context.Context, _a1 flow.Identifier) (flow.StateCommitment, error) { - ret := _m.Called(_a0, _a1) +// StateCommitmentByBlockID provides a mock function with given fields: _a0 +func (_m *ScriptExecutionState) StateCommitmentByBlockID(_a0 flow.Identifier) (flow.StateCommitment, error) { + ret := _m.Called(_a0) var r0 flow.StateCommitment var r1 error - if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) (flow.StateCommitment, error)); ok { - return rf(_a0, _a1) + if rf, ok := ret.Get(0).(func(flow.Identifier) (flow.StateCommitment, error)); ok { + return rf(_a0) } - if rf, ok := ret.Get(0).(func(context.Context, flow.Identifier) flow.StateCommitment); ok { - r0 = rf(_a0, _a1) + if rf, ok := ret.Get(0).(func(flow.Identifier) flow.StateCommitment); ok { + r0 = rf(_a0) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(flow.StateCommitment) } } - if rf, ok := ret.Get(1).(func(context.Context, flow.Identifier) error); ok { - r1 = rf(_a0, _a1) + if rf, ok := ret.Get(1).(func(flow.Identifier) error); ok { + r1 = rf(_a0) } else { r1 = ret.Error(1) } diff --git a/engine/execution/state/state.go b/engine/execution/state/state.go index 3d996475ff9..27c08acca70 100644 --- a/engine/execution/state/state.go +++ b/engine/execution/state/state.go @@ -9,6 +9,7 @@ import ( "github.com/dgraph-io/badger/v2" "github.com/onflow/flow-go/engine/execution" + "github.com/onflow/flow-go/engine/execution/storehouse" "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/convert" @@ -21,6 +22,9 @@ import ( "github.com/onflow/flow-go/storage/badger/procedure" ) +var ErrExecutionStatePruned = fmt.Errorf("execution state is pruned") +var ErrNotExecuted = fmt.Errorf("block not executed") + // ReadOnlyExecutionState allows to read the execution state type ReadOnlyExecutionState interface { ScriptExecutionState @@ -36,14 +40,34 @@ type ReadOnlyExecutionState interface { // ScriptExecutionState is a subset of the `state.ExecutionState` interface purposed to only access the state // used for script execution and not mutate the execution state of the blockchain. type ScriptExecutionState interface { - // NewStorageSnapshot creates a new ready-only view at the given state commitment. - NewStorageSnapshot(flow.StateCommitment) snapshot.StorageSnapshot + // NewStorageSnapshot creates a new ready-only view at the given block. + NewStorageSnapshot(commit flow.StateCommitment, blockID flow.Identifier, height uint64) snapshot.StorageSnapshot + + // CreateStorageSnapshot creates a new ready-only view at the given block. + // It returns: + // - (nil, nil, storage.ErrNotFound) if block is unknown + // - (nil, nil, state.ErrNotExecuted) if block is not executed + // - (nil, nil, state.ErrExecutionStatePruned) if the execution state has been pruned + CreateStorageSnapshot(blockID flow.Identifier) (snapshot.StorageSnapshot, *flow.Header, error) // StateCommitmentByBlockID returns the final state commitment for the provided block ID. - StateCommitmentByBlockID(context.Context, flow.Identifier) (flow.StateCommitment, error) + StateCommitmentByBlockID(flow.Identifier) (flow.StateCommitment, error) + + // Any error returned is exception + IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) +} + +func IsParentExecuted(state ReadOnlyExecutionState, header *flow.Header) (bool, error) { + // sanity check, caller should not pass a root block + if header.Height == 0 { + return false, fmt.Errorf("root block does not have parent block") + } + return state.IsBlockExecuted(header.Height-1, header.ParentID) +} - // HasState returns true if the state with the given state commitment exists in memory - HasState(flow.StateCommitment) bool +// FinalizedExecutionState is an interface used to access the finalized execution state +type FinalizedExecutionState interface { + GetHighestFinalizedExecuted() uint64 } // TODO Many operations here are should be transactional, so we need to refactor this @@ -76,6 +100,11 @@ type state struct { serviceEvents storage.ServiceEvents transactionResults storage.TransactionResults db *badger.DB + + registerStore execution.RegisterStore + // when it is true, registers are stored in both register store and ledger + // and register queries will send to the register store instead of ledger + enableRegisterStore bool } // NewExecutionState returns a new execution state access layer for the given ledger storage. @@ -93,21 +122,25 @@ func NewExecutionState( transactionResults storage.TransactionResults, db *badger.DB, tracer module.Tracer, + registerStore execution.RegisterStore, + enableRegisterStore bool, ) ExecutionState { return &state{ - tracer: tracer, - ls: ls, - commits: commits, - blocks: blocks, - headers: headers, - collections: collections, - chunkDataPacks: chunkDataPacks, - results: results, - myReceipts: myReceipts, - events: events, - serviceEvents: serviceEvents, - transactionResults: transactionResults, - db: db, + tracer: tracer, + ls: ls, + commits: commits, + blocks: blocks, + headers: headers, + collections: collections, + chunkDataPacks: chunkDataPacks, + results: results, + myReceipts: myReceipts, + events: events, + serviceEvents: serviceEvents, + transactionResults: transactionResults, + db: db, + registerStore: registerStore, + enableRegisterStore: enableRegisterStore, } } @@ -213,36 +246,95 @@ func (storage *LedgerStorageSnapshot) Get( func (s *state) NewStorageSnapshot( commitment flow.StateCommitment, + blockID flow.Identifier, + height uint64, ) snapshot.StorageSnapshot { + if s.enableRegisterStore { + return storehouse.NewBlockEndStateSnapshot(s.registerStore, blockID, height) + } return NewLedgerStorageSnapshot(s.ls, commitment) } +func (s *state) CreateStorageSnapshot( + blockID flow.Identifier, +) (snapshot.StorageSnapshot, *flow.Header, error) { + header, err := s.headers.ByBlockID(blockID) + if err != nil { + return nil, nil, fmt.Errorf("cannot get header by block ID: %w", err) + } + + // make sure the block is executed + commit, err := s.commits.ByBlockID(blockID) + if err != nil { + // statecommitment not exists means the block hasn't been executed yet + if errors.Is(err, storage.ErrNotFound) { + return nil, nil, fmt.Errorf("block %v is not executed: %w", blockID, ErrNotExecuted) + } + + return nil, header, fmt.Errorf("cannot get commit by block ID: %w", err) + } + + // make sure we have trie state for this block + hasState, err := s.hasState(commit, blockID, header.Height) + if err != nil { + return nil, header, fmt.Errorf("cannot check state for commit %x (block %v): %w", commit, blockID, err) + } + + if !hasState { + return nil, header, fmt.Errorf("state not found for commit %x (block %v): %w", commit, blockID, ErrExecutionStatePruned) + } + + return s.NewStorageSnapshot(commit, blockID, header.Height), header, nil +} + type RegisterUpdatesHolder interface { UpdatedRegisters() flow.RegisterEntries + UpdatedRegisterSet() map[flow.RegisterID]flow.RegisterValue } -func CommitDelta(ldg ledger.Ledger, ruh RegisterUpdatesHolder, baseState flow.StateCommitment) (flow.StateCommitment, *ledger.TrieUpdate, error) { - keys, values := RegisterEntriesToKeysValues(ruh.UpdatedRegisters()) +// CommitDelta takes a base storage snapshot and creates a new storage snapshot +// with the register updates from the given RegisterUpdatesHolder +// a new statecommitment is returned from the ledger, along with the trie update +// any error returned are exceptions +func CommitDelta( + ldg ledger.Ledger, + ruh RegisterUpdatesHolder, + baseStorageSnapshot execution.ExtendableStorageSnapshot, +) (flow.StateCommitment, *ledger.TrieUpdate, execution.ExtendableStorageSnapshot, error) { + updatedRegisters := ruh.UpdatedRegisters() + keys, values := RegisterEntriesToKeysValues(updatedRegisters) + baseState := baseStorageSnapshot.Commitment() update, err := ledger.NewUpdate(ledger.State(baseState), keys, values) if err != nil { - return flow.DummyStateCommitment, nil, fmt.Errorf("cannot create ledger update: %w", err) + return flow.DummyStateCommitment, nil, nil, fmt.Errorf("cannot create ledger update: %w", err) } - commit, trieUpdate, err := ldg.Set(update) + newState, trieUpdate, err := ldg.Set(update) if err != nil { - return flow.DummyStateCommitment, nil, err + return flow.DummyStateCommitment, nil, nil, fmt.Errorf("could not update ledger: %w", err) } - return flow.StateCommitment(commit), trieUpdate, nil + newCommit := flow.StateCommitment(newState) + + newStorageSnapshot := baseStorageSnapshot.Extend(newCommit, ruh.UpdatedRegisterSet()) + + return newCommit, trieUpdate, newStorageSnapshot, nil } -func (s *state) HasState(commitment flow.StateCommitment) bool { - return s.ls.HasState(ledger.State(commitment)) +func (s *state) hasState(commitment flow.StateCommitment, blockID flow.Identifier, height uint64) (bool, error) { + ledgerHasState := s.ls.HasState(ledger.State(commitment)) + if !ledgerHasState { + return false, nil + } + if !s.enableRegisterStore { + return true, nil + } + return s.registerStore.IsBlockExecuted(height, blockID) } -func (s *state) StateCommitmentByBlockID(ctx context.Context, blockID flow.Identifier) (flow.StateCommitment, error) { +func (s *state) StateCommitmentByBlockID(blockID flow.Identifier) (flow.StateCommitment, error) { return s.commits.ByBlockID(blockID) } @@ -280,6 +372,18 @@ func (s *state) SaveExecutionResults( return fmt.Errorf("could not save execution results: %w", err) } + if s.enableRegisterStore { + // save registers to register store + err = s.registerStore.SaveRegisters( + result.BlockExecutionResult.ExecutableBlock.Block.Header, + result.BlockExecutionResult.AllUpdatedRegisters(), + ) + + if err != nil { + return fmt.Errorf("could not save updated registers: %w", err) + } + } + //outside batch because it requires read access err = s.UpdateHighestExecutedBlockIfHigher(childCtx, result.ExecutableBlock.Block.Header) if err != nil { @@ -378,7 +482,19 @@ func (s *state) UpdateHighestExecutedBlockIfHigher(ctx context.Context, header * return operation.RetryOnConflict(s.db.Update, procedure.UpdateHighestExecutedBlockIfHigher(header)) } +// deprecated by storehouse's GetHighestFinalizedExecuted func (s *state) GetHighestExecutedBlockID(ctx context.Context) (uint64, flow.Identifier, error) { + if s.enableRegisterStore { + // when storehouse is enabled, the highest executed block is consisted as + // the highest finalized and executed block + height := s.GetHighestFinalizedExecuted() + header, err := s.headers.ByHeight(height) + if err != nil { + return 0, flow.ZeroID, fmt.Errorf("could not get header by height %v: %w", height, err) + } + return height, header.ID(), nil + } + var blockID flow.Identifier var height uint64 err := s.db.View(procedure.GetHighestExecutedBlock(&height, &blockID)) @@ -389,11 +505,23 @@ func (s *state) GetHighestExecutedBlockID(ctx context.Context) (uint64, flow.Ide return height, blockID, nil } +func (s *state) GetHighestFinalizedExecuted() uint64 { + if !s.enableRegisterStore { + panic("could not get highest finalized executed height without register store enabled") + } + return s.registerStore.LastFinalizedAndExecutedHeight() +} + // IsBlockExecuted returns true if the block is executed, which means registers, events, -// results, statecommitment etc are all stored. +// results, etc are all stored. // otherwise returns false -func IsBlockExecuted(ctx context.Context, state ReadOnlyExecutionState, block flow.Identifier) (bool, error) { - _, err := state.StateCommitmentByBlockID(ctx, block) +func (s *state) IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) { + if s.enableRegisterStore { + return s.registerStore.IsBlockExecuted(height, blockID) + } + + // ledger-based execution state uses commitment to determine if a block has been executed + _, err := s.StateCommitmentByBlockID(blockID) // statecommitment exists means the block has been executed if err == nil { @@ -406,4 +534,5 @@ func IsBlockExecuted(ctx context.Context, state ReadOnlyExecutionState, block fl } return false, err + } diff --git a/engine/execution/state/state_test.go b/engine/execution/state/state_test.go index 6d6833837f0..394da8ba22a 100644 --- a/engine/execution/state/state_test.go +++ b/engine/execution/state/state_test.go @@ -1,31 +1,33 @@ package state_test import ( - "context" + "errors" + "fmt" "testing" "github.com/dgraph-io/badger/v2" - "github.com/golang/mock/gomock" "github.com/rs/zerolog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - ledger2 "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/common/convert" "github.com/onflow/flow-go/ledger/common/pathfinder" "github.com/onflow/flow-go/engine/execution/state" + "github.com/onflow/flow-go/engine/execution/storehouse" "github.com/onflow/flow-go/fvm/storage/snapshot" + led "github.com/onflow/flow-go/ledger" ledger "github.com/onflow/flow-go/ledger/complete" "github.com/onflow/flow-go/ledger/complete/wal/fixtures" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/module/trace" + storageerr "github.com/onflow/flow-go/storage" storage "github.com/onflow/flow-go/storage/mock" - "github.com/onflow/flow-go/storage/mocks" "github.com/onflow/flow-go/utils/unittest" ) -func prepareTest(f func(t *testing.T, es state.ExecutionState, l *ledger.Ledger)) func(*testing.T) { +func prepareTest(f func(t *testing.T, es state.ExecutionState, l *ledger.Ledger, headers *storage.Headers, commits *storage.Commits)) func(*testing.T) { return func(t *testing.T) { unittest.RunWithBadgerDB(t, func(badgerDB *badger.DB) { metricsCollector := &metrics.NoopCollector{} @@ -39,74 +41,73 @@ func prepareTest(f func(t *testing.T, es state.ExecutionState, l *ledger.Ledger) <-compactor.Done() }() - ctrl := gomock.NewController(t) - - stateCommitments := mocks.NewMockCommits(ctrl) - blocks := mocks.NewMockBlocks(ctrl) - headers := mocks.NewMockHeaders(ctrl) - collections := mocks.NewMockCollections(ctrl) - events := mocks.NewMockEvents(ctrl) - serviceEvents := mocks.NewMockServiceEvents(ctrl) - txResults := mocks.NewMockTransactionResults(ctrl) - - stateCommitment := ls.InitialState() - - stateCommitments.EXPECT().ByBlockID(gomock.Any()).Return(flow.StateCommitment(stateCommitment), nil) - - chunkDataPacks := new(storage.ChunkDataPacks) - - results := new(storage.ExecutionResults) - myReceipts := new(storage.MyExecutionReceipts) + stateCommitments := storage.NewCommits(t) + headers := storage.NewHeaders(t) + blocks := storage.NewBlocks(t) + collections := storage.NewCollections(t) + events := storage.NewEvents(t) + serviceEvents := storage.NewServiceEvents(t) + txResults := storage.NewTransactionResults(t) + chunkDataPacks := storage.NewChunkDataPacks(t) + results := storage.NewExecutionResults(t) + myReceipts := storage.NewMyExecutionReceipts(t) es := state.NewExecutionState( ls, stateCommitments, blocks, headers, collections, chunkDataPacks, results, myReceipts, events, serviceEvents, txResults, badgerDB, trace.NewNoopTracer(), + // TODO: to test with register store + nil, + false, ) - f(t, es, ls) + f(t, es, ls, headers, stateCommitments) }) } } func TestExecutionStateWithTrieStorage(t *testing.T) { - registerID1 := flow.NewRegisterID("fruit", "") - - registerID2 := flow.NewRegisterID("vegetable", "") - - t.Run("commit write and read new state", prepareTest(func(t *testing.T, es state.ExecutionState, l *ledger.Ledger) { - // TODO: use real block ID - sc1, err := es.StateCommitmentByBlockID(context.Background(), flow.Identifier{}) - assert.NoError(t, err) + t.Run("commit write and read new state", prepareTest(func( + t *testing.T, es state.ExecutionState, l *ledger.Ledger, headers *storage.Headers, stateCommitments *storage.Commits) { + header1 := unittest.BlockHeaderFixture() + sc1 := flow.StateCommitment(l.InitialState()) + reg1 := unittest.MakeOwnerReg("fruit", "apple") + reg2 := unittest.MakeOwnerReg("vegetable", "carrot") executionSnapshot := &snapshot.ExecutionSnapshot{ WriteSet: map[flow.RegisterID]flow.RegisterValue{ - registerID1: flow.RegisterValue("apple"), - registerID2: flow.RegisterValue("carrot"), + reg1.Key: reg1.Value, + reg2.Key: reg2.Value, }, } - sc2, update, err := state.CommitDelta(l, executionSnapshot, sc1) + sc2, update, sc2Snapshot, err := state.CommitDelta(l, executionSnapshot, + storehouse.NewExecutingBlockSnapshot(state.NewLedgerStorageSnapshot(l, sc1), sc1)) assert.NoError(t, err) + // validate new snapshot + val, err := sc2Snapshot.Get(reg1.Key) + require.NoError(t, err) + require.Equal(t, reg1.Value, val) + + val, err = sc2Snapshot.Get(reg2.Key) + require.NoError(t, err) + require.Equal(t, reg2.Value, val) + assert.Equal(t, sc1[:], update.RootHash[:]) assert.Len(t, update.Paths, 2) assert.Len(t, update.Payloads, 2) - key1 := ledger2.NewKey( - []ledger2.KeyPart{ - ledger2.NewKeyPart(0, []byte(registerID1.Owner)), - ledger2.NewKeyPart(2, []byte(registerID1.Key)), - }) + // validate sc2 + require.Equal(t, sc2, sc2Snapshot.Commitment()) + + key1 := convert.RegisterIDToLedgerKey(reg1.Key) path1, err := pathfinder.KeyToPath(key1, ledger.DefaultPathFinderVersion) assert.NoError(t, err) - key2 := ledger2.NewKey( - []ledger2.KeyPart{ - ledger2.NewKeyPart(0, []byte(registerID2.Owner)), - ledger2.NewKeyPart(2, []byte(registerID2.Key)), - }) + key2 := convert.RegisterIDToLedgerKey(reg2.Key) path2, err := pathfinder.KeyToPath(key2, ledger.DefaultPathFinderVersion) assert.NoError(t, err) + // validate update assert.Equal(t, path1, update.Paths[0]) assert.Equal(t, path2, update.Paths[1]) @@ -122,122 +123,205 @@ func TestExecutionStateWithTrieStorage(t *testing.T) { assert.Equal(t, []byte("apple"), []byte(update.Payloads[0].Value())) assert.Equal(t, []byte("carrot"), []byte(update.Payloads[1].Value())) - storageSnapshot := es.NewStorageSnapshot(sc2) + header2 := unittest.BlockHeaderWithParentFixture(header1) + storageSnapshot := es.NewStorageSnapshot(sc2, header2.ID(), header2.Height) - b1, err := storageSnapshot.Get(registerID1) + b1, err := storageSnapshot.Get(reg1.Key) assert.NoError(t, err) - b2, err := storageSnapshot.Get(registerID2) + b2, err := storageSnapshot.Get(reg2.Key) assert.NoError(t, err) assert.Equal(t, flow.RegisterValue("apple"), b1) assert.Equal(t, flow.RegisterValue("carrot"), b2) + + // verify has state + require.True(t, l.HasState(led.State(sc2))) + require.False(t, l.HasState(led.State(unittest.StateCommitmentFixture()))) })) - t.Run("commit write and read previous state", prepareTest(func(t *testing.T, es state.ExecutionState, l *ledger.Ledger) { - // TODO: use real block ID - sc1, err := es.StateCommitmentByBlockID(context.Background(), flow.Identifier{}) - assert.NoError(t, err) + t.Run("commit write and read previous state", prepareTest(func( + t *testing.T, es state.ExecutionState, l *ledger.Ledger, headers *storage.Headers, stateCommitments *storage.Commits) { + header1 := unittest.BlockHeaderFixture() + sc1 := flow.StateCommitment(l.InitialState()) + reg1 := unittest.MakeOwnerReg("fruit", "apple") executionSnapshot1 := &snapshot.ExecutionSnapshot{ WriteSet: map[flow.RegisterID]flow.RegisterValue{ - registerID1: []byte("apple"), + reg1.Key: reg1.Value, }, } - sc2, _, err := state.CommitDelta(l, executionSnapshot1, sc1) + sc2, _, sc2Snapshot, err := state.CommitDelta(l, executionSnapshot1, + storehouse.NewExecutingBlockSnapshot(state.NewLedgerStorageSnapshot(l, sc1), sc1), + ) assert.NoError(t, err) // update value and get resulting state commitment executionSnapshot2 := &snapshot.ExecutionSnapshot{ WriteSet: map[flow.RegisterID]flow.RegisterValue{ - registerID1: []byte("orange"), + reg1.Key: flow.RegisterValue("orange"), }, } - sc3, _, err := state.CommitDelta(l, executionSnapshot2, sc2) + sc3, _, _, err := state.CommitDelta(l, executionSnapshot2, sc2Snapshot) assert.NoError(t, err) + header2 := unittest.BlockHeaderWithParentFixture(header1) // create a view for previous state version - storageSnapshot3 := es.NewStorageSnapshot(sc2) + storageSnapshot3 := es.NewStorageSnapshot(sc2, header2.ID(), header2.Height) + header3 := unittest.BlockHeaderWithParentFixture(header1) // create a view for new state version - storageSnapshot4 := es.NewStorageSnapshot(sc3) + storageSnapshot4 := es.NewStorageSnapshot(sc3, header3.ID(), header3.Height) + + // header2 and header3 are different blocks + assert.True(t, header2.ID() != (header3.ID())) // fetch the value at both versions - b1, err := storageSnapshot3.Get(registerID1) + b1, err := storageSnapshot3.Get(reg1.Key) assert.NoError(t, err) - b2, err := storageSnapshot4.Get(registerID1) + b2, err := storageSnapshot4.Get(reg1.Key) assert.NoError(t, err) assert.Equal(t, flow.RegisterValue("apple"), b1) assert.Equal(t, flow.RegisterValue("orange"), b2) })) - t.Run("commit delta and read new state", prepareTest(func(t *testing.T, es state.ExecutionState, l *ledger.Ledger) { - // TODO: use real block ID - sc1, err := es.StateCommitmentByBlockID(context.Background(), flow.Identifier{}) - assert.NoError(t, err) + t.Run("commit delta and read new state", prepareTest(func( + t *testing.T, es state.ExecutionState, l *ledger.Ledger, headers *storage.Headers, stateCommitments *storage.Commits) { + header1 := unittest.BlockHeaderFixture() + sc1 := flow.StateCommitment(l.InitialState()) + reg1 := unittest.MakeOwnerReg("fruit", "apple") + reg2 := unittest.MakeOwnerReg("vegetable", "carrot") // set initial value executionSnapshot1 := &snapshot.ExecutionSnapshot{ WriteSet: map[flow.RegisterID]flow.RegisterValue{ - registerID1: []byte("apple"), - registerID2: []byte("apple"), + reg1.Key: reg1.Value, + reg2.Key: reg2.Value, }, } - sc2, _, err := state.CommitDelta(l, executionSnapshot1, sc1) + sc2, _, sc2Snapshot, err := state.CommitDelta(l, executionSnapshot1, + storehouse.NewExecutingBlockSnapshot(state.NewLedgerStorageSnapshot(l, sc1), sc1), + ) assert.NoError(t, err) // update value and get resulting state commitment executionSnapshot2 := &snapshot.ExecutionSnapshot{ WriteSet: map[flow.RegisterID]flow.RegisterValue{ - registerID1: nil, + reg1.Key: nil, }, } - sc3, _, err := state.CommitDelta(l, executionSnapshot2, sc2) + sc3, _, _, err := state.CommitDelta(l, executionSnapshot2, sc2Snapshot) assert.NoError(t, err) + header2 := unittest.BlockHeaderWithParentFixture(header1) // create a view for previous state version - storageSnapshot3 := es.NewStorageSnapshot(sc2) + storageSnapshot3 := es.NewStorageSnapshot(sc2, header2.ID(), header2.Height) + header3 := unittest.BlockHeaderWithParentFixture(header2) // create a view for new state version - storageSnapshot4 := es.NewStorageSnapshot(sc3) + storageSnapshot4 := es.NewStorageSnapshot(sc3, header3.ID(), header3.Height) // fetch the value at both versions - b1, err := storageSnapshot3.Get(registerID1) + b1, err := storageSnapshot3.Get(reg1.Key) assert.NoError(t, err) - b2, err := storageSnapshot4.Get(registerID1) + b2, err := storageSnapshot4.Get(reg1.Key) assert.NoError(t, err) assert.Equal(t, flow.RegisterValue("apple"), b1) assert.Empty(t, b2) })) - t.Run("commit delta and persist state commit for the second time should be OK", prepareTest(func(t *testing.T, es state.ExecutionState, l *ledger.Ledger) { - // TODO: use real block ID - sc1, err := es.StateCommitmentByBlockID(context.Background(), flow.Identifier{}) - assert.NoError(t, err) + t.Run("commit delta and persist state commit for the second time should be OK", prepareTest(func( + t *testing.T, es state.ExecutionState, l *ledger.Ledger, headers *storage.Headers, stateCommitments *storage.Commits) { + sc1 := flow.StateCommitment(l.InitialState()) + reg1 := unittest.MakeOwnerReg("fruit", "apple") + reg2 := unittest.MakeOwnerReg("vegetable", "carrot") // set initial value executionSnapshot1 := &snapshot.ExecutionSnapshot{ WriteSet: map[flow.RegisterID]flow.RegisterValue{ - registerID1: flow.RegisterValue("apple"), - registerID2: flow.RegisterValue("apple"), + reg1.Key: reg1.Value, + reg2.Key: reg2.Value, }, } - sc2, _, err := state.CommitDelta(l, executionSnapshot1, sc1) + sc2, _, _, err := state.CommitDelta(l, executionSnapshot1, + storehouse.NewExecutingBlockSnapshot(state.NewLedgerStorageSnapshot(l, sc1), sc1), + ) assert.NoError(t, err) // committing for the second time should be OK - sc2Same, _, err := state.CommitDelta(l, executionSnapshot1, sc1) + sc2Same, _, _, err := state.CommitDelta(l, executionSnapshot1, + storehouse.NewExecutingBlockSnapshot(state.NewLedgerStorageSnapshot(l, sc1), sc1), + ) assert.NoError(t, err) require.Equal(t, sc2, sc2Same) })) + t.Run("commit write and create snapshot", prepareTest(func( + t *testing.T, es state.ExecutionState, l *ledger.Ledger, headers *storage.Headers, stateCommitments *storage.Commits) { + header1 := unittest.BlockHeaderFixture() + header2 := unittest.BlockHeaderWithParentFixture(header1) + sc1 := flow.StateCommitment(l.InitialState()) + + reg1 := unittest.MakeOwnerReg("fruit", "apple") + reg2 := unittest.MakeOwnerReg("vegetable", "carrot") + executionSnapshot := &snapshot.ExecutionSnapshot{ + WriteSet: map[flow.RegisterID]flow.RegisterValue{ + reg1.Key: reg1.Value, + reg2.Key: reg2.Value, + }, + } + + sc2, _, _, err := state.CommitDelta(l, executionSnapshot, + storehouse.NewExecutingBlockSnapshot(state.NewLedgerStorageSnapshot(l, sc1), sc1)) + assert.NoError(t, err) + + // test CreateStorageSnapshot for known and executed block + headers.On("ByBlockID", header2.ID()).Return(header2, nil) + stateCommitments.On("ByBlockID", header2.ID()).Return(sc2, nil) + snapshot2, h2, err := es.CreateStorageSnapshot(header2.ID()) + require.NoError(t, err) + require.Equal(t, header2.ID(), h2.ID()) + + val, err := snapshot2.Get(reg1.Key) + require.NoError(t, err) + require.Equal(t, val, reg1.Value) + + val, err = snapshot2.Get(reg2.Key) + require.NoError(t, err) + require.Equal(t, val, reg2.Value) + + // test CreateStorageSnapshot for unknown block + unknown := unittest.BlockHeaderFixture() + headers.On("ByBlockID", unknown.ID()).Return(nil, fmt.Errorf("unknown: %w", storageerr.ErrNotFound)) + _, _, err = es.CreateStorageSnapshot(unknown.ID()) + require.Error(t, err) + require.True(t, errors.Is(err, storageerr.ErrNotFound)) + + // test CreateStorageSnapshot for known and unexecuted block + unexecuted := unittest.BlockHeaderFixture() + headers.On("ByBlockID", unexecuted.ID()).Return(unexecuted, nil) + stateCommitments.On("ByBlockID", unexecuted.ID()).Return(nil, fmt.Errorf("not found: %w", storageerr.ErrNotFound)) + _, _, err = es.CreateStorageSnapshot(unexecuted.ID()) + require.Error(t, err) + require.True(t, errors.Is(err, state.ErrNotExecuted)) + + // test CreateStorageSnapshot for pruned block + pruned := unittest.BlockHeaderFixture() + prunedState := unittest.StateCommitmentFixture() + headers.On("ByBlockID", pruned.ID()).Return(pruned, nil) + stateCommitments.On("ByBlockID", pruned.ID()).Return(prunedState, nil) + _, _, err = es.CreateStorageSnapshot(pruned.ID()) + require.Error(t, err) + require.True(t, errors.Is(err, state.ErrExecutionStatePruned)) + })) + } diff --git a/engine/execution/storehouse.go b/engine/execution/storehouse.go index 21f2add53d9..ab3ebf66e90 100644 --- a/engine/execution/storehouse.go +++ b/engine/execution/storehouse.go @@ -1,8 +1,11 @@ package execution import ( + "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/finalizedreader" "github.com/onflow/flow-go/storage" + "github.com/onflow/flow-go/storage/pebble" ) // RegisterStore is the interface for register store @@ -11,7 +14,7 @@ type RegisterStore interface { // GetRegister first try to get the register from InMemoryRegisterStore, then OnDiskRegisterStore // It returns: // - (value, nil) if the register value is found at the given block - // - (nil, storage.ErrNotFound) if the register is not found + // - (nil, nil) if the register is not found // - (nil, storage.ErrHeightNotIndexed) if the height is below the first height that is indexed. // - (nil, storehouse.ErrNotExecuted) if the block is not executed yet // - (nil, storehouse.ErrNotExecuted) if the block is conflicting iwth finalized block @@ -26,9 +29,9 @@ type RegisterStore interface { // - exception if the block is below the pruned height // - exception if the save block is saved again // - exception for any other exception - SaveRegisters(header *flow.Header, registers []flow.RegisterEntry) error + SaveRegisters(header *flow.Header, registers flow.RegisterEntries) error - // Depend on FinalizedReader's GetFinalizedBlockIDAtHeight + // Depend on FinalizedReader's FinalizedBlockIDAtHeight // Depend on ExecutedFinalizedWAL.Append // Depend on OnDiskRegisterStore.SaveRegisters // OnBlockFinalized trigger the check of whether a block at the next height becomes finalized and executed. @@ -55,9 +58,15 @@ type RegisterStore interface { } type FinalizedReader interface { + // FinalizedBlockIDAtHeight returns the block ID of the finalized block at the given height. + // It return storage.NotFound if the given height has not been finalized yet + // any other error returned are exceptions FinalizedBlockIDAtHeight(height uint64) (flow.Identifier, error) } +// finalizedreader.FinalizedReader is an implementation of FinalizedReader interface +var _ FinalizedReader = (*finalizedreader.FinalizedReader)(nil) + // see implementation in engine/execution/storehouse/in_memory_register_store.go type InMemoryRegisterStore interface { Prune(finalizedHeight uint64, finalizedBlockID flow.Identifier) error @@ -72,7 +81,7 @@ type InMemoryRegisterStore interface { height uint64, blockID flow.Identifier, parentID flow.Identifier, - registers []flow.RegisterEntry, + registers flow.RegisterEntries, ) error IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) @@ -80,8 +89,11 @@ type InMemoryRegisterStore interface { type OnDiskRegisterStore = storage.RegisterIndex +// pebble.Registers is an implementation of OnDiskRegisterStore interface +var _ OnDiskRegisterStore = (*pebble.Registers)(nil) + type ExecutedFinalizedWAL interface { - Append(height uint64, registers []flow.RegisterEntry) error + Append(height uint64, registers flow.RegisterEntries) error // Latest returns the latest height in the WAL. Latest() (uint64, error) @@ -92,5 +104,11 @@ type ExecutedFinalizedWAL interface { type WALReader interface { // Next returns the next height and trie updates in the WAL. // It returns EOF when there are no more entries. - Next() (height uint64, registers []flow.RegisterEntry, err error) + Next() (height uint64, registers flow.RegisterEntries, err error) +} + +type ExtendableStorageSnapshot interface { + snapshot.StorageSnapshot + Extend(newCommit flow.StateCommitment, updatedRegisters map[flow.RegisterID]flow.RegisterValue) ExtendableStorageSnapshot + Commitment() flow.StateCommitment } diff --git a/engine/execution/storehouse/block_end_snapshot.go b/engine/execution/storehouse/block_end_snapshot.go new file mode 100644 index 00000000000..bf7718a9543 --- /dev/null +++ b/engine/execution/storehouse/block_end_snapshot.go @@ -0,0 +1,88 @@ +package storehouse + +import ( + "errors" + "sync" + + "github.com/onflow/flow-go/engine/execution" + "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage" +) + +var _ snapshot.StorageSnapshot = (*BlockEndStateSnapshot)(nil) + +// BlockEndStateSnapshot represents the storage at the end of a block. +type BlockEndStateSnapshot struct { + storage execution.RegisterStore + + blockID flow.Identifier + height uint64 + + mutex sync.RWMutex + readCache map[flow.RegisterID]flow.RegisterValue // cache the reads from storage at baseBlock +} + +// the caller must ensure the block height is for the given block +func NewBlockEndStateSnapshot( + storage execution.RegisterStore, + blockID flow.Identifier, + height uint64, +) *BlockEndStateSnapshot { + return &BlockEndStateSnapshot{ + storage: storage, + blockID: blockID, + height: height, + readCache: make(map[flow.RegisterID]flow.RegisterValue), + } +} + +// Get returns the value of the register with the given register ID. +// It returns: +// - (value, nil) if the register exists +// - (nil, nil) if the register does not exist +// - (nil, storage.ErrHeightNotIndexed) if the height is below the first height that is indexed. +// - (nil, storehouse.ErrNotExecuted) if the block is not executed yet +// - (nil, storehouse.ErrNotExecuted) if the block is conflicting with finalized block +// - (nil, err) for any other exceptions +func (s *BlockEndStateSnapshot) Get(id flow.RegisterID) (flow.RegisterValue, error) { + value, ok := s.getFromCache(id) + if ok { + return value, nil + } + + value, err := s.getFromStorage(id) + if err != nil { + return nil, err + } + + s.mutex.Lock() + defer s.mutex.Unlock() + + // TODO: consider adding a limit/eviction policy for the cache + s.readCache[id] = value + return value, err +} + +func (s *BlockEndStateSnapshot) getFromCache(id flow.RegisterID) (flow.RegisterValue, bool) { + s.mutex.RLock() + defer s.mutex.RUnlock() + + value, ok := s.readCache[id] + return value, ok +} + +func (s *BlockEndStateSnapshot) getFromStorage(id flow.RegisterID) (flow.RegisterValue, error) { + value, err := s.storage.GetRegister(s.height, s.blockID, id) + if err != nil { + if errors.Is(err, storage.ErrNotFound) { + // if the error is not found, we return a nil RegisterValue, + // in this case, the nil value can be cached, because the storage will not change it + return nil, nil + } + // if the error is not ErrNotFound, such as storage.ErrHeightNotIndexed, storehouse.ErrNotExecuted + // we return the error without caching + return nil, err + } + return value, nil +} diff --git a/engine/execution/storehouse/block_end_snapshot_test.go b/engine/execution/storehouse/block_end_snapshot_test.go new file mode 100644 index 00000000000..3787ec2d552 --- /dev/null +++ b/engine/execution/storehouse/block_end_snapshot_test.go @@ -0,0 +1,102 @@ +package storehouse_test + +import ( + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/atomic" + + executionMock "github.com/onflow/flow-go/engine/execution/mock" + "github.com/onflow/flow-go/engine/execution/storehouse" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestBlockEndSnapshot(t *testing.T) { + t.Run("Get register", func(t *testing.T) { + header := unittest.BlockHeaderFixture() + + // create mock for storage + store := executionMock.NewRegisterStore(t) + reg := unittest.MakeOwnerReg("key", "value") + store.On("GetRegister", header.Height, header.ID(), reg.Key).Return(reg.Value, nil).Once() + snapshot := storehouse.NewBlockEndStateSnapshot(store, header.ID(), header.Height) + + // test get from storage + value, err := snapshot.Get(reg.Key) + require.NoError(t, err) + require.Equal(t, reg.Value, value) + + // test get from cache + value, err = snapshot.Get(reg.Key) + require.NoError(t, err) + require.Equal(t, reg.Value, value) + + // test get non existing register + unknownReg := unittest.MakeOwnerReg("unknown", "unknown") + store.On("GetRegister", header.Height, header.ID(), unknownReg.Key). + Return(nil, fmt.Errorf("fail: %w", storage.ErrNotFound)).Once() + + value, err = snapshot.Get(unknownReg.Key) + require.NoError(t, err) + require.Nil(t, value) + + // test get non existing register from cache + _, err = snapshot.Get(unknownReg.Key) + require.NoError(t, err) + require.Nil(t, value) + + // test getting storage.ErrHeightNotIndexed error + heightNotIndexed := unittest.MakeOwnerReg("height not index", "height not index") + store.On("GetRegister", header.Height, header.ID(), heightNotIndexed.Key). + Return(nil, fmt.Errorf("fail: %w", storage.ErrHeightNotIndexed)). + Twice() // to verify the result is not cached + + // verify getting the correct error + _, err = snapshot.Get(heightNotIndexed.Key) + require.Error(t, err) + require.True(t, errors.Is(err, storage.ErrHeightNotIndexed)) + + // verify result is not cached + _, err = snapshot.Get(heightNotIndexed.Key) + require.Error(t, err) + require.True(t, errors.Is(err, storage.ErrHeightNotIndexed)) + + // test getting storage.ErrNotExecuted error + heightNotExecuted := unittest.MakeOwnerReg("height not executed", "height not executed") + counter := atomic.NewInt32(0) + store. + On("GetRegister", header.Height, header.ID(), heightNotExecuted.Key). + Return(func(uint64, flow.Identifier, flow.RegisterID) (flow.RegisterValue, error) { + counter.Inc() + // the first call should return error + if counter.Load() == 1 { + return nil, fmt.Errorf("fail: %w", storehouse.ErrNotExecuted) + } + // the second call, it returns value + return heightNotExecuted.Value, nil + }). + Times(2) + + // first time should return error + _, err = snapshot.Get(heightNotExecuted.Key) + require.Error(t, err) + require.True(t, errors.Is(err, storehouse.ErrNotExecuted)) + + // second time should return value + value, err = snapshot.Get(heightNotExecuted.Key) + require.NoError(t, err) + require.Equal(t, heightNotExecuted.Value, value) + + // third time should be cached + value, err = snapshot.Get(heightNotExecuted.Key) + require.NoError(t, err) + require.Equal(t, heightNotExecuted.Value, value) + + store.AssertExpectations(t) + }) + +} diff --git a/engine/execution/storehouse/executing_block_snapshot.go b/engine/execution/storehouse/executing_block_snapshot.go new file mode 100644 index 00000000000..e9e9b97c32b --- /dev/null +++ b/engine/execution/storehouse/executing_block_snapshot.go @@ -0,0 +1,76 @@ +package storehouse + +import ( + "github.com/onflow/flow-go/engine/execution" + "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/model/flow" +) + +var _ execution.ExtendableStorageSnapshot = (*ExecutingBlockSnapshot)(nil) + +// ExecutingBlockSnapshot is a snapshot of the storage at an executed collection. +// It starts with a storage snapshot at the end of previous block, +// The register updates at the executed collection at baseHeight + 1 are cached in +// a map, such that retrieving register values at the snapshot will first check +// the cache, and then the storage. +type ExecutingBlockSnapshot struct { + // the snapshot at the end of previous block + previous snapshot.StorageSnapshot + + commitment flow.StateCommitment + registerUpdates map[flow.RegisterID]flow.RegisterValue +} + +// create a new storage snapshot for an executed collection +// at the base block at height h - 1 +func NewExecutingBlockSnapshot( + previous snapshot.StorageSnapshot, + // the statecommitment of a block at height h + commitment flow.StateCommitment, +) *ExecutingBlockSnapshot { + return &ExecutingBlockSnapshot{ + previous: previous, + commitment: commitment, + registerUpdates: make(map[flow.RegisterID]flow.RegisterValue), + } +} + +// Get returns the register value at the snapshot. +func (s *ExecutingBlockSnapshot) Get(id flow.RegisterID) (flow.RegisterValue, error) { + // get from latest updates first + value, ok := s.getFromUpdates(id) + if ok { + return value, nil + } + + // get from BlockEndStateSnapshot at previous block + value, err := s.previous.Get(id) + return value, err +} + +func (s *ExecutingBlockSnapshot) getFromUpdates(id flow.RegisterID) (flow.RegisterValue, bool) { + value, ok := s.registerUpdates[id] + return value, ok +} + +// Extend returns a new storage snapshot at the same block but but for a different state commitment, +// which contains the given registerUpdates +// Usually it's used to create a new storage snapshot at the next executed collection. +// The registerUpdates contains the register updates at the executed collection. +func (s *ExecutingBlockSnapshot) Extend(newCommit flow.StateCommitment, updates map[flow.RegisterID]flow.RegisterValue) execution.ExtendableStorageSnapshot { + // if there is no update, we can return the original snapshot directly + // instead of wrapping it with a new ExecutingBlockSnapshot that has no update + if len(updates) == 0 { + return s + } + + return &ExecutingBlockSnapshot{ + previous: s, + commitment: newCommit, + registerUpdates: updates, + } +} + +func (s *ExecutingBlockSnapshot) Commitment() flow.StateCommitment { + return s.commitment +} diff --git a/engine/execution/storehouse/executing_block_snapshot_test.go b/engine/execution/storehouse/executing_block_snapshot_test.go new file mode 100644 index 00000000000..616430ec858 --- /dev/null +++ b/engine/execution/storehouse/executing_block_snapshot_test.go @@ -0,0 +1,92 @@ +package storehouse_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/engine/execution/storehouse" + "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestExtendingBlockSnapshot(t *testing.T) { + t.Run("Get register", func(t *testing.T) { + reg1 := makeReg("key1", "val1") + base := snapshot.MapStorageSnapshot{ + reg1.Key: reg1.Value, + } + baseCommit := unittest.StateCommitmentFixture() + snap := storehouse.NewExecutingBlockSnapshot(base, baseCommit) + + // should get value + value, err := snap.Get(reg1.Key) + require.NoError(t, err) + require.Equal(t, reg1.Value, value) + + // should get nil for unknown register + unknown := makeReg("unknown", "unknownV") + value, err = snap.Get(unknown.Key) + require.NoError(t, err) + require.Equal(t, []byte(nil), value) + }) + + t.Run("Extend snapshot", func(t *testing.T) { + reg1 := makeReg("key1", "val1") + reg2 := makeReg("key2", "val2") + base := snapshot.MapStorageSnapshot{ + reg1.Key: reg1.Value, + reg2.Key: reg2.Value, + } + // snap1: { key1: val1, key2: val2 } + snap1 := storehouse.NewExecutingBlockSnapshot(base, unittest.StateCommitmentFixture()) + + updatedReg2 := makeReg("key2", "val22") + reg3 := makeReg("key3", "val3") + // snap2: { key1: val1, key2: val22, key3: val3 } + snap2 := snap1.Extend(unittest.StateCommitmentFixture(), map[flow.RegisterID]flow.RegisterValue{ + updatedReg2.Key: updatedReg2.Value, + reg3.Key: reg3.Value, + }) + + // should get un-changed value + value, err := snap2.Get(reg1.Key) + require.NoError(t, err) + require.Equal(t, []byte("val1"), value) + + value, err = snap2.Get(reg2.Key) + require.NoError(t, err) + require.Equal(t, []byte("val22"), value) + + value, err = snap2.Get(reg3.Key) + require.NoError(t, err) + require.Equal(t, []byte("val3"), value) + + // should get nil for unknown register + unknown := makeReg("unknown", "unknownV") + value, err = snap2.Get(unknown.Key) + require.NoError(t, err) + require.Equal(t, []byte(nil), value) + + // create snap3 with reg3 updated + // snap3: { key1: val1, key2: val22, key3: val33 } + updatedReg3 := makeReg("key3", "val33") + snap3 := snap2.Extend(unittest.StateCommitmentFixture(), map[flow.RegisterID]flow.RegisterValue{ + updatedReg3.Key: updatedReg3.Value, + }) + + // verify all keys + value, err = snap3.Get(reg1.Key) + require.NoError(t, err) + require.Equal(t, []byte("val1"), value) + + value, err = snap3.Get(reg2.Key) + require.NoError(t, err) + require.Equal(t, []byte("val22"), value) + + value, err = snap3.Get(reg3.Key) + require.NoError(t, err) + require.Equal(t, []byte("val33"), value) + }) +} diff --git a/engine/execution/storehouse/in_memory_register_store.go b/engine/execution/storehouse/in_memory_register_store.go new file mode 100644 index 00000000000..ce7438f5a4f --- /dev/null +++ b/engine/execution/storehouse/in_memory_register_store.go @@ -0,0 +1,334 @@ +package storehouse + +import ( + "errors" + "fmt" + "sync" + + "github.com/onflow/flow-go/engine/execution" + "github.com/onflow/flow-go/model/flow" +) + +var _ execution.InMemoryRegisterStore = (*InMemoryRegisterStore)(nil) + +var ErrNotExecuted = fmt.Errorf("block is not executed") + +type PrunedError struct { + PrunedHeight uint64 + PrunedID flow.Identifier + Height uint64 +} + +func NewPrunedError(height uint64, prunedHeight uint64, prunedID flow.Identifier) error { + return PrunedError{Height: height, PrunedHeight: prunedHeight, PrunedID: prunedID} +} + +func (e PrunedError) Error() string { + return fmt.Sprintf("block is pruned at height %d", e.Height) +} + +func IsPrunedError(err error) (PrunedError, bool) { + var e PrunedError + ok := errors.As(err, &e) + if ok { + return e, true + } + return PrunedError{}, false +} + +type InMemoryRegisterStore struct { + sync.RWMutex + registersByBlockID map[flow.Identifier]map[flow.RegisterID]flow.RegisterValue // for storing the registers + parentByBlockID map[flow.Identifier]flow.Identifier // for register updates to be fork-aware + blockIDsByHeight map[uint64]map[flow.Identifier]struct{} // for pruning + prunedHeight uint64 // registers at pruned height are pruned (not saved in registersByBlockID) + prunedID flow.Identifier // to ensure all blocks are extending from pruned block (last finalized and executed block) +} + +func NewInMemoryRegisterStore(lastHeight uint64, lastID flow.Identifier) *InMemoryRegisterStore { + return &InMemoryRegisterStore{ + registersByBlockID: make(map[flow.Identifier]map[flow.RegisterID]flow.RegisterValue), + parentByBlockID: make(map[flow.Identifier]flow.Identifier), + blockIDsByHeight: make(map[uint64]map[flow.Identifier]struct{}), + prunedHeight: lastHeight, + prunedID: lastID, + } +} + +// SaveRegisters saves the registers of a block to InMemoryRegisterStore +// It needs to ensure the block is above the pruned height and is connected to the pruned block +func (s *InMemoryRegisterStore) SaveRegisters( + height uint64, + blockID flow.Identifier, + parentID flow.Identifier, + registers flow.RegisterEntries, +) error { + // preprocess data before acquiring the lock + regs := make(map[flow.RegisterID]flow.RegisterValue, len(registers)) + for _, reg := range registers { + regs[reg.Key] = reg.Value + } + + s.Lock() + defer s.Unlock() + + // ensure all saved registers are above the pruned height + if height <= s.prunedHeight { + return fmt.Errorf("saving pruned registers height %v <= pruned height %v", height, s.prunedHeight) + } + + // ensure the block is not already saved + _, ok := s.registersByBlockID[blockID] + if ok { + // already exist + return fmt.Errorf("saving registers for block %s, but it already exists", blockID) + } + + // make sure parent is a known block or the pruned block, which forms a fork + _, ok = s.registersByBlockID[parentID] + if !ok && parentID != s.prunedID { + return fmt.Errorf("saving registers for block %s, but its parent %s is not saved", blockID, parentID) + } + + // update registers for the block + s.registersByBlockID[blockID] = regs + + // update index on parent + s.parentByBlockID[blockID] = parentID + + // update index on height + sameHeight, ok := s.blockIDsByHeight[height] + if !ok { + sameHeight = make(map[flow.Identifier]struct{}) + s.blockIDsByHeight[height] = sameHeight + } + + sameHeight[blockID] = struct{}{} + return nil +} + +// GetRegister will return the latest updated value of the given register +// since the pruned height. +// It returns PrunedError if the register is unknown or not updated since the pruned height +// Can't return ErrNotFound, since we can't distinguish between not found or not updated since the pruned height +func (s *InMemoryRegisterStore) GetRegister(height uint64, blockID flow.Identifier, register flow.RegisterID) (flow.RegisterValue, error) { + s.RLock() + defer s.RUnlock() + + if height <= s.prunedHeight { + return flow.RegisterValue{}, NewPrunedError(height, s.prunedHeight, s.prunedID) + } + + _, ok := s.registersByBlockID[blockID] + if !ok { + return flow.RegisterValue{}, fmt.Errorf("cannot get register at height %d, block %v is not saved: %w", height, blockID, ErrNotExecuted) + } + + // traverse the fork to find the latest updated value of the given register + // if not found, it means the register is not updated from the pruned block to the given block + block := blockID + for { + // TODO: do not hold the read lock when reading register from the updated register map + reg, ok := s.readRegisterAtBlockID(block, register) + if ok { + return reg, nil + } + + // the register didn't get updated at this block, so check its parent + + parent, ok := s.parentByBlockID[block] + if !ok { + // if the parent doesn't exist because the block itself is the pruned block, + // then it means the register is not updated since the pruned height. + // since we can't distinguish whether the register is not updated or not exist at all, + // we just return PrunedError error along with the prunedHeight, so the + // caller could check with OnDiskRegisterStore to find if this register has a updated value + // at earlier height. + if block == s.prunedID { + return flow.RegisterValue{}, NewPrunedError(height, s.prunedHeight, s.prunedID) + } + + // in this case, it means the state of in-memory register store is inconsistent, + // because all saved block must have their parent saved in `parentByBlockID`, and traversing + // its parent should eventually reach the pruned block, otherwise it's a bug. + + return flow.RegisterValue{}, + fmt.Errorf("inconsistent parent block index in in-memory-register-store, ancient block %v is not found when getting register at block %v", + block, blockID) + } + + block = parent + } +} + +func (s *InMemoryRegisterStore) readRegisterAtBlockID(blockID flow.Identifier, register flow.RegisterID) (flow.RegisterValue, bool) { + registers, ok := s.registersByBlockID[blockID] + if !ok { + return flow.RegisterValue{}, false + } + + value, ok := registers[register] + return value, ok +} + +// GetUpdatedRegisters returns the updated registers of a block +func (s *InMemoryRegisterStore) GetUpdatedRegisters(height uint64, blockID flow.Identifier) (flow.RegisterEntries, error) { + registerUpdates, err := s.getUpdatedRegisters(height, blockID) + if err != nil { + return nil, err + } + + // since the registerUpdates won't be updated and registers for a block can only be set once, + // we don't need to hold the lock when converting it from map into slice. + registers := make(flow.RegisterEntries, 0, len(registerUpdates)) + for regID, reg := range registerUpdates { + registers = append(registers, flow.RegisterEntry{ + Key: regID, + Value: reg, + }) + } + + return registers, nil +} + +func (s *InMemoryRegisterStore) getUpdatedRegisters(height uint64, blockID flow.Identifier) (map[flow.RegisterID]flow.RegisterValue, error) { + s.RLock() + defer s.RUnlock() + if height <= s.prunedHeight { + return nil, fmt.Errorf("cannot get register at height %d, it is pruned %v", height, s.prunedHeight) + } + + registerUpdates, ok := s.registersByBlockID[blockID] + if !ok { + return nil, fmt.Errorf("cannot get register at height %d, block %s is not found: %w", height, blockID, ErrNotExecuted) + } + return registerUpdates, nil +} + +// Prune prunes the register store to the given height +// The pruned height must be an executed block, the caller should ensure that by calling SaveRegisters before. +// +// Pruning is done by walking up the finalized fork from `s.prunedHeight` to `height`. At each height, prune all +// other forks that begin at that height. This ensures that data for all conflicting forks are freed +// +// TODO: It does not block the caller, the pruning work is done async +func (s *InMemoryRegisterStore) Prune(height uint64, blockID flow.Identifier) error { + finalizedFork, err := s.findFinalizedFork(height, blockID) + if err != nil { + return fmt.Errorf("cannot find finalized fork: %w", err) + } + + s.Lock() + defer s.Unlock() + + // prune each height starting at the lowest height in the fork. this will remove all blocks + // below the new pruned height along with any conflicting forks. + for i := len(finalizedFork) - 1; i >= 0; i-- { + blockID := finalizedFork[i] + + err := s.pruneByHeight(s.prunedHeight+1, blockID) + if err != nil { + return fmt.Errorf("could not prune by height %v: %w", s.prunedHeight+1, err) + } + } + + return nil +} + +func (s *InMemoryRegisterStore) PrunedHeight() uint64 { + s.RLock() + defer s.RUnlock() + return s.prunedHeight +} + +func (s *InMemoryRegisterStore) IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) { + s.RLock() + defer s.RUnlock() + + // finalized and executed blocks are pruned + if height <= s.prunedHeight { + return false, fmt.Errorf("below pruned height") + } + + _, ok := s.registersByBlockID[blockID] + return ok, nil +} + +// findFinalizedFork returns the finalized fork from higher height to lower height +// the last block's height is s.prunedHeight + 1 +func (s *InMemoryRegisterStore) findFinalizedFork(height uint64, blockID flow.Identifier) ([]flow.Identifier, error) { + s.RLock() + defer s.RUnlock() + + if height <= s.prunedHeight { + return nil, fmt.Errorf("cannot find finalized fork at height %d, it is pruned (prunedHeight: %v)", height, s.prunedHeight) + } + prunedHeight := height + block := blockID + + // walk backwards from the provided finalized block to the last pruned block + // the result must be a chain from height/blockID to s.prunedHeight/s.prunedID + fork := make([]flow.Identifier, 0, height-s.prunedHeight) + for { + fork = append(fork, block) + prunedHeight-- + + parent, ok := s.parentByBlockID[block] + if !ok { + return nil, fmt.Errorf("inconsistent parent block index in in-memory-register-store, ancient block %s is not found when finding finalized fork at height %v", block, height) + } + if parent == s.prunedID { + break + } + block = parent + } + + if prunedHeight != s.prunedHeight { + return nil, fmt.Errorf("inconsistent parent block index in in-memory-register-store, pruned height %d is not equal to %d", prunedHeight, s.prunedHeight) + } + + return fork, nil +} + +func (s *InMemoryRegisterStore) pruneByHeight(height uint64, finalized flow.Identifier) error { + s.removeBlock(height, finalized) + + // remove conflicting forks + for blockID := range s.blockIDsByHeight[height] { + s.pruneFork(height, blockID) + } + + if len(s.blockIDsByHeight[height]) > 0 { + return fmt.Errorf("all forks on the same height should have been pruend, but actually not: %v", len(s.blockIDsByHeight[height])) + } + + delete(s.blockIDsByHeight, height) + s.prunedHeight = height + s.prunedID = finalized + return nil +} + +func (s *InMemoryRegisterStore) removeBlock(height uint64, blockID flow.Identifier) { + delete(s.registersByBlockID, blockID) + delete(s.parentByBlockID, blockID) + delete(s.blockIDsByHeight[height], blockID) +} + +// pruneFork prunes the provided block and all of its children +func (s *InMemoryRegisterStore) pruneFork(height uint64, blockID flow.Identifier) { + s.removeBlock(height, blockID) + // all its children must be at height + 1, whose parent is blockID + + nextHeight := height + 1 + blocksAtNextHeight, ok := s.blockIDsByHeight[nextHeight] + if !ok { + return + } + + for block := range blocksAtNextHeight { + isChild := s.parentByBlockID[block] == blockID + if isChild { + s.pruneFork(nextHeight, block) + } + } +} diff --git a/engine/execution/storehouse/in_memory_register_store_test.go b/engine/execution/storehouse/in_memory_register_store_test.go new file mode 100644 index 00000000000..b9ad40ba3e7 --- /dev/null +++ b/engine/execution/storehouse/in_memory_register_store_test.go @@ -0,0 +1,627 @@ +package storehouse + +import ( + "fmt" + "math/rand" + "sync" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/utils/unittest" +) + +// 1. SaveRegisters should fail if height is below or equal to pruned height +func TestInMemoryRegisterStore(t *testing.T) { + t.Run("FailBelowOrEqualPrunedHeight", func(t *testing.T) { + t.Parallel() + // 1. + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + err := store.SaveRegisters( + pruned-1, // below pruned pruned, will fail + unittest.IdentifierFixture(), + unittest.IdentifierFixture(), + flow.RegisterEntries{}, + ) + require.Error(t, err) + require.Contains(t, err.Error(), "<= pruned height") + + err = store.SaveRegisters( + pruned, // equal to pruned height, will fail + lastID, + unittest.IdentifierFixture(), + flow.RegisterEntries{}, + ) + require.Error(t, err) + require.Contains(t, err.Error(), "<= pruned height") + }) + + // 2. SaveRegisters should fail if its parent block doesn't exist and it is not the pruned block + // SaveRegisters should succeed if height is above pruned height and block is not saved, + // the updates can be retrieved by GetUpdatedRegisters + // GetRegister should return PrunedError if the queried key is not updated since pruned height + // GetRegister should return PrunedError if the queried height is below pruned height + // GetRegister should return ErrNotExecuted if the block is unknown + t.Run("FailParentNotExist", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + height := pruned + 1 // above the pruned pruned + blockID := unittest.IdentifierFixture() + notExistParent := unittest.IdentifierFixture() + reg := unittest.RegisterEntryFixture() + err := store.SaveRegisters( + height, + blockID, + notExistParent, // should fail because parent doesn't exist + flow.RegisterEntries{reg}, + ) + require.Error(t, err) + require.Contains(t, err.Error(), "but its parent") + }) + + t.Run("StoreOK", func(t *testing.T) { + t.Parallel() + // 3. + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + height := pruned + 1 // above the pruned pruned + blockID := unittest.IdentifierFixture() + reg := unittest.RegisterEntryFixture() + err := store.SaveRegisters( + height, + blockID, + lastID, + flow.RegisterEntries{reg}, + ) + require.NoError(t, err) + + val, err := store.GetRegister(height, blockID, reg.Key) + require.NoError(t, err) + require.Equal(t, reg.Value, val) + + // unknown key + _, err = store.GetRegister(height, blockID, unknownKey) + require.Error(t, err) + pe, ok := IsPrunedError(err) + require.True(t, ok) + require.Equal(t, pe.PrunedHeight, pruned) + require.Equal(t, pe.Height, height) + + // unknown block with unknown height + _, err = store.GetRegister(height+1, unknownBlock, reg.Key) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotExecuted) + + // unknown block with known height + _, err = store.GetRegister(height, unknownBlock, reg.Key) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotExecuted) + + // too low height + _, err = store.GetRegister(height-1, unknownBlock, reg.Key) + require.Error(t, err) + pe, ok = IsPrunedError(err) + require.True(t, ok) + require.Equal(t, pe.PrunedHeight, pruned) + require.Equal(t, pe.Height, height-1) + }) + + // 3. SaveRegisters should fail if the block is already saved + t.Run("StoreFailAlreadyExist", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + height := pruned + 1 // above the pruned pruned + blockID := unittest.IdentifierFixture() + reg := unittest.RegisterEntryFixture() + err := store.SaveRegisters( + height, + blockID, + lastID, + flow.RegisterEntries{reg}, + ) + require.NoError(t, err) + + // saving again should fail + err = store.SaveRegisters( + height, + blockID, + lastID, + flow.RegisterEntries{reg}, + ) + require.Error(t, err) + require.Contains(t, err.Error(), "already exists") + }) + + // 4. SaveRegisters should succeed if a different block at the same height was saved before, + // updates for different blocks can be retrieved by their blockID + t.Run("StoreOKDifferentBlockSameParent", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + // 10 <- A + // ^- B + height := pruned + 1 // above the pruned pruned + blockA := unittest.IdentifierFixture() + regA := unittest.RegisterEntryFixture() + err := store.SaveRegisters( + height, + blockA, + lastID, + flow.RegisterEntries{regA}, + ) + require.NoError(t, err) + + blockB := unittest.IdentifierFixture() + regB := unittest.RegisterEntryFixture() + err = store.SaveRegisters( + height, + blockB, // different block + lastID, // same parent + flow.RegisterEntries{regB}, + ) + require.NoError(t, err) + + valA, err := store.GetRegister(height, blockA, regA.Key) + require.NoError(t, err) + require.Equal(t, regA.Value, valA) + + valB, err := store.GetRegister(height, blockB, regB.Key) + require.NoError(t, err) + require.Equal(t, regB.Value, valB) + }) + + // 5. Given A(X: 1, Y: 2), GetRegister(A, X) should return 1, GetRegister(A, X) should return 2 + t.Run("GetRegistersOK", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + // 10 <- A (X: 1, Y: 2) + height := pruned + 1 // above the pruned pruned + blockA := unittest.IdentifierFixture() + regX := makeReg("X", "1") + regY := makeReg("Y", "2") + err := store.SaveRegisters( + height, + blockA, + lastID, + flow.RegisterEntries{regX, regY}, + ) + require.NoError(t, err) + + valX, err := store.GetRegister(height, blockA, regX.Key) + require.NoError(t, err) + require.Equal(t, regX.Value, valX) + + valY, err := store.GetRegister(height, blockA, regY.Key) + require.NoError(t, err) + require.Equal(t, regY.Value, valY) + }) + + // 6. Given A(X: 1, Y: 2) <- B(Y: 3), + // GetRegister(B, X) should return 1, because X is not updated in B + // GetRegister(B, Y) should return 3, because Y is updated in B + // GetRegister(A, Y) should return 2, because the query queries the value at A, not B + // GetRegister(B, Z) should return PrunedError, because register is unknown + // GetRegister(C, X) should return BlockNotExecuted, because block is not executed (unexecuted) + t.Run("GetLatestValueOK", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + // 10 <- A (X: 1, Y: 2) <- B (Y: 3) + blockA := unittest.IdentifierFixture() + regX := makeReg("X", "1") + regY := makeReg("Y", "2") + err := store.SaveRegisters( + pruned+1, + blockA, + lastID, + flow.RegisterEntries{regX, regY}, + ) + require.NoError(t, err) + + blockB := unittest.IdentifierFixture() + regY3 := makeReg("Y", "3") + err = store.SaveRegisters( + pruned+2, + blockB, + blockA, + flow.RegisterEntries{regY3}, + ) + require.NoError(t, err) + + val, err := store.GetRegister(pruned+2, blockB, regX.Key) + require.NoError(t, err) + require.Equal(t, regX.Value, val) // X is not updated in B + + val, err = store.GetRegister(pruned+2, blockB, regY.Key) + require.NoError(t, err) + require.Equal(t, regY3.Value, val) // Y is updated in B + + val, err = store.GetRegister(pruned+1, blockA, regY.Key) + require.NoError(t, err) + require.Equal(t, regY.Value, val) // Y's old value at A + + _, err = store.GetRegister(pruned+2, blockB, unknownKey) + require.Error(t, err) + pe, ok := IsPrunedError(err) + require.True(t, ok) + require.Equal(t, pe.PrunedHeight, pruned) + require.Equal(t, pe.Height, pruned+2) + + _, err = store.GetRegister(pruned+3, unittest.IdentifierFixture(), regX.Key) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotExecuted) // unknown block + }) + + // 7. Given the following tree: + // Pruned <- A(X:1) <- B(Y:2) + // .......^- C(X:3) <- D(Y:4) + // GetRegister(D, X) should return 3 + t.Run("StoreMultiForkOK", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + // 10 <- A (X: 1) <- B (Y: 2) + // ^- C (X: 3) <- D (Y: 4) + blockA := unittest.IdentifierFixture() + blockB := unittest.IdentifierFixture() + blockC := unittest.IdentifierFixture() + blockD := unittest.IdentifierFixture() + + require.NoError(t, store.SaveRegisters( + pruned+1, + blockA, + lastID, + flow.RegisterEntries{makeReg("X", "1")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+2, + blockB, + blockA, + flow.RegisterEntries{makeReg("Y", "2")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+1, + blockC, + lastID, + flow.RegisterEntries{makeReg("X", "3")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+2, + blockD, + blockC, + flow.RegisterEntries{makeReg("Y", "4")}, + )) + + reg := makeReg("X", "3") + val, err := store.GetRegister(pruned+2, blockD, reg.Key) + require.NoError(t, err) + require.Equal(t, reg.Value, val) + }) + + // 8. Given the following tree: + // Pruned <- A(X:1) <- B(Y:2), B is not executed + // GetUpdatedRegisters(B) should return ErrNotExecuted + t.Run("GetUpdatedRegisters", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + // 10 <- A (X: 1) <- B (Y: 2) + blockA := unittest.IdentifierFixture() + blockB := unittest.IdentifierFixture() + + require.NoError(t, store.SaveRegisters( + pruned+1, + blockA, + lastID, + flow.RegisterEntries{makeReg("X", "1")}, + )) + + reg, err := store.GetUpdatedRegisters(pruned+1, blockA) + require.NoError(t, err) + require.Equal(t, flow.RegisterEntries{makeReg("X", "1")}, reg) + + _, err = store.GetUpdatedRegisters(pruned+2, blockB) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotExecuted) + }) + + // 9. Prune should fail if the block is unknown + // Prune should succeed if the block is known, and GetUpdatedRegisters should return err + // Prune should prune up to the pruned height. + // Given Pruned <- A(X:1) <- B(X:2) <- C(X:3) <- D(X:4) + // after Prune(B), GetRegister(C, X) should return 3, GetRegister(B, X) should return err + t.Run("StorePrune", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + blockA := unittest.IdentifierFixture() + blockB := unittest.IdentifierFixture() + blockC := unittest.IdentifierFixture() + blockD := unittest.IdentifierFixture() + + require.NoError(t, store.SaveRegisters( + pruned+1, + blockA, + lastID, + flow.RegisterEntries{makeReg("X", "1")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+2, + blockB, + blockA, + flow.RegisterEntries{makeReg("X", "2")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+3, + blockC, + blockB, + flow.RegisterEntries{makeReg("X", "3")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+4, + blockD, + blockC, + flow.RegisterEntries{makeReg("X", "4")}, + )) + + err := store.Prune(pruned+1, unknownBlock) // block is unknown + require.Error(t, err) + + err = store.Prune(pruned+1, blockB) // block is known, but height is wrong + require.Error(t, err) + + err = store.Prune(pruned+4, unknownBlock) // height is unknown + require.Error(t, err) + + err = store.Prune(pruned+1, blockA) // prune next block + require.NoError(t, err) + + require.Equal(t, pruned+1, store.PrunedHeight()) + + reg := makeReg("X", "3") + val, err := store.GetRegister(pruned+3, blockC, reg.Key) + require.NoError(t, err) + require.Equal(t, reg.Value, val) + + _, err = store.GetRegister(pruned+1, blockA, reg.Key) // A is pruned + require.Error(t, err) + pe, ok := IsPrunedError(err) + require.True(t, ok) + require.Equal(t, pe.PrunedHeight, pruned+1) + require.Equal(t, pe.Height, pruned+1) + + err = store.Prune(pruned+3, blockC) // prune both B and C + require.NoError(t, err) + + require.Equal(t, pruned+3, store.PrunedHeight()) + + reg = makeReg("X", "4") + val, err = store.GetRegister(pruned+4, blockD, reg.Key) // can still get X at block D + require.NoError(t, err) + require.Equal(t, reg.Value, val) + }) + + // 10. Prune should prune conflicting forks + // Given Pruned <- A(X:1) <- B(X:2) + // .................. ^----- E(X:5) + // ............ ^- C(X:3) <- D(X:4) + // Prune(A) should prune C and D, and GetUpdatedRegisters(C) should return out of range error, + // GetUpdatedRegisters(D) should return NotFound + t.Run("PruneConflictingForks", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + blockA := unittest.IdentifierFixture() + blockB := unittest.IdentifierFixture() + blockC := unittest.IdentifierFixture() + blockD := unittest.IdentifierFixture() + blockE := unittest.IdentifierFixture() + + require.NoError(t, store.SaveRegisters( + pruned+1, + blockA, + lastID, + flow.RegisterEntries{makeReg("X", "1")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+2, + blockB, + blockA, + flow.RegisterEntries{makeReg("X", "2")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+1, + blockC, + lastID, + flow.RegisterEntries{makeReg("X", "3")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+2, + blockD, + blockC, + flow.RegisterEntries{makeReg("X", "4")}, + )) + + require.NoError(t, store.SaveRegisters( + pruned+2, + blockE, + blockA, + flow.RegisterEntries{makeReg("X", "5")}, + )) + + err := store.Prune(pruned+1, blockA) // prune A should prune C and D + require.NoError(t, err) + + _, err = store.GetUpdatedRegisters(pruned+2, blockD) + require.Error(t, err) + require.Contains(t, err.Error(), "not found") + + _, err = store.GetUpdatedRegisters(pruned+2, blockE) + require.NoError(t, err) + }) + + // 11. Concurrency: SaveRegisters can happen concurrently with GetUpdatedRegisters, and GetRegister + t.Run("ConcurrentSaveAndGet", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + // prepare a chain of 101 blocks with the first as lastID + count := 100 + blocks := make(map[uint64]flow.Identifier, count) + blocks[pruned] = lastID + for i := 1; i < count; i++ { + block := unittest.IdentifierFixture() + blocks[pruned+uint64(i)] = block + } + + reg := makeReg("X", "0") + + var wg sync.WaitGroup + for i := 1; i < count; i++ { + height := pruned + uint64(i) + require.NoError(t, store.SaveRegisters( + height, + blocks[height], + blocks[height-1], + flow.RegisterEntries{makeReg("X", fmt.Sprintf("%v", height))}, + )) + + // concurrently query get registers for past registers + wg.Add(1) + go func(i int) { + defer wg.Done() + + rdHeight := randBetween(pruned+1, pruned+uint64(i)+1) + val, err := store.GetRegister(rdHeight, blocks[rdHeight], reg.Key) + require.NoError(t, err) + r := makeReg("X", fmt.Sprintf("%v", rdHeight)) + require.Equal(t, r.Value, val) + }(i) + + // concurrently query updated registers + wg.Add(1) + go func(i int) { + defer wg.Done() + + rdHeight := randBetween(pruned+1, pruned+uint64(i)+1) + vals, err := store.GetUpdatedRegisters(rdHeight, blocks[rdHeight]) + require.NoError(t, err) + r := makeReg("X", fmt.Sprintf("%v", rdHeight)) + require.Equal(t, flow.RegisterEntries{r}, vals) + }(i) + } + + wg.Wait() + }) + + // 12. Concurrency: Prune can happen concurrently with GetUpdatedRegisters, and GetRegister + t.Run("ConcurrentSaveAndPrune", func(t *testing.T) { + t.Parallel() + pruned := uint64(10) + lastID := unittest.IdentifierFixture() + store := NewInMemoryRegisterStore(pruned, lastID) + + // prepare a chain of 101 blocks with the first as lastID + count := 100 + blocks := make(map[uint64]flow.Identifier, count) + blocks[pruned] = lastID + for i := 1; i < count; i++ { + block := unittest.IdentifierFixture() + blocks[pruned+uint64(i)] = block + } + + var wg sync.WaitGroup + savedHeights := make(chan uint64, 100) + + wg.Add(1) + go func() { + defer wg.Done() + + lastPrunedHeight := pruned + for savedHeight := range savedHeights { + if savedHeight%10 != 0 { + continue + } + rdHeight := randBetween(lastPrunedHeight+1, savedHeight+1) + err := store.Prune(rdHeight, blocks[rdHeight]) + require.NoError(t, err) + lastPrunedHeight = rdHeight + } + }() + + // save 100 blocks + for i := 1; i < count; i++ { + height := pruned + uint64(i) + require.NoError(t, store.SaveRegisters( + height, + blocks[height], + blocks[height-1], + flow.RegisterEntries{makeReg("X", fmt.Sprintf("%v", i))}, + )) + savedHeights <- height + } + + close(savedHeights) + + wg.Wait() + }) + + t.Run("PrunedError", func(t *testing.T) { + e := NewPrunedError(1, 2, unittest.IdentifierFixture()) + pe, ok := IsPrunedError(e) + require.True(t, ok) + require.Equal(t, uint64(1), pe.Height) + require.Equal(t, uint64(2), pe.PrunedHeight) + }) +} + +func randBetween(min, max uint64) uint64 { + return uint64(rand.Intn(int(max)-int(min))) + min +} + +func makeReg(key string, value string) flow.RegisterEntry { + return unittest.MakeOwnerReg(key, value) +} + +var unknownBlock = unittest.IdentifierFixture() +var unknownKey = flow.RegisterID{ + Owner: "unknown", + Key: "unknown", +} diff --git a/engine/execution/storehouse/register_engine.go b/engine/execution/storehouse/register_engine.go new file mode 100644 index 00000000000..d34e28637e5 --- /dev/null +++ b/engine/execution/storehouse/register_engine.go @@ -0,0 +1,57 @@ +package storehouse + +import ( + "fmt" + + "github.com/onflow/flow-go/consensus/hotstuff/model" + "github.com/onflow/flow-go/engine" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" +) + +// RegisterEngine is a wrapper for RegisterStore in order to make Block Finalization process +// non-blocking. +type RegisterEngine struct { + *component.ComponentManager + store *RegisterStore + finalizationNotifier engine.Notifier +} + +func NewRegisterEngine(store *RegisterStore) *RegisterEngine { + e := &RegisterEngine{ + store: store, + finalizationNotifier: engine.NewNotifier(), + } + + // Add workers + e.ComponentManager = component.NewComponentManagerBuilder(). + AddWorker(e.finalizationProcessingLoop). + Build() + return e +} + +// OnBlockFinalized will create a single goroutine to notify register store +// when a block is finalized. +// This call is non-blocking in order to avoid blocking the consensus +func (e *RegisterEngine) OnBlockFinalized(*model.Block) { + e.finalizationNotifier.Notify() +} + +// finalizationProcessingLoop notify the register store when a block is finalized +// and handle the error if any +func (e *RegisterEngine) finalizationProcessingLoop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + ready() + notifier := e.finalizationNotifier.Channel() + + for { + select { + case <-ctx.Done(): + return + case <-notifier: + err := e.store.OnBlockFinalized() + if err != nil { + ctx.Throw(fmt.Errorf("could not process finalized block: %w", err)) + } + } + } +} diff --git a/engine/execution/storehouse/register_store.go b/engine/execution/storehouse/register_store.go new file mode 100644 index 00000000000..e7997876fb9 --- /dev/null +++ b/engine/execution/storehouse/register_store.go @@ -0,0 +1,275 @@ +package storehouse + +import ( + "errors" + "fmt" + + "go.uber.org/atomic" + + "github.com/rs/zerolog" + + "github.com/onflow/flow-go/engine/execution" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage" +) + +type RegisterStore struct { + memStore *InMemoryRegisterStore + diskStore execution.OnDiskRegisterStore + wal execution.ExecutedFinalizedWAL + finalized execution.FinalizedReader + log zerolog.Logger + finalizing *atomic.Bool // making sure only one goroutine is finalizing at a time +} + +var _ execution.RegisterStore = (*RegisterStore)(nil) + +func NewRegisterStore( + diskStore execution.OnDiskRegisterStore, + wal execution.ExecutedFinalizedWAL, + finalized execution.FinalizedReader, + log zerolog.Logger, +) (*RegisterStore, error) { + // replay the executed and finalized blocks from the write ahead logs + // to the OnDiskRegisterStore + height, err := syncDiskStore(wal, diskStore, log) + if err != nil { + return nil, fmt.Errorf("cannot sync disk store: %w", err) + } + + // fetch the last executed and finalized block ID + finalizedID, err := finalized.FinalizedBlockIDAtHeight(height) + if err != nil { + return nil, fmt.Errorf("cannot get finalized block ID at height %d: %w", height, err) + } + + // init the memStore with the last executed and finalized block ID + memStore := NewInMemoryRegisterStore(height, finalizedID) + + return &RegisterStore{ + memStore: memStore, + diskStore: diskStore, + wal: wal, + finalized: finalized, + finalizing: atomic.NewBool(false), + log: log.With().Str("module", "register-store").Logger(), + }, nil +} + +// GetRegister first try to get the register from InMemoryRegisterStore, then OnDiskRegisterStore +// 1. below pruned height, and is conflicting +// 2. below pruned height, and is finalized +// 3. above pruned height, and is not executed +// 4. above pruned height, and is executed, and register is updated +// 5. above pruned height, and is executed, but register is not updated since pruned height +// It returns: +// - (value, nil) if the register value is found at the given block +// - (nil, nil) if the register is not found +// - (nil, storage.ErrHeightNotIndexed) if the height is below the first height that is indexed. +// - (nil, storehouse.ErrNotExecuted) if the block is not executed yet +// - (nil, storehouse.ErrNotExecuted) if the block is conflicting iwth finalized block +// - (nil, err) for any other exceptions +func (r *RegisterStore) GetRegister(height uint64, blockID flow.Identifier, register flow.RegisterID) (flow.RegisterValue, error) { + reg, err := r.memStore.GetRegister(height, blockID, register) + // the height might be lower than the lowest height in memStore, + // or the register might not be found in memStore. + if err == nil { + // this register was updated before its block is finalized + return reg, nil + } + + prunedError, ok := IsPrunedError(err) + if !ok { + // this means we ran into an exception. finding a register from in-memory store should either + // getting the register value or getting a ErrPruned error. + return flow.RegisterValue{}, fmt.Errorf("cannot get register from memStore: %w", err) + } + + // if in memory store returns PrunedError, and register height is above the pruned height, + // then it means the block is connected to the pruned block of in memory store, which is + // a finalized block and executed block, so we can get its value from on disk store. + if height > prunedError.PrunedHeight { + return r.getAndConvertNotFoundErr(register, prunedError.PrunedHeight) + } + + // if the block is below or equal to the pruned height, then there are two cases: + // the block is a finalized block, or a conflicting block. + // In order to distinguish, we need to query the finalized block ID at that height + + var finalizedID flow.Identifier + if height == prunedError.PrunedHeight { + // if the block is at the pruned height, then the finalized ID is the pruned ID from in memory store, + // this saves a DB query + finalizedID = prunedError.PrunedID + } else { + // if the block is below the pruned height, we query the finalized ID from the finalized reader + finalizedID, err = r.finalized.FinalizedBlockIDAtHeight(height) + if err != nil { + return nil, fmt.Errorf("cannot get finalized block ID at height %d: %w", height, err) + } + } + + isConflictingBlock := blockID != finalizedID + if isConflictingBlock { + // conflicting blocks are considered as un-executed + return flow.RegisterValue{}, fmt.Errorf("getting registers from conflicting block %v at height %v: %w", blockID, height, ErrNotExecuted) + } + return r.getAndConvertNotFoundErr(register, height) +} + +// getAndConvertNotFoundErr returns nil if the register is not found from storage +func (r *RegisterStore) getAndConvertNotFoundErr(register flow.RegisterID, height uint64) (flow.RegisterValue, error) { + val, err := r.diskStore.Get(register, height) + if errors.Is(err, storage.ErrNotFound) { + // FVM expects the error to be nil when register is not found + return nil, nil + } + return val, err +} + +// SaveRegisters saves to InMemoryRegisterStore first, then trigger the same check as OnBlockFinalized +// Depend on InMemoryRegisterStore.SaveRegisters +// It returns: +// - nil if the registers are saved successfully +// - exception is the block is above the pruned height but does not connect to the pruned height (conflicting block). +// - exception if the block is below the pruned height +// - exception if the save block is saved again +// - exception for any other exception +func (r *RegisterStore) SaveRegisters(header *flow.Header, registers flow.RegisterEntries) error { + err := r.memStore.SaveRegisters(header.Height, header.ID(), header.ParentID, registers) + if err != nil { + return fmt.Errorf("cannot save register to memStore: %w", err) + } + + err = r.OnBlockFinalized() + if err != nil { + return fmt.Errorf("cannot trigger OnBlockFinalized: %w", err) + } + return nil +} + +// Depend on FinalizedReader's FinalizedBlockIDAtHeight +// Depend on ExecutedFinalizedWAL.Append +// Depend on OnDiskRegisterStore.SaveRegisters +// OnBlockFinalized trigger the check of whether a block at the next height becomes finalized and executed. +// the next height is the existing finalized and executed block's height + 1. +// If a block at next height becomes finalized and executed, then: +// 1. write the registers to write ahead logs +// 2. save the registers of the block to OnDiskRegisterStore +// 3. prune the height in InMemoryRegisterStore +func (r *RegisterStore) OnBlockFinalized() error { + // only one goroutine can execute OnBlockFinalized at a time + if !r.finalizing.CompareAndSwap(false, true) { + return nil + } + + defer r.finalizing.Store(false) + return r.onBlockFinalized() +} + +func (r *RegisterStore) onBlockFinalized() error { + latest := r.diskStore.LatestHeight() + next := latest + 1 + blockID, err := r.finalized.FinalizedBlockIDAtHeight(next) + if errors.Is(err, storage.ErrNotFound) { + // next block is not finalized yet + return nil + } + + regs, err := r.memStore.GetUpdatedRegisters(next, blockID) + if errors.Is(err, ErrNotExecuted) { + // next block is not executed yet + return nil + } + + // TODO: append WAL + // err = r.wal.Append(next, regs) + // if err != nil { + // return fmt.Errorf("cannot write %v registers to write ahead logs for height %v: %w", len(regs), next, err) + // } + + err = r.diskStore.Store(regs, next) + if err != nil { + return fmt.Errorf("cannot save %v registers to disk store for height %v: %w", len(regs), next, err) + } + + err = r.memStore.Prune(next, blockID) + if err != nil { + return fmt.Errorf("cannot prune memStore for height %v: %w", next, err) + } + + return r.onBlockFinalized() // check again until there is no more finalized block +} + +// LastFinalizedAndExecutedHeight returns the height of the last finalized and executed block, +// which has been saved in OnDiskRegisterStore +func (r *RegisterStore) LastFinalizedAndExecutedHeight() uint64 { + // diskStore caches the latest height in memory + return r.diskStore.LatestHeight() +} + +// IsBlockExecuted returns true if the block is executed, false if not executed +// Note: it returns (true, nil) even if the block has been pruned from on disk register store, +func (r *RegisterStore) IsBlockExecuted(height uint64, blockID flow.Identifier) (bool, error) { + executed, err := r.memStore.IsBlockExecuted(height, blockID) + if err != nil { + // the only error memStore would return is when the given height is lower than the pruned height in memStore. + // Since the pruned height in memStore is a finalized and executed height, in order to know if the block + // is executed, we just need to check if this block is the finalized blcok at the given height. + executed, err = r.isBlockFinalized(height, blockID) + return executed, err + } + + return executed, nil +} + +func (r *RegisterStore) isBlockFinalized(height uint64, blockID flow.Identifier) (bool, error) { + finalizedID, err := r.finalized.FinalizedBlockIDAtHeight(height) + if err != nil { + return false, fmt.Errorf("cannot get finalized block ID at height %d: %w", height, err) + } + return finalizedID == blockID, nil +} + +// syncDiskStore replay WAL to disk store +func syncDiskStore( + wal execution.ExecutedFinalizedWAL, + diskStore execution.OnDiskRegisterStore, + log zerolog.Logger, +) (uint64, error) { + // TODO: replace diskStore.Latest with wal.Latest + // latest, err := r.wal.Latest() + var err error + latest := diskStore.LatestHeight() // tmp + if err != nil { + return 0, fmt.Errorf("cannot get latest height from write ahead logs: %w", err) + } + + stored := diskStore.LatestHeight() + + if stored > latest { + return 0, fmt.Errorf("latest height in storehouse %v is larger than latest height %v in write ahead logs", stored, latest) + } + + if stored < latest { + // replay + reader := wal.GetReader(stored + 1) + for { + height, registers, err := reader.Next() + // TODO: to rename + if errors.Is(err, storage.ErrNotFound) { + break + } + if err != nil { + return 0, fmt.Errorf("cannot read registers from write ahead logs: %w", err) + } + + err = diskStore.Store(registers, height) + if err != nil { + return 0, fmt.Errorf("cannot save registers to disk store at height %v : %w", height, err) + } + } + } + + return latest, nil +} diff --git a/engine/execution/storehouse/register_store_test.go b/engine/execution/storehouse/register_store_test.go new file mode 100644 index 00000000000..2e638ba2cbb --- /dev/null +++ b/engine/execution/storehouse/register_store_test.go @@ -0,0 +1,550 @@ +package storehouse_test + +import ( + "fmt" + "sync" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/engine/execution" + "github.com/onflow/flow-go/engine/execution/storehouse" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage/pebble" + "github.com/onflow/flow-go/utils/unittest" +) + +func withRegisterStore(t *testing.T, fn func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headers map[uint64]*flow.Header, +)) { + pebble.RunWithRegistersStorageAtInitialHeights(t, 10, 10, func(diskStore *pebble.Registers) { + log := unittest.Logger() + var wal execution.ExecutedFinalizedWAL + finalized, headerByHeight, highest := newMockFinalizedReader(10, 100) + rs, err := storehouse.NewRegisterStore(diskStore, wal, finalized, log) + require.NoError(t, err) + fn(t, rs, diskStore, finalized, 10, highest, headerByHeight) + }) +} + +// GetRegister should fail for +// 1. unknown blockID +// 2. height lower than OnDiskRegisterStore's root height +// 3. height too high +// 4. known block, but unknown register +func TestRegisterStoreGetRegisterFail(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + // unknown block + _, err := rs.GetRegister(rootHeight+1, unknownBlock, unknownReg.Key) + require.Error(t, err) + require.ErrorIs(t, err, storehouse.ErrNotExecuted) + + // too high + block11 := headerByHeight[rootHeight+1] + _, err = rs.GetRegister(rootHeight+1, block11.ID(), unknownReg.Key) + require.Error(t, err) + require.ErrorIs(t, err, storehouse.ErrNotExecuted) + + // lower than root height + _, err = rs.GetRegister(rootHeight-1, unknownBlock, unknownReg.Key) + require.Error(t, err) + // TODO: enable it once implemented + // require.ErrorIs(t, err, storehouse.ErrPruned) + + // known block, unknown register + rootBlock := headerByHeight[rootHeight] + val, err := rs.GetRegister(rootHeight, rootBlock.ID(), unknownReg.Key) + require.NoError(t, err) + require.Nil(t, val) + }) +} + +// SaveRegisters should fail for +// 1. mismatching parent +// 2. saved block +func TestRegisterStoreSaveRegistersShouldFail(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + wrongParent := unittest.BlockHeaderFixture(unittest.WithHeaderHeight(rootHeight + 1)) + err := rs.SaveRegisters(wrongParent, flow.RegisterEntries{}) + require.Error(t, err) + require.Contains(t, err.Error(), "parent") + + err = rs.SaveRegisters(headerByHeight[rootHeight], flow.RegisterEntries{}) + require.Error(t, err) + require.Contains(t, err.Error(), "pruned") + }) +} + +// SaveRegisters should ok, and +// 1. GetRegister can get saved registers, +// 2. IsBlockExecuted should return true +// +// if SaveRegisters with empty register, then +// 1. LastFinalizedAndExecutedHeight should be updated +// 2. IsBlockExecuted should return true +func TestRegisterStoreSaveRegistersShouldOK(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + // not executed + executed, err := rs.IsBlockExecuted(rootHeight+1, headerByHeight[rootHeight+1].ID()) + require.NoError(t, err) + require.False(t, executed) + + // save block 11 + reg := makeReg("X", "1") + err = rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{reg}) + require.NoError(t, err) + + // should get value + val, err := rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+1].ID(), reg.Key) + require.NoError(t, err) + require.Equal(t, reg.Value, val) + + // should become executed + executed, err = rs.IsBlockExecuted(rootHeight+1, headerByHeight[rootHeight+1].ID()) + require.NoError(t, err) + require.True(t, executed) + + // block 12 is empty + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{}) + require.NoError(t, err) + + // should get same value + val, err = rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+2].ID(), reg.Key) + require.NoError(t, err) + require.Equal(t, reg.Value, val) + + // should become executed + executed, err = rs.IsBlockExecuted(rootHeight+1, headerByHeight[rootHeight+2].ID()) + require.NoError(t, err) + require.True(t, executed) + }) +} + +// if 11 is latest finalized, then +// 1. IsBlockExecuted should return true for finalized block 10 +// 2. IsBlockExecuted should return false for conflicting block 10 +// 4. IsBlockExecuted should return true for executed and unfinalized block 12 +// 3. IsBlockExecuted should return false for unexecuted block 13 +func TestRegisterStoreIsBlockExecuted(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + // save block 11 + reg := makeReg("X", "1") + err := rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{reg}) + require.NoError(t, err) + + // save block 12 + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{makeReg("X", "2")}) + require.NoError(t, err) + + require.NoError(t, finalized.MockFinal(rootHeight+1)) + + require.NoError(t, rs.OnBlockFinalized()) // notify 11 is finalized + + require.Equal(t, rootHeight+1, rs.LastFinalizedAndExecutedHeight()) + + executed, err := rs.IsBlockExecuted(rootHeight, headerByHeight[rootHeight].ID()) + require.NoError(t, err) + require.True(t, executed) + + executed, err = rs.IsBlockExecuted(rootHeight+1, headerByHeight[rootHeight+1].ID()) + require.NoError(t, err) + require.True(t, executed) + + executed, err = rs.IsBlockExecuted(rootHeight+2, headerByHeight[rootHeight+2].ID()) + require.NoError(t, err) + require.True(t, executed) + + executed, err = rs.IsBlockExecuted(rootHeight+3, headerByHeight[rootHeight+3].ID()) + require.NoError(t, err) + require.False(t, executed) + }) +} + +// Test reading registers from finalized block +func TestRegisterStoreReadingFromDisk(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + + // R <- 11 (X: 1, Y: 2) <- 12 (Y: 3) <- 13 (X: 4) + // save block 11 + err := rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{makeReg("X", "1"), makeReg("Y", "2")}) + require.NoError(t, err) + + // save block 12 + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{makeReg("Y", "3")}) + require.NoError(t, err) + + // save block 13 + err = rs.SaveRegisters(headerByHeight[rootHeight+3], flow.RegisterEntries{makeReg("X", "4")}) + require.NoError(t, err) + + require.NoError(t, finalized.MockFinal(rootHeight+2)) + require.NoError(t, rs.OnBlockFinalized()) // notify 12 is finalized + + val, err := rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+1].ID(), makeReg("Y", "2").Key) + require.NoError(t, err) + // value at block 11 is now stored in OnDiskRegisterStore, which is 2 + require.Equal(t, makeReg("Y", "2").Value, val) + + val, err = rs.GetRegister(rootHeight+2, headerByHeight[rootHeight+2].ID(), makeReg("X", "1").Key) + require.NoError(t, err) + // value at block 12 is now stored in OnDiskRegisterStore, which is 1 + require.Equal(t, makeReg("X", "1").Value, val) + + val, err = rs.GetRegister(rootHeight+3, headerByHeight[rootHeight+3].ID(), makeReg("Y", "3").Key) + require.NoError(t, err) + // value at block 13 was stored in OnDiskRegisterStore at block 12, which is 3 + require.Equal(t, makeReg("Y", "3").Value, val) + + _, err = rs.GetRegister(rootHeight+4, headerByHeight[rootHeight+4].ID(), makeReg("Y", "3").Key) + require.Error(t, err) + }) +} + +func TestRegisterStoreReadingFromInMemStore(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + + // R <- 11 (X: 1, Y: 2) <- 12 (Y: 3) + // ^- 11 (X: 4) + + // save block 11 + err := rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{makeReg("X", "1"), makeReg("Y", "2")}) + require.NoError(t, err) + + // save block 12 + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{makeReg("Y", "3")}) + require.NoError(t, err) + + // save block 11 fork + block11Fork := unittest.BlockWithParentFixture(headerByHeight[rootHeight]).Header + err = rs.SaveRegisters(block11Fork, flow.RegisterEntries{makeReg("X", "4")}) + require.NoError(t, err) + + val, err := rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+1].ID(), makeReg("X", "1").Key) + require.NoError(t, err) + require.Equal(t, makeReg("X", "1").Value, val) + + val, err = rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+1].ID(), makeReg("Y", "2").Key) + require.NoError(t, err) + require.Equal(t, makeReg("Y", "2").Value, val) + + val, err = rs.GetRegister(rootHeight+2, headerByHeight[rootHeight+2].ID(), makeReg("X", "1").Key) + require.NoError(t, err) + require.Equal(t, makeReg("X", "1").Value, val) + + val, err = rs.GetRegister(rootHeight+2, headerByHeight[rootHeight+2].ID(), makeReg("Y", "3").Key) + require.NoError(t, err) + require.Equal(t, makeReg("Y", "3").Value, val) + + val, err = rs.GetRegister(rootHeight+1, block11Fork.ID(), makeReg("X", "4").Key) + require.NoError(t, err) + require.Equal(t, makeReg("X", "4").Value, val) + + // finalizing 11 should prune block 11 fork, and won't be able to read register from block 11 fork + require.NoError(t, finalized.MockFinal(rootHeight+1)) + require.NoError(t, rs.OnBlockFinalized()) // notify 11 is finalized + + val, err = rs.GetRegister(rootHeight+1, block11Fork.ID(), makeReg("X", "4").Key) + require.Error(t, err, fmt.Sprintf("%v", val)) + // pruned conflicting forks are considered not executed + require.ErrorIs(t, err, storehouse.ErrNotExecuted) + }) +} + +func TestRegisterStoreReadRegisterAtPrunedHeight(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + + // R <- 11 (X: 1) + + // if execute first then finalize later, should be able to read register at pruned height + // save block 11 + err := rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{makeReg("X", "1")}) + require.NoError(t, err) + require.Equal(t, 2, finalized.FinalizedCalled()) // called by SaveRegisters with height 11 + + // finalize block 11 + require.NoError(t, finalized.MockFinal(rootHeight+1)) + require.NoError(t, rs.OnBlockFinalized()) // notify 11 is finalized + + val, err := rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+1].ID(), makeReg("X", "").Key) + require.NoError(t, err) + require.Equal(t, makeReg("X", "1").Value, val) + + // R <- 11 (X: 1) <- 12 (X: 2) + // if finalize first then execute later, should not be able to read register at pruned height + // finalize block 12 + require.NoError(t, finalized.MockFinal(rootHeight+2)) + require.NoError(t, rs.OnBlockFinalized()) // notify 12 is finalized + + // save block 12 + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{makeReg("X", "2")}) + require.NoError(t, err) + + val, err = rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+1].ID(), makeReg("X", "").Key) + require.NoError(t, err) + require.Equal(t, makeReg("X", "1").Value, val) + + val, err = rs.GetRegister(rootHeight+2, headerByHeight[rootHeight+2].ID(), makeReg("X", "").Key) + require.NoError(t, err) + require.Equal(t, makeReg("X", "2").Value, val) + }) +} + +// Test that when getting register during executing a finalized block or finalize an executed block, +// FinalizedBlockIDAtHeight should not be called +func TestRegisterStoreExecuteFinalizedBlockOrFinalizeExecutedBlockShouldNotCallFinalizedHeight(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + + require.Equal(t, 1, finalized.FinalizedCalled()) // called by NewRegisterStore + // R <- 11 (X: 1) + + val, err := rs.GetRegister(rootHeight, headerByHeight[rootHeight].ID(), makeReg("X", "").Key) + require.NoError(t, err) + require.Nil(t, val) + require.Equal(t, 1, finalized.FinalizedCalled()) // no FinalizedBlockIDAtHeight called + + // if execute first then finalize later, should be able to read register at pruned height + // save block 11 + err = rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{makeReg("X", "1")}) + require.NoError(t, err) + require.Equal(t, 2, finalized.FinalizedCalled()) // called by SaveRegisters with height 11 + + // finalize block 11 + require.NoError(t, finalized.MockFinal(rootHeight+1)) + require.NoError(t, rs.OnBlockFinalized()) // notify 11 is finalized + require.Equal(t, 4, finalized.FinalizedCalled()) // called by Checking whether height 11 and 12 are finalized + + // R <- 11 (X: 1) <- 12 (X: 2) + // if finalize first then execute later, should not be able to read register at pruned height + // finalize block 12 + require.NoError(t, finalized.MockFinal(rootHeight+2)) + require.NoError(t, rs.OnBlockFinalized()) // notify 12 is finalized + require.Equal(t, 5, finalized.FinalizedCalled()) // called by Checking whether height 12 and 13 are finalized + + val, err = rs.GetRegister(rootHeight+1, headerByHeight[rootHeight+1].ID(), makeReg("X", "").Key) + require.NoError(t, err) + require.Equal(t, makeReg("X", "1").Value, val) + require.Equal(t, 5, finalized.FinalizedCalled()) // no FinalizedBlockIDAtHeight call + + // save block 12 + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{makeReg("X", "2")}) + require.NoError(t, err) + require.Equal(t, 7, finalized.FinalizedCalled()) // called by SaveRegisters with height 12 and 13 + + }) +} + +// Execute first then finalize later +// SaveRegisters(1), SaveRegisters(2), SaveRegisters(3), then +// OnBlockFinalized(1), OnBlockFinalized(2), OnBlockFinalized(3) should +// 1. update LastFinalizedAndExecutedHeight +// 2. InMemoryRegisterStore should have correct pruned height +// 3. NewRegisterStore with the same OnDiskRegisterStore again should return correct LastFinalizedAndExecutedHeight +func TestRegisterStoreExecuteFirstFinalizeLater(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + // save block 11 + err := rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{makeReg("X", "1")}) + require.NoError(t, err) + require.Equal(t, rootHeight, rs.LastFinalizedAndExecutedHeight()) + + // save block 12 + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{makeReg("X", "2")}) + require.NoError(t, err) + require.Equal(t, rootHeight, rs.LastFinalizedAndExecutedHeight()) + + // save block 13 + err = rs.SaveRegisters(headerByHeight[rootHeight+3], flow.RegisterEntries{makeReg("X", "3")}) + require.NoError(t, err) + require.Equal(t, rootHeight, rs.LastFinalizedAndExecutedHeight()) + + require.NoError(t, finalized.MockFinal(rootHeight+1)) + require.NoError(t, rs.OnBlockFinalized()) // notify 11 is finalized + require.Equal(t, rootHeight+1, rs.LastFinalizedAndExecutedHeight()) + + require.NoError(t, finalized.MockFinal(rootHeight+2)) + require.NoError(t, rs.OnBlockFinalized()) // notify 12 is finalized + require.Equal(t, rootHeight+2, rs.LastFinalizedAndExecutedHeight()) + + require.NoError(t, finalized.MockFinal(rootHeight+3)) + require.NoError(t, rs.OnBlockFinalized()) // notify 13 is finalized + require.Equal(t, rootHeight+3, rs.LastFinalizedAndExecutedHeight()) + }) +} + +// Finalize first then execute later +// OnBlockFinalized(1), OnBlockFinalized(2), OnBlockFinalized(3), then +// SaveRegisters(1), SaveRegisters(2), SaveRegisters(3) should +// 1. update LastFinalizedAndExecutedHeight +// 2. InMemoryRegisterStore should have correct pruned height +// 3. NewRegisterStore with the same OnDiskRegisterStore again should return correct LastFinalizedAndExecutedHeight +func TestRegisterStoreFinalizeFirstExecuteLater(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + require.NoError(t, finalized.MockFinal(rootHeight+1)) + require.NoError(t, rs.OnBlockFinalized()) // notify 11 is finalized + require.Equal(t, rootHeight, rs.LastFinalizedAndExecutedHeight(), fmt.Sprintf("LastFinalizedAndExecutedHeight: %d", rs.LastFinalizedAndExecutedHeight())) + + require.NoError(t, finalized.MockFinal(rootHeight+2)) + require.NoError(t, rs.OnBlockFinalized()) // notify 12 is finalized + require.Equal(t, rootHeight, rs.LastFinalizedAndExecutedHeight(), fmt.Sprintf("LastFinalizedAndExecutedHeight: %d", rs.LastFinalizedAndExecutedHeight())) + + require.NoError(t, finalized.MockFinal(rootHeight+3)) + require.NoError(t, rs.OnBlockFinalized()) // notify 13 is finalized + require.Equal(t, rootHeight, rs.LastFinalizedAndExecutedHeight()) + + // save block 11 + err := rs.SaveRegisters(headerByHeight[rootHeight+1], flow.RegisterEntries{makeReg("X", "1")}) + require.NoError(t, err) + require.Equal(t, rootHeight+1, rs.LastFinalizedAndExecutedHeight()) + + // save block 12 + err = rs.SaveRegisters(headerByHeight[rootHeight+2], flow.RegisterEntries{makeReg("X", "2")}) + require.NoError(t, err) + require.Equal(t, rootHeight+2, rs.LastFinalizedAndExecutedHeight()) + + // save block 13 + err = rs.SaveRegisters(headerByHeight[rootHeight+3], flow.RegisterEntries{makeReg("X", "3")}) + require.NoError(t, err) + require.Equal(t, rootHeight+3, rs.LastFinalizedAndExecutedHeight()) + }) +} + +// Finalize and Execute concurrently +// SaveRegisters(1), SaveRegisters(2), ... SaveRegisters(100), happen concurrently with +// OnBlockFinalized(1), OnBlockFinalized(2), ... OnBlockFinalized(100), should update LastFinalizedAndExecutedHeight +func TestRegisterStoreConcurrentFinalizeAndExecute(t *testing.T) { + t.Parallel() + withRegisterStore(t, func( + t *testing.T, + rs *storehouse.RegisterStore, + diskStore execution.OnDiskRegisterStore, + finalized *mockFinalizedReader, + rootHeight uint64, + endHeight uint64, + headerByHeight map[uint64]*flow.Header, + ) { + + var wg sync.WaitGroup + savedHeights := make(chan uint64, len(headerByHeight)) // enough buffer so that producer won't be blocked + + wg.Add(1) + go func() { + defer wg.Done() + + for savedHeight := range savedHeights { + err := finalized.MockFinal(savedHeight) + require.NoError(t, err) + require.NoError(t, rs.OnBlockFinalized(), fmt.Sprintf("saved height %v", savedHeight)) + } + }() + + for height := rootHeight + 1; height <= endHeight; height++ { + if height >= 50 { + savedHeights <- height + } + + err := rs.SaveRegisters(headerByHeight[height], flow.RegisterEntries{makeReg("X", fmt.Sprintf("%d", height))}) + require.NoError(t, err) + } + close(savedHeights) + + wg.Wait() // wait until all heights are finalized + + // after all heights are executed and finalized, the LastFinalizedAndExecutedHeight should be the last height + require.Equal(t, endHeight, rs.LastFinalizedAndExecutedHeight()) + }) +} diff --git a/engine/execution/storehouse/storehouse_test.go b/engine/execution/storehouse/storehouse_test.go new file mode 100644 index 00000000000..6cf45e961f3 --- /dev/null +++ b/engine/execution/storehouse/storehouse_test.go @@ -0,0 +1,78 @@ +package storehouse_test + +import ( + "fmt" + + "go.uber.org/atomic" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage" + "github.com/onflow/flow-go/utils/unittest" +) + +var unknownBlock = unittest.IdentifierFixture() +var unknownReg = makeReg("unknown", "unknown") + +func makeReg(key string, value string) flow.RegisterEntry { + return flow.RegisterEntry{ + Key: flow.RegisterID{ + Owner: "owner", + Key: key, + }, + Value: []byte(value), + } +} + +type mockFinalizedReader struct { + headerByHeight map[uint64]*flow.Header + lowest uint64 + highest uint64 + finalizedHeight *atomic.Uint64 + finalizedCalled *atomic.Int64 +} + +func newMockFinalizedReader(initHeight uint64, count int) (*mockFinalizedReader, map[uint64]*flow.Header, uint64) { + root := unittest.BlockHeaderFixture(unittest.WithHeaderHeight(initHeight)) + blocks := unittest.ChainFixtureFrom(count, root) + headerByHeight := make(map[uint64]*flow.Header, len(blocks)+1) + headerByHeight[root.Height] = root + + for _, b := range blocks { + headerByHeight[b.Header.Height] = b.Header + } + + highest := blocks[len(blocks)-1].Header.Height + return &mockFinalizedReader{ + headerByHeight: headerByHeight, + lowest: initHeight, + highest: highest, + finalizedHeight: atomic.NewUint64(initHeight), + finalizedCalled: atomic.NewInt64(0), + }, headerByHeight, highest +} + +func (r *mockFinalizedReader) FinalizedBlockIDAtHeight(height uint64) (flow.Identifier, error) { + r.finalizedCalled.Add(1) + finalized := r.finalizedHeight.Load() + if height > finalized { + return flow.Identifier{}, storage.ErrNotFound + } + + if height < r.lowest { + return unknownBlock, nil + } + return r.headerByHeight[height].ID(), nil +} + +func (r *mockFinalizedReader) MockFinal(height uint64) error { + if height < r.lowest || height > r.highest { + return fmt.Errorf("height %d is out of range [%d, %d]", height, r.lowest, r.highest) + } + + r.finalizedHeight.Store(height) + return nil +} + +func (r *mockFinalizedReader) FinalizedCalled() int { + return int(r.finalizedCalled.Load()) +} diff --git a/engine/execution/testutil/fixtures_token.go b/engine/execution/testutil/fixtures_token.go index 2bbef170428..c69b096a7b9 100644 --- a/engine/execution/testutil/fixtures_token.go +++ b/engine/execution/testutil/fixtures_token.go @@ -6,11 +6,12 @@ import ( "github.com/onflow/cadence" jsoncdc "github.com/onflow/cadence/encoding/json" - "github.com/onflow/flow-go/fvm" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/model/flow" ) func CreateTokenTransferTransaction(chain flow.Chain, amount int, to flow.Address, signer flow.Address) *flow.TransactionBody { + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) return flow.NewTransactionBody(). SetScript([]byte(fmt.Sprintf(` import FungibleToken from 0x%s @@ -32,7 +33,7 @@ func CreateTokenTransferTransaction(chain flow.Chain, amount int, to flow.Addres ?? panic("Could not borrow receiver reference to the recipient's Vault") receiverRef.deposit(from: <-self.sentVault) } - }`, fvm.FungibleTokenAddress(chain), fvm.FlowTokenAddress(chain)))). + }`, sc.FungibleToken.Address.Hex(), sc.FlowToken.Address.Hex()))). AddArgument(jsoncdc.MustEncode(cadence.UFix64(amount))). AddArgument(jsoncdc.MustEncode(cadence.NewAddress(to))). AddAuthorizer(signer) diff --git a/engine/testutil/mock/nodes.go b/engine/testutil/mock/nodes.go index 2de074cf947..08ddb160fd7 100644 --- a/engine/testutil/mock/nodes.go +++ b/engine/testutil/mock/nodes.go @@ -207,6 +207,7 @@ type ExecutionNode struct { Collections storage.Collections Finalizer *consensus.Finalizer MyExecutionReceipts storage.MyExecutionReceipts + StorehouseEnabled bool } func (en ExecutionNode) Ready(ctx context.Context) { @@ -251,7 +252,6 @@ func (en ExecutionNode) Done(cancelFunc context.CancelFunc) { } func (en ExecutionNode) AssertHighestExecutedBlock(t *testing.T, header *flow.Header) { - height, blockID, err := en.ExecutionState.GetHighestExecutedBlockID(context.Background()) require.NoError(t, err) @@ -259,6 +259,18 @@ func (en ExecutionNode) AssertHighestExecutedBlock(t *testing.T, header *flow.He require.Equal(t, header.Height, height) } +func (en ExecutionNode) AssertBlockIsExecuted(t *testing.T, header *flow.Header) { + executed, err := en.ExecutionState.IsBlockExecuted(header.Height, header.ID()) + require.NoError(t, err) + require.True(t, executed) +} + +func (en ExecutionNode) AssertBlockNotExecuted(t *testing.T, header *flow.Header) { + executed, err := en.ExecutionState.IsBlockExecuted(header.Height, header.ID()) + require.NoError(t, err) + require.False(t, executed) +} + // VerificationNode implements an in-process verification node for tests. type VerificationNode struct { *GenericNode diff --git a/engine/testutil/nodes.go b/engine/testutil/nodes.go index 1e6c3a8e884..c260ca4f1be 100644 --- a/engine/testutil/nodes.go +++ b/engine/testutil/nodes.go @@ -50,6 +50,8 @@ import ( executionprovider "github.com/onflow/flow-go/engine/execution/provider" executionState "github.com/onflow/flow-go/engine/execution/state" bootstrapexec "github.com/onflow/flow-go/engine/execution/state/bootstrap" + esbootstrap "github.com/onflow/flow-go/engine/execution/state/bootstrap" + "github.com/onflow/flow-go/engine/execution/storehouse" testmock "github.com/onflow/flow-go/engine/testutil/mock" verificationassigner "github.com/onflow/flow-go/engine/verification/assigner" "github.com/onflow/flow-go/engine/verification/assigner/blockconsumer" @@ -62,6 +64,7 @@ import ( "github.com/onflow/flow-go/fvm/storage/derived" "github.com/onflow/flow-go/ledger/common/pathfinder" completeLedger "github.com/onflow/flow-go/ledger/complete" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/ledger/complete/wal" "github.com/onflow/flow-go/model/bootstrap" "github.com/onflow/flow-go/model/flow" @@ -73,6 +76,7 @@ import ( "github.com/onflow/flow-go/module/executiondatasync/execution_data" exedataprovider "github.com/onflow/flow-go/module/executiondatasync/provider" mocktracker "github.com/onflow/flow-go/module/executiondatasync/tracker/mock" + "github.com/onflow/flow-go/module/finalizedreader" confinalizer "github.com/onflow/flow-go/module/finalizer/consensus" "github.com/onflow/flow-go/module/id" "github.com/onflow/flow-go/module/irrecoverable" @@ -99,6 +103,7 @@ import ( "github.com/onflow/flow-go/state/protocol/events/gadgets" "github.com/onflow/flow-go/state/protocol/util" storage "github.com/onflow/flow-go/storage/badger" + storagepebble "github.com/onflow/flow-go/storage/pebble" "github.com/onflow/flow-go/utils/unittest" ) @@ -524,6 +529,7 @@ func ExecutionNode(t *testing.T, hub *stub.Hub, identity bootstrap.NodeInfo, ide receipts := storage.NewExecutionReceipts(node.Metrics, node.PublicDB, results, storage.DefaultCacheSize) myReceipts := storage.NewMyExecutionReceipts(node.Metrics, node.PublicDB, receipts) versionBeacons := storage.NewVersionBeacons(node.PublicDB) + headersStorage := storage.NewHeaders(node.Metrics, node.PublicDB) checkAuthorizedAtBlock := func(blockID flow.Identifier) (bool, error) { return protocol.IsNodeAuthorizedAt(node.State.AtBlockID(blockID), node.Me.NodeID()) @@ -574,6 +580,15 @@ func ExecutionNode(t *testing.T, hub *stub.Hub, identity bootstrap.NodeInfo, ide fvm.WithInitialTokenSupply(unittest.GenesisTokenSupply)) require.NoError(t, err) + matchTrie, err := ls.FindTrieByStateCommit(commit) + require.NoError(t, err) + require.NotNil(t, matchTrie) + + const bootstrapCheckpointFile = "bootstrap-checkpoint" + checkpointFile := filepath.Join(dbDir, bootstrapCheckpointFile) + err = wal.StoreCheckpointV6([]*trie.MTrie{matchTrie}, dbDir, bootstrapCheckpointFile, zerolog.Nop(), 1) + require.NoError(t, err) + rootResult, rootSeal, err := protoState.Sealed().SealedResult() require.NoError(t, err) @@ -583,8 +598,30 @@ func ExecutionNode(t *testing.T, hub *stub.Hub, identity bootstrap.NodeInfo, ide err = bootstrapper.BootstrapExecutionDatabase(node.PublicDB, rootSeal) require.NoError(t, err) + registerDir := unittest.TempPebblePath(t) + pebbledb, err := storagepebble.OpenRegisterPebbleDB(registerDir) + require.NoError(t, err) + + checkpointHeight := uint64(0) + require.NoError(t, esbootstrap.ImportRegistersFromCheckpoint(node.Log, checkpointFile, checkpointHeight, pebbledb, 2)) + + diskStore, err := storagepebble.NewRegisters(pebbledb) + require.NoError(t, err) + + reader := finalizedreader.NewFinalizedReader(headersStorage, checkpointHeight) + registerStore, err := storehouse.NewRegisterStore( + diskStore, + nil, // TOOD(leo): replace with real WAL + reader, + node.Log) + require.NoError(t, err) + + storehouseEnabled := true execState := executionState.NewExecutionState( ls, commitsStorage, node.Blocks, node.Headers, collectionsStorage, chunkDataPackStorage, results, myReceipts, eventsStorage, serviceEventsStorage, txResultStorage, node.PublicDB, node.Tracer, + // TODO: test with register store + registerStore, + storehouseEnabled, ) requestEngine, err := requester.New( @@ -678,7 +715,7 @@ func ExecutionNode(t *testing.T, hub *stub.Hub, identity bootstrap.NodeInfo, ide ) fetcher := exeFetcher.NewCollectionFetcher(node.Log, requestEngine, node.State, false) - loader := loader.NewLoader(node.Log, node.State, node.Headers, execState) + loader := loader.NewUnexecutedLoader(node.Log, node.State, node.Headers, execState) rootHead, rootQC := getRoot(t, &node) ingestionEngine, err := ingestion.New( unit, @@ -782,6 +819,7 @@ func ExecutionNode(t *testing.T, hub *stub.Hub, identity bootstrap.NodeInfo, ide Finalizer: finalizer, MyExecutionReceipts: myReceipts, Compactor: compactor, + StorehouseEnabled: storehouseEnabled, } } diff --git a/follower/follower_builder.go b/follower/follower_builder.go index 0b9b56f0c07..07ec955ace4 100644 --- a/follower/follower_builder.go +++ b/follower/follower_builder.go @@ -586,8 +586,7 @@ func (builder *FollowerServiceBuilder) initPublicLibp2pNode(networkKey crypto.Pr } meshTracer := tracer.NewGossipSubMeshTracer(meshTracerCfg) - node, err := p2pbuilder.NewNodeBuilder( - builder.Logger, + node, err := p2pbuilder.NewNodeBuilder(builder.Logger, &p2pconfig.MetricsConfig{ HeroCacheFactory: builder.HeroCacheMetricsFactory(), Metrics: builder.Metrics.Network, @@ -597,8 +596,9 @@ func (builder *FollowerServiceBuilder) initPublicLibp2pNode(networkKey crypto.Pr networkKey, builder.SporkID, builder.IdentityProvider, + builder.FlowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig, &builder.FlowConfig.NetworkConfig.ResourceManager, - &builder.FlowConfig.NetworkConfig.GossipSubConfig.GossipSubRPCInspectorsConfig, + &builder.FlowConfig.NetworkConfig.GossipSubConfig, p2pconfig.PeerManagerDisableConfig(), // disable peer manager for follower &p2p.DisallowListCacheConfig{ MaxSize: builder.FlowConfig.NetworkConfig.DisallowListNotificationCacheSize, diff --git a/fvm/blueprints/contracts.go b/fvm/blueprints/contracts.go index bbe3ce422ab..34554be5a7a 100644 --- a/fvm/blueprints/contracts.go +++ b/fvm/blueprints/contracts.go @@ -3,8 +3,6 @@ package blueprints import ( _ "embed" - "encoding/hex" - "github.com/onflow/cadence" jsoncdc "github.com/onflow/cadence/encoding/json" "github.com/onflow/cadence/runtime/common" @@ -32,7 +30,7 @@ var setContractOperationAuthorizersTransactionTemplate string var setIsContractDeploymentRestrictedTransactionTemplate string //go:embed scripts/deployContractTransactionTemplate.cdc -var deployContractTransactionTemplate string +var DeployContractTransactionTemplate []byte // SetContractDeploymentAuthorizersTransaction returns a transaction for updating list of authorized accounts allowed to deploy/update contracts func SetContractDeploymentAuthorizersTransaction(serviceAccount flow.Address, authorized []flow.Address) (*flow.TransactionBody, error) { @@ -95,8 +93,8 @@ func SetIsContractDeploymentRestrictedTransaction(serviceAccount flow.Address, r // TODO (ramtin) get rid of authorizers func DeployContractTransaction(address flow.Address, contract []byte, contractName string) *flow.TransactionBody { return flow.NewTransactionBody(). - SetScript([]byte(deployContractTransactionTemplate)). + SetScript(DeployContractTransactionTemplate). AddArgument(jsoncdc.MustEncode(cadence.String(contractName))). - AddArgument(jsoncdc.MustEncode(cadence.String(hex.EncodeToString(contract)))). + AddArgument(jsoncdc.MustEncode(cadence.String(contract))). AddAuthorizer(address) } diff --git a/fvm/blueprints/scripts/deployContractTransactionTemplate.cdc b/fvm/blueprints/scripts/deployContractTransactionTemplate.cdc index 02573e4342b..4e24d39b7d4 100644 --- a/fvm/blueprints/scripts/deployContractTransactionTemplate.cdc +++ b/fvm/blueprints/scripts/deployContractTransactionTemplate.cdc @@ -1,5 +1,5 @@ transaction(name: String, code: String) { prepare(signer: AuthAccount) { - signer.contracts.add(name: name, code: code.decodeHex()) + signer.contracts.add(name: name, code: code.utf8) } } diff --git a/fvm/blueprints/system.go b/fvm/blueprints/system.go index d67f953f490..3683883c1e1 100644 --- a/fvm/blueprints/system.go +++ b/fvm/blueprints/system.go @@ -2,7 +2,6 @@ package blueprints import ( _ "embed" - "fmt" "github.com/onflow/flow-core-contracts/lib/go/templates" @@ -12,8 +11,6 @@ import ( const SystemChunkTransactionGasLimit = 100_000_000 -// TODO (Ramtin) after changes to this method are merged into master move them here. - // systemChunkTransactionTemplate looks for the epoch and version beacon heartbeat resources // and calls them. // @@ -23,20 +20,13 @@ var systemChunkTransactionTemplate string // SystemChunkTransaction creates and returns the transaction corresponding to the // system chunk for the given chain. func SystemChunkTransaction(chain flow.Chain) (*flow.TransactionBody, error) { - contracts, err := systemcontracts.SystemContractsForChain(chain.ChainID()) - if err != nil { - return nil, fmt.Errorf("could not get system contracts for chain: %w", err) - } + contracts := systemcontracts.SystemContractsForChain(chain.ChainID()) tx := flow.NewTransactionBody(). SetScript( []byte(templates.ReplaceAddresses( systemChunkTransactionTemplate, - templates.Environment{ - EpochAddress: contracts.Epoch.Address.Hex(), - NodeVersionBeaconAddress: contracts.NodeVersionBeacon.Address.Hex(), - RandomBeaconHistoryAddress: contracts.RandomBeaconHistory.Address.Hex(), - }, + contracts.AsTemplateEnv(), )), ). // The heartbeat resources needed by the system tx have are on the service account, diff --git a/fvm/bootstrap.go b/fvm/bootstrap.go index a888952ccb4..c00ee80d289 100644 --- a/fvm/bootstrap.go +++ b/fvm/bootstrap.go @@ -10,6 +10,7 @@ import ( "github.com/onflow/flow-go/fvm/blueprints" "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/errors" + "github.com/onflow/flow-go/fvm/evm/stdlib" "github.com/onflow/flow-go/fvm/meter" "github.com/onflow/flow-go/fvm/storage" "github.com/onflow/flow-go/fvm/storage/logical" @@ -75,6 +76,7 @@ type BootstrapParams struct { minimumStorageReservation cadence.UFix64 storagePerFlow cadence.UFix64 restrictedAccountCreationEnabled cadence.Bool + setupEVMEnabled cadence.Bool // versionFreezePeriod is the number of blocks in the future where the version // changes are frozen. The Node version beacon manages the freeze period, @@ -210,6 +212,13 @@ func WithRestrictedAccountCreationEnabled(enabled cadence.Bool) BootstrapProcedu } } +func WithSetupEVMEnabled(enabled cadence.Bool) BootstrapProcedureOption { + return func(bp *BootstrapProcedure) *BootstrapProcedure { + bp.setupEVMEnabled = enabled + return bp + } +} + func WithRestrictedContractDeployment(restricted *bool) BootstrapProcedureOption { return func(bp *BootstrapProcedure) *BootstrapProcedure { bp.restrictedContractDeployment = restricted @@ -380,6 +389,9 @@ func (b *bootstrapExecutor) Execute() error { // set the list of nodes which are allowed to stake in this network b.setStakingAllowlist(service, b.identities.NodeIDs()) + // sets up the EVM environment + b.setupEVM(service, flowToken) + return nil } @@ -776,6 +788,23 @@ func (b *bootstrapExecutor) setStakingAllowlist( panicOnMetaInvokeErrf("failed to set staking allow-list: %s", txError, err) } +func (b *bootstrapExecutor) setupEVM(serviceAddress, flowTokenAddress flow.Address) { + if b.setupEVMEnabled { + b.createAccount(nil) // account for storage + tx := blueprints.DeployContractTransaction( + serviceAddress, + stdlib.ContractCode(flowTokenAddress), + stdlib.ContractName, + ) + // WithEVMEnabled should only be used after we create an account for storage + txError, err := b.invokeMetaTransaction( + NewContextFromParent(b.ctx, WithEVMEnabled(true)), + Transaction(tx, 0), + ) + panicOnMetaInvokeErrf("failed to deploy EVM contract: %s", txError, err) + } +} + func (b *bootstrapExecutor) registerNodes(service, fungibleToken, flowToken flow.Address) { for _, id := range b.identities { @@ -944,16 +973,6 @@ func panicOnMetaInvokeErrf(msg string, txError errors.CodedError, err error) { } } -func FungibleTokenAddress(chain flow.Chain) flow.Address { - address, _ := chain.AddressAtIndex(environment.FungibleTokenAccountIndex) - return address -} - -func FlowTokenAddress(chain flow.Chain) flow.Address { - address, _ := chain.AddressAtIndex(environment.FlowTokenAccountIndex) - return address -} - // invokeMetaTransaction invokes a meta transaction inside the context of an // outer transaction. // diff --git a/fvm/context.go b/fvm/context.go index 44aecdd14ce..61a3f0c7268 100644 --- a/fvm/context.go +++ b/fvm/context.go @@ -28,6 +28,7 @@ type Context struct { // DisableMemoryAndInteractionLimits will override memory and interaction // limits and set them to MaxUint64, effectively disabling these limits. DisableMemoryAndInteractionLimits bool + EVMEnabled bool ComputationLimit uint64 MemoryLimit uint64 MaxStateKeySize uint64 @@ -366,3 +367,11 @@ func WithEventEncoder(encoder environment.EventEncoder) Option { return ctx } } + +// WithEVMEnabled enables access to the evm environment +func WithEVMEnabled(enabled bool) Option { + return func(ctx Context) Context { + ctx.EVMEnabled = enabled + return ctx + } +} diff --git a/fvm/crypto/crypto_test.go b/fvm/crypto/crypto_test.go index fe6c400c1b4..ffbdec3a730 100644 --- a/fvm/crypto/crypto_test.go +++ b/fvm/crypto/crypto_test.go @@ -425,16 +425,13 @@ func TestVerifySignatureFromTransaction(t *testing.T) { func TestValidatePublicKey(t *testing.T) { - // make sure the seed length is larger than miniumum seed lengths of all signature schemes - seedLength := 64 - validPublicKey := func(t *testing.T, s runtime.SignatureAlgorithm) []byte { - seed := make([]byte, seedLength) + seed := make([]byte, gocrypto.KeyGenSeedMinLen) _, err := rand.Read(seed) require.NoError(t, err) - pk, err := gocrypto.GeneratePrivateKey(crypto.RuntimeToCryptoSigningAlgorithm(s), seed) + sk, err := gocrypto.GeneratePrivateKey(crypto.RuntimeToCryptoSigningAlgorithm(s), seed) require.NoError(t, err) - return pk.PublicKey().Encode() + return sk.PublicKey().Encode() } t.Run("Unknown algorithm should return false", func(t *testing.T) { @@ -463,12 +460,14 @@ func TestValidatePublicKey(t *testing.T) { runtime.SignatureAlgorithmBLS_BLS12_381, } for i, s := range signatureAlgos { + t.Run(fmt.Sprintf("case %v: %v", i, s), func(t *testing.T) { key := validPublicKey(t, s) + // This may cause flakiness depending on the public key + // deserialization scheme used!! key[0] ^= 1 // alter one bit of the valid key - err := crypto.ValidatePublicKey(s, key) - require.Error(t, err) + require.Errorf(t, err, "key is %#x", key) }) } }) diff --git a/fvm/environment/account_creator.go b/fvm/environment/account_creator.go index 07612384d2c..d31985a8e75 100644 --- a/fvm/environment/account_creator.go +++ b/fvm/environment/account_creator.go @@ -12,12 +12,6 @@ import ( "github.com/onflow/flow-go/module/trace" ) -const ( - FungibleTokenAccountIndex = 2 - FlowTokenAccountIndex = 3 - FlowFeesAccountIndex = 4 -) - type AddressGenerator interface { Bytes() []byte NextAddress() (flow.Address, error) diff --git a/fvm/environment/env.go b/fvm/environment/env.go index ac8ac32f3b7..59dc4f83416 100644 --- a/fvm/environment/env.go +++ b/fvm/environment/env.go @@ -37,7 +37,6 @@ type Environment interface { // EventEmitter Events() flow.EventsList - EmitFlowEvent(etype flow.EventType, payload []byte) error ServiceEvents() flow.EventsList ConvertedServiceEvents() flow.ServiceEventList diff --git a/fvm/environment/event_emitter.go b/fvm/environment/event_emitter.go index 6a05fefe1f3..acc491ae986 100644 --- a/fvm/environment/event_emitter.go +++ b/fvm/environment/event_emitter.go @@ -38,17 +38,12 @@ func DefaultEventEmitterParams() EventEmitterParams { // Note that scripts do not emit events, but must expose the API in compliance // with the runtime environment interface. type EventEmitter interface { - // Cadence's runtime API. Note that the script variant will return - // OperationNotSupportedError. + // EmitEvent satisfies Cadence's runtime API. + // This will encode the cadence event + // + // Note that the script variant will return OperationNotSupportedError. EmitEvent(event cadence.Event) error - // EmitFlowEvent is used to emit events that are not generated by - // Cadence runtime. - // Warning: current implementation of EmitFlowEvent does not support handling service events - // that functionality should be added if needed in the future - // TODO: we could merge this one with the EmitEvent endpoint - EmitFlowEvent(etype flow.EventType, payload []byte) error - Events() flow.EventsList ServiceEvents() flow.EventsList ConvertedServiceEvents() flow.ServiceEventList @@ -79,16 +74,6 @@ func (emitter ParseRestrictedEventEmitter) EmitEvent(event cadence.Event) error event) } -func (emitter ParseRestrictedEventEmitter) EmitFlowEvent(etype flow.EventType, payload []byte) error { - return parseRestrict2Arg( - emitter.txnState, - trace.FVMEnvEmitEvent, - emitter.impl.EmitFlowEvent, - etype, - payload, - ) -} - func (emitter ParseRestrictedEventEmitter) Events() flow.EventsList { return emitter.impl.Events() } @@ -111,11 +96,7 @@ var _ EventEmitter = NoEventEmitter{} // where emitting an event does nothing. type NoEventEmitter struct{} -func (NoEventEmitter) EmitEvent(event cadence.Event) error { - return nil -} - -func (NoEventEmitter) EmitFlowEvent(etype flow.EventType, payload []byte) error { +func (NoEventEmitter) EmitEvent(cadence.Event) error { return nil } @@ -180,23 +161,27 @@ func (emitter *eventEmitter) EventCollection() *EventCollection { } func (emitter *eventEmitter) EmitEvent(event cadence.Event) error { - defer emitter.tracer.StartExtensiveTracingChildSpan( - trace.FVMEnvEmitEvent).End() - - err := emitter.meter.MeterComputation(ComputationKindEmitEvent, 1) + err := emitter.meter.MeterComputation(ComputationKindEncodeEvent, 1) if err != nil { - return fmt.Errorf("emit event failed: %w", err) + return fmt.Errorf("emit event, event encoding failed: %w", err) } payload, err := emitter.EventEncoder.Encode(event) if err != nil { return errors.NewEventEncodingError(err) } + emitter.tracer.StartExtensiveTracingChildSpan(trace.FVMEnvEncodeEvent).End() + defer emitter.tracer.StartExtensiveTracingChildSpan(trace.FVMEnvEmitEvent).End() - payloadSize := uint64(len(payload)) + payloadSize := len(payload) + err = emitter.meter.MeterComputation(ComputationKindEmitEvent, uint(payloadSize)) + if err != nil { + return fmt.Errorf("emit event failed: %w", err) + } + eventType := flow.EventType(event.EventType.ID()) flowEvent := flow.Event{ - Type: flow.EventType(event.EventType.ID()), + Type: eventType, TransactionID: emitter.txID, TransactionIndex: emitter.txIndex, EventIndex: emitter.eventCollection.TotalEventCounter(), @@ -207,7 +192,7 @@ func (emitter *eventEmitter) EmitEvent(event cadence.Event) error { isServiceAccount := emitter.payer == emitter.chain.ServiceAddress() if emitter.ServiceEventCollectionEnabled { - ok, err := IsServiceEvent(event, emitter.chain.ChainID()) + ok, err := IsServiceEvent(eventType, emitter.chain.ChainID()) if err != nil { return fmt.Errorf("unable to check service event: %w", err) } @@ -215,7 +200,7 @@ func (emitter *eventEmitter) EmitEvent(event cadence.Event) error { eventEmitError := emitter.eventCollection.AppendServiceEvent( emitter.chain, flowEvent, - payloadSize) + uint64(payloadSize)) // skip limit if payer is service account // TODO skip only limit-related errors @@ -227,7 +212,7 @@ func (emitter *eventEmitter) EmitEvent(event cadence.Event) error { // as well. } - eventEmitError := emitter.eventCollection.AppendEvent(flowEvent, payloadSize) + eventEmitError := emitter.eventCollection.AppendEvent(flowEvent, uint64(payloadSize)) // skip limit if payer is service account if !isServiceAccount { return eventEmitError @@ -236,28 +221,6 @@ func (emitter *eventEmitter) EmitEvent(event cadence.Event) error { return nil } -func (emitter *eventEmitter) EmitFlowEvent(etype flow.EventType, payload []byte) error { - defer emitter.tracer.StartExtensiveTracingChildSpan( - trace.FVMEnvEmitEvent).End() - - err := emitter.meter.MeterComputation(ComputationKindEmitEvent, 1) - if err != nil { - return fmt.Errorf("emit flow event failed: %w", err) - } - - eventSize := uint64(len(etype) + len(payload)) - - flowEvent := flow.Event{ - Type: etype, - TransactionID: emitter.txID, - TransactionIndex: emitter.txIndex, - EventIndex: emitter.eventCollection.TotalEventCounter(), - Payload: payload, - } - - return emitter.eventCollection.AppendEvent(flowEvent, eventSize) -} - func (emitter *eventEmitter) Events() flow.EventsList { return emitter.eventCollection.events } @@ -334,18 +297,11 @@ func (collection *EventCollection) TotalEventCounter() uint32 { // IsServiceEvent determines whether or not an emitted Cadence event is // considered a service event for the given chain. -func IsServiceEvent(event cadence.Event, chain flow.ChainID) (bool, error) { +func IsServiceEvent(eventType flow.EventType, chain flow.ChainID) (bool, error) { // retrieve the service event information for this chain - events, err := systemcontracts.ServiceEventsForChain(chain) - if err != nil { - return false, fmt.Errorf( - "unknown system contracts for chain (%s): %w", - chain.String(), - err) - } + events := systemcontracts.ServiceEventsForChain(chain) - eventType := flow.EventType(event.EventType.ID()) for _, serviceEvent := range events.All() { if serviceEvent.EventType() == eventType { return true, nil diff --git a/fvm/environment/event_emitter_test.go b/fvm/environment/event_emitter_test.go index 5057954680b..5f188ecd6f6 100644 --- a/fvm/environment/event_emitter_test.go +++ b/fvm/environment/event_emitter_test.go @@ -22,12 +22,11 @@ import ( func Test_IsServiceEvent(t *testing.T) { chain := flow.Emulator - events, err := systemcontracts.ServiceEventsForChain(chain) - require.NoError(t, err) + events := systemcontracts.ServiceEventsForChain(chain) t.Run("correct", func(t *testing.T) { for _, event := range events.All() { - isServiceEvent, err := environment.IsServiceEvent(cadence.Event{ + event := cadence.Event{ EventType: &cadence.EventType{ Location: common.AddressLocation{ Address: common.MustBytesToAddress( @@ -35,14 +34,16 @@ func Test_IsServiceEvent(t *testing.T) { }, QualifiedIdentifier: event.QualifiedIdentifier(), }, - }, chain) + } + + isServiceEvent, err := environment.IsServiceEvent(flow.EventType(event.Type().ID()), chain) require.NoError(t, err) assert.True(t, isServiceEvent) } }) t.Run("wrong chain", func(t *testing.T) { - isServiceEvent, err := environment.IsServiceEvent(cadence.Event{ + event := cadence.Event{ EventType: &cadence.EventType{ Location: common.AddressLocation{ Address: common.MustBytesToAddress( @@ -50,13 +51,15 @@ func Test_IsServiceEvent(t *testing.T) { }, QualifiedIdentifier: events.EpochCommit.QualifiedIdentifier(), }, - }, chain) + } + + isServiceEvent, err := environment.IsServiceEvent(flow.EventType(event.Type().ID()), chain) require.NoError(t, err) assert.False(t, isServiceEvent) }) t.Run("wrong type", func(t *testing.T) { - isServiceEvent, err := environment.IsServiceEvent(cadence.Event{ + event := cadence.Event{ EventType: &cadence.EventType{ Location: common.AddressLocation{ Address: common.MustBytesToAddress( @@ -64,7 +67,9 @@ func Test_IsServiceEvent(t *testing.T) { }, QualifiedIdentifier: "SomeContract.SomeEvent", }, - }, chain) + } + + isServiceEvent, err := environment.IsServiceEvent(flow.EventType(event.Type().ID()), chain) require.NoError(t, err) assert.False(t, isServiceEvent) }) @@ -149,22 +154,6 @@ func Test_EmitEvent_Limit(t *testing.T) { err := eventEmitter.EmitEvent(cadenceEvent1) require.Error(t, err) }) - - t.Run("emit flow event - exceeding limit", func(t *testing.T) { - flowEvent := flow.Event{ - Type: "sometype", - Payload: []byte{1, 2, 3, 4, 5}, - } - - eventSize := uint64(len(flowEvent.Type) + len(flowEvent.Payload)) - eventEmitter := createTestEventEmitterWithLimit( - flow.Emulator, - flow.Emulator.Chain().NewAddressGenerator().CurrentAddress(), - eventSize-1) - - err := eventEmitter.EmitFlowEvent(flowEvent.Type, flowEvent.Payload) - require.Error(t, err) - }) } func createTestEventEmitterWithLimit(chain flow.ChainID, address flow.Address, eventEmitLimit uint64) environment.EventEmitter { diff --git a/fvm/environment/meter.go b/fvm/environment/meter.go index 757ec0ea8be..75250d1c4c7 100644 --- a/fvm/environment/meter.go +++ b/fvm/environment/meter.go @@ -51,6 +51,7 @@ const ( ComputationKindEVMGasUsage = 2037 ComputationKindRLPEncoding = 2038 ComputationKindRLPDecoding = 2039 + ComputationKindEncodeEvent = 2040 ) type Meter interface { diff --git a/fvm/environment/mock/environment.go b/fvm/environment/mock/environment.go index 11b9cda285c..63b52c751a4 100644 --- a/fvm/environment/mock/environment.go +++ b/fvm/environment/mock/environment.go @@ -440,20 +440,6 @@ func (_m *Environment) EmitEvent(_a0 cadence.Event) error { return r0 } -// EmitFlowEvent provides a mock function with given fields: etype, payload -func (_m *Environment) EmitFlowEvent(etype flow.EventType, payload []byte) error { - ret := _m.Called(etype, payload) - - var r0 error - if rf, ok := ret.Get(0).(func(flow.EventType, []byte) error); ok { - r0 = rf(etype, payload) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // Events provides a mock function with given fields: func (_m *Environment) Events() flow.EventsList { ret := _m.Called() diff --git a/fvm/environment/mock/event_emitter.go b/fvm/environment/mock/event_emitter.go index 018efa1f19b..5ff23d14d71 100644 --- a/fvm/environment/mock/event_emitter.go +++ b/fvm/environment/mock/event_emitter.go @@ -45,20 +45,6 @@ func (_m *EventEmitter) EmitEvent(event cadence.Event) error { return r0 } -// EmitFlowEvent provides a mock function with given fields: etype, payload -func (_m *EventEmitter) EmitFlowEvent(etype flow.EventType, payload []byte) error { - ret := _m.Called(etype, payload) - - var r0 error - if rf, ok := ret.Get(0).(func(flow.EventType, []byte) error); ok { - r0 = rf(etype, payload) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // Events provides a mock function with given fields: func (_m *EventEmitter) Events() flow.EventsList { ret := _m.Called() diff --git a/fvm/environment/system_contracts.go b/fvm/environment/system_contracts.go index 606826314fa..52a4ce7312d 100644 --- a/fvm/environment/system_contracts.go +++ b/fvm/environment/system_contracts.go @@ -86,8 +86,8 @@ func (sys *SystemContracts) Invoke( } func FlowFeesAddress(chain flow.Chain) flow.Address { - address, _ := chain.AddressAtIndex(FlowFeesAccountIndex) - return address + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + return sc.FlowFees.Address } func ServiceAddress(chain flow.Chain) flow.Address { diff --git a/fvm/errors/codes.go b/fvm/errors/codes.go index 3308b47fdd9..cdbc734bd3d 100644 --- a/fvm/errors/codes.go +++ b/fvm/errors/codes.go @@ -78,8 +78,8 @@ const ( ErrCodeComputationLimitExceededError ErrorCode = 1110 ErrCodeMemoryLimitExceededError ErrorCode = 1111 ErrCodeCouldNotDecodeExecutionParameterFromState ErrorCode = 1112 - ErrCodeScriptExecutionCancelledError ErrorCode = 1114 ErrCodeScriptExecutionTimedOutError ErrorCode = 1113 + ErrCodeScriptExecutionCancelledError ErrorCode = 1114 ErrCodeEventEncodingError ErrorCode = 1115 ErrCodeInvalidInternalStateAccessError ErrorCode = 1116 // 1117 was never deployed and is free to use diff --git a/fvm/evm/emulator/database/database.go b/fvm/evm/emulator/database/database.go index f47e32b7174..8b18e56e7bc 100644 --- a/fvm/evm/emulator/database/database.go +++ b/fvm/evm/emulator/database/database.go @@ -34,6 +34,7 @@ type Database struct { flowEVMRootAddress flow.Address led atree.Ledger storage *atree.PersistentSlabStorage + baseStorage *atree.LedgerBaseStorage atreemap *atree.OrderedMap rootIDBytesToBeStored []byte // if is empty means we don't need to store anything // Ramtin: other database implementations for EVM uses a lock @@ -57,6 +58,7 @@ func NewDatabase(led atree.Ledger, flowEVMRootAddress flow.Address) (*Database, db := &Database{ led: led, + baseStorage: baseStorage, flowEVMRootAddress: flowEVMRootAddress, storage: storage, } @@ -237,7 +239,7 @@ func (db *Database) getRootHash() (gethCommon.Hash, error) { if len(data) == 0 { return gethTypes.EmptyRootHash, nil } - return gethCommon.Hash(data), nil + return gethCommon.BytesToHash(data), nil } // Commits the changes from atree into the underlying storage @@ -309,6 +311,19 @@ func (db *Database) Stat(property string) (string, error) { return "", types.ErrNotImplemented } +func (db *Database) BytesRetrieved() int { + return db.baseStorage.BytesRetrieved() +} + +func (db *Database) BytesStored() int { + return db.baseStorage.BytesStored() +} +func (db *Database) ResetReporter() { + db.baseStorage.ResetReporter() +} + +// Compact is not supported on a memory database, but there's no need either as +// a memory database doesn't waste space anyway. // Compact is a no op func (db *Database) Compact(start []byte, limit []byte) error { return nil @@ -357,6 +372,11 @@ func (b *batch) set(key []byte, value []byte, delete bool) error { return nil } +// DropCache drops the database read cache +func (db *Database) DropCache() { + db.storage.DropCache() +} + // ValueSize retrieves the amount of data queued up for writing. func (b *batch) ValueSize() int { return b.size diff --git a/fvm/evm/emulator/database/database_test.go b/fvm/evm/emulator/database/database_test.go index 62b7f9b63f3..a23e38d4295 100644 --- a/fvm/evm/emulator/database/database_test.go +++ b/fvm/evm/emulator/database/database_test.go @@ -24,7 +24,7 @@ func TestDatabase(t *testing.T) { value2 := []byte{9, 10, 11} t.Run("test basic database functionality", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(flowEVMRoot flow.Address) { db, err := database.NewDatabase(backend, flowEVMRoot) require.NoError(t, err) @@ -70,7 +70,7 @@ func TestDatabase(t *testing.T) { }) t.Run("test batch functionality", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(flowEVMRoot flow.Address) { db, err := database.NewDatabase(backend, flowEVMRoot) require.NoError(t, err) @@ -159,7 +159,7 @@ func TestDatabase(t *testing.T) { }) t.Run("test fatal error (not implemented methods)", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(flowEVMRoot flow.Address) { db, err := database.NewDatabase(backend, flowEVMRoot) require.NoError(t, err) diff --git a/fvm/evm/emulator/emulator.go b/fvm/evm/emulator/emulator.go index 9371562ef63..eef720912f7 100644 --- a/fvm/evm/emulator/emulator.go +++ b/fvm/evm/emulator/emulator.go @@ -148,7 +148,8 @@ func (bl *BlockView) newProcedure() (*procedure, error) { cfg.ChainConfig, cfg.EVMConfig, ), - state: execState, + state: execState, + database: bl.database, }, nil } @@ -159,9 +160,10 @@ func (bl *BlockView) commit(rootHash gethCommon.Hash) error { } type procedure struct { - config *Config - evm *gethVM.EVM - state *gethState.StateDB + config *Config + evm *gethVM.EVM + state *gethState.StateDB + database types.Database } // commit commits the changes to the state. @@ -184,6 +186,12 @@ func (proc *procedure) commit() (gethCommon.Hash, error) { if err != nil { return gethTypes.EmptyRootHash, handleCommitError(err) } + + // // remove the read registers (no history tracking) + // err = proc.database.DeleteAndCleanReadKey() + // if err != nil { + // return gethTypes.EmptyRootHash, types.NewFatalError(err) + // } return newRoot, nil } @@ -235,8 +243,11 @@ func (proc *procedure) withdrawFrom(address types.Address, amount *big.Int) (*ty // while this method is only called from bridged accounts // it might be the case that someone creates a bridged account // and never transfer tokens to and call for withdraw + // TODO: we might revisit this apporach and + // return res, types.ErrAccountDoesNotExist + // instead if !proc.state.Exist(addr) { - return res, types.ErrAccountDoesNotExist + proc.state.CreateAccount(addr) } // check the source account balance diff --git a/fvm/evm/emulator/emulator_test.go b/fvm/evm/emulator/emulator_test.go index 2169b96e630..caaf5853cac 100644 --- a/fvm/evm/emulator/emulator_test.go +++ b/fvm/evm/emulator/emulator_test.go @@ -22,7 +22,7 @@ var blockNumber = big.NewInt(10) var defaultCtx = types.NewDefaultBlockContext(blockNumber.Uint64()) func RunWithTestDB(t testing.TB, f func(types.Database)) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(flowEVMRoot flow.Address) { db, err := database.NewDatabase(backend, flowEVMRoot) require.NoError(t, err) @@ -62,7 +62,7 @@ func TestNativeTokenBridging(t *testing.T) { }) }) }) - t.Run("mint tokens withdraw", func(t *testing.T) { + t.Run("tokens withdraw", func(t *testing.T) { amount := big.NewInt(1000) RunWithNewEmulator(t, db, func(env *emulator.Emulator) { RunWithNewReadOnlyBlockView(t, env, func(blk types.ReadOnlyBlockView) { @@ -93,7 +93,7 @@ func TestNativeTokenBridging(t *testing.T) { func TestContractInteraction(t *testing.T) { RunWithTestDB(t, func(db types.Database) { - testContract := testutils.GetTestContract(t) + testContract := testutils.GetStorageTestContract(t) testAccount := types.NewAddressFromString("test") amount := big.NewInt(0).Mul(big.NewInt(1337), big.NewInt(gethParams.Ether)) @@ -148,7 +148,7 @@ func TestContractInteraction(t *testing.T) { types.NewContractCall( testAccount, contractAddr, - testContract.MakeStoreCallData(t, num), + testContract.MakeCallData(t, "store", num), 1_000_000, big.NewInt(0), // this should be zero because the contract doesn't have receiver ), @@ -164,7 +164,7 @@ func TestContractInteraction(t *testing.T) { types.NewContractCall( testAccount, contractAddr, - testContract.MakeRetrieveCallData(t), + testContract.MakeCallData(t, "retrieve"), 1_000_000, big.NewInt(0), // this should be zero because the contract doesn't have receiver ), @@ -183,7 +183,7 @@ func TestContractInteraction(t *testing.T) { types.NewContractCall( testAccount, contractAddr, - testContract.MakeBlockNumberCallData(t), + testContract.MakeCallData(t, "blockNumber"), 1_000_000, big.NewInt(0), // this should be zero because the contract doesn't have receiver ), @@ -369,3 +369,30 @@ func TestDatabaseErrorHandling(t *testing.T) { }) }) } + +func TestStorageNoSideEffect(t *testing.T) { + t.Skip("we need to fix this issue ") + + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { + testutils.RunWithTestFlowEVMRootAddress(t, backend, func(flowEVMRoot flow.Address) { + db, err := database.NewDatabase(backend, flowEVMRoot) + require.NoError(t, err) + + em := emulator.NewEmulator(db) + testAccount := types.NewAddressFromString("test") + + amount := big.NewInt(100) + RunWithNewBlockView(t, em, func(blk types.BlockView) { + _, err = blk.DirectCall(types.NewDepositCall(testAccount, amount)) + require.NoError(t, err) + }) + + orgSize := backend.TotalStorageSize() + RunWithNewBlockView(t, em, func(blk types.BlockView) { + _, err = blk.DirectCall(types.NewDepositCall(testAccount, amount)) + require.NoError(t, err) + }) + require.Equal(t, orgSize, backend.TotalStorageSize()) + }) + }) +} diff --git a/fvm/evm/evm.go b/fvm/evm/evm.go new file mode 100644 index 00000000000..a44b8be4552 --- /dev/null +++ b/fvm/evm/evm.go @@ -0,0 +1,56 @@ +package evm + +import ( + "github.com/onflow/cadence/runtime" + "github.com/onflow/cadence/runtime/common" + + evm "github.com/onflow/flow-go/fvm/evm/emulator" + "github.com/onflow/flow-go/fvm/evm/emulator/database" + "github.com/onflow/flow-go/fvm/evm/handler" + "github.com/onflow/flow-go/fvm/evm/stdlib" + "github.com/onflow/flow-go/fvm/evm/types" + "github.com/onflow/flow-go/fvm/systemcontracts" + "github.com/onflow/flow-go/model/flow" +) + +func RootAccountAddress(chainID flow.ChainID) (flow.Address, error) { + sc := systemcontracts.SystemContractsForChain(chainID) + return sc.EVM.Address, nil +} + +func SetupEnvironment( + chainID flow.ChainID, + backend types.Backend, + env runtime.Environment, + service flow.Address, + flowToken flow.Address, +) error { + // TODO: setup proper root address based on chainID + evmRootAddress, err := RootAccountAddress(chainID) + if err != nil { + return err + } + + db, err := database.NewDatabase(backend, evmRootAddress) + if err != nil { + return err + } + + em := evm.NewEmulator(db) + + bs, err := handler.NewBlockStore(backend, evmRootAddress) + if err != nil { + return err + } + + aa, err := handler.NewAddressAllocator(backend, evmRootAddress) + if err != nil { + return err + } + + contractHandler := handler.NewContractHandler(common.Address(flowToken), bs, aa, backend, em) + + stdlib.SetupEnvironment(env, contractHandler, service) + + return nil +} diff --git a/fvm/evm/evm_test.go b/fvm/evm/evm_test.go new file mode 100644 index 00000000000..6108b57aaaf --- /dev/null +++ b/fvm/evm/evm_test.go @@ -0,0 +1,288 @@ +package evm_test + +import ( + "fmt" + "math/big" + "testing" + + "github.com/onflow/cadence" + "github.com/onflow/cadence/encoding/json" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/fvm" + "github.com/onflow/flow-go/fvm/evm/stdlib" + "github.com/onflow/flow-go/fvm/evm/testutils" + . "github.com/onflow/flow-go/fvm/evm/testutils" + "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/fvm/systemcontracts" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestEVMRun(t *testing.T) { + + t.Parallel() + + t.Run("testing EVM.run (happy case)", func(t *testing.T) { + RunWithTestBackend(t, func(backend *testutils.TestBackend) { + RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { + tc := GetStorageTestContract(t) + RunWithDeployedContract(t, tc, backend, rootAddr, func(testContract *TestContract) { + RunWithEOATestAccount(t, backend, rootAddr, func(testAccount *EOATestAccount) { + num := int64(12) + chain := flow.Emulator.Chain() + + RunWithNewTestVM(t, chain, func(ctx fvm.Context, vm fvm.VM, snapshot snapshot.SnapshotTree) { + code := []byte(fmt.Sprintf( + ` + import EVM from %s + + access(all) + fun main(tx: [UInt8], coinbaseBytes: [UInt8; 20]) { + let coinbase = EVM.EVMAddress(bytes: coinbaseBytes) + EVM.run(tx: tx, coinbase: coinbase) + } + `, + chain.ServiceAddress().HexWithPrefix(), + )) + + gasLimit := uint64(100_000) + + txBytes := testAccount.PrepareSignAndEncodeTx(t, + testContract.DeployedAt.ToCommon(), + testContract.MakeCallData(t, "store", big.NewInt(num)), + big.NewInt(0), + gasLimit, + big.NewInt(0), + ) + + tx := cadence.NewArray( + ConvertToCadence(txBytes), + ).WithType(stdlib.EVMTransactionBytesCadenceType) + + coinbase := cadence.NewArray( + ConvertToCadence(testAccount.Address().Bytes()), + ).WithType(stdlib.EVMAddressBytesCadenceType) + + script := fvm.Script(code).WithArguments( + json.MustEncode(tx), + json.MustEncode(coinbase), + ) + + _, output, err := vm.Run( + ctx, + script, + snapshot) + require.NoError(t, err) + require.NoError(t, output.Err) + }) + }) + }) + }) + }) + }) +} + +func RunWithNewTestVM(t *testing.T, chain flow.Chain, f func(fvm.Context, fvm.VM, snapshot.SnapshotTree)) { + opts := []fvm.Option{ + fvm.WithChain(chain), + fvm.WithAuthorizationChecksEnabled(false), + fvm.WithSequenceNumberCheckAndIncrementEnabled(false), + } + ctx := fvm.NewContext(opts...) + + vm := fvm.NewVirtualMachine() + snapshotTree := snapshot.NewSnapshotTree(nil) + + baseBootstrapOpts := []fvm.BootstrapProcedureOption{ + fvm.WithInitialTokenSupply(unittest.GenesisTokenSupply), + fvm.WithSetupEVMEnabled(true), + } + + executionSnapshot, _, err := vm.Run( + ctx, + fvm.Bootstrap(unittest.ServiceAccountPublicKey, baseBootstrapOpts...), + snapshotTree) + require.NoError(t, err) + + snapshotTree = snapshotTree.Append(executionSnapshot) + + f(fvm.NewContextFromParent(ctx, fvm.WithEVMEnabled(true)), vm, snapshotTree) +} + +func TestEVMAddressDeposit(t *testing.T) { + + t.Parallel() + + RunWithTestBackend(t, func(backend *testutils.TestBackend) { + RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { + tc := GetStorageTestContract(t) + RunWithDeployedContract(t, tc, backend, rootAddr, func(testContract *TestContract) { + RunWithEOATestAccount(t, backend, rootAddr, func(testAccount *EOATestAccount) { + chain := flow.Emulator.Chain() + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + + RunWithNewTestVM(t, chain, func(ctx fvm.Context, vm fvm.VM, snapshot snapshot.SnapshotTree) { + + code := []byte(fmt.Sprintf( + ` + import EVM from %[1]s + import FlowToken from %[2]s + + access(all) + fun main() { + let admin = getAuthAccount(%[1]s) + .borrow<&FlowToken.Administrator>(from: /storage/flowTokenAdmin)! + let minter <- admin.createNewMinter(allowedAmount: 1.23) + let vault <- minter.mintTokens(amount: 1.23) + destroy minter + + let address = EVM.EVMAddress( + bytes: [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + address.deposit(from: <-vault) + } + `, + sc.FlowServiceAccount.Address.HexWithPrefix(), + sc.FlowToken.Address.HexWithPrefix(), + )) + + script := fvm.Script(code) + + executionSnapshot, output, err := vm.Run( + ctx, + script, + snapshot) + require.NoError(t, err) + require.NoError(t, output.Err) + + // TODO: + _ = executionSnapshot + }) + }) + }) + }) + }) +} + +func TestBridgedAccountWithdraw(t *testing.T) { + + t.Parallel() + + RunWithTestBackend(t, func(backend *testutils.TestBackend) { + RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { + tc := GetStorageTestContract(t) + RunWithDeployedContract(t, tc, backend, rootAddr, func(testContract *TestContract) { + RunWithEOATestAccount(t, backend, rootAddr, func(testAccount *EOATestAccount) { + chain := flow.Emulator.Chain() + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + + RunWithNewTestVM(t, chain, func(ctx fvm.Context, vm fvm.VM, snapshot snapshot.SnapshotTree) { + + code := []byte(fmt.Sprintf( + ` + import EVM from %[1]s + import FlowToken from %[2]s + + access(all) + fun main(): UFix64 { + let admin = getAuthAccount(%[1]s) + .borrow<&FlowToken.Administrator>(from: /storage/flowTokenAdmin)! + let minter <- admin.createNewMinter(allowedAmount: 2.34) + let vault <- minter.mintTokens(amount: 2.34) + destroy minter + + let bridgedAccount <- EVM.createBridgedAccount() + bridgedAccount.address().deposit(from: <-vault) + + let vault2 <- bridgedAccount.withdraw(balance: EVM.Balance(flow: 1.23)) + let balance = vault2.balance + destroy bridgedAccount + destroy vault2 + + return balance + } + `, + sc.FlowServiceAccount.Address.HexWithPrefix(), + sc.FlowToken.Address.HexWithPrefix(), + )) + + script := fvm.Script(code) + + executionSnapshot, output, err := vm.Run( + ctx, + script, + snapshot) + require.NoError(t, err) + require.NoError(t, output.Err) + + // TODO: + _ = executionSnapshot + }) + }) + }) + }) + }) +} + +// TODO: provide proper contract code +func TestBridgedAccountDeploy(t *testing.T) { + + t.Parallel() + + RunWithTestBackend(t, func(backend *testutils.TestBackend) { + RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { + tc := GetStorageTestContract(t) + RunWithDeployedContract(t, tc, backend, rootAddr, func(testContract *TestContract) { + RunWithEOATestAccount(t, backend, rootAddr, func(testAccount *EOATestAccount) { + chain := flow.Emulator.Chain() + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + + RunWithNewTestVM(t, chain, func(ctx fvm.Context, vm fvm.VM, snapshot snapshot.SnapshotTree) { + + code := []byte(fmt.Sprintf( + ` + import EVM from %[1]s + import FlowToken from %[2]s + + access(all) + fun main(): [UInt8; 20] { + let admin = getAuthAccount(%[1]s) + .borrow<&FlowToken.Administrator>(from: /storage/flowTokenAdmin)! + let minter <- admin.createNewMinter(allowedAmount: 2.34) + let vault <- minter.mintTokens(amount: 2.34) + destroy minter + + let bridgedAccount <- EVM.createBridgedAccount() + bridgedAccount.address().deposit(from: <-vault) + + let address = bridgedAccount.deploy( + code: [], + gasLimit: 53000, + value: EVM.Balance(flow: 1.23) + ) + destroy bridgedAccount + return address.bytes + } + `, + sc.FlowServiceAccount.Address.HexWithPrefix(), + sc.FlowToken.Address.HexWithPrefix(), + )) + + script := fvm.Script(code) + + executionSnapshot, output, err := vm.Run( + ctx, + script, + snapshot) + require.NoError(t, err) + require.NoError(t, output.Err) + + // TODO: + _ = executionSnapshot + }) + }) + }) + }) + }) +} diff --git a/fvm/evm/handler/addressAllocator_test.go b/fvm/evm/handler/addressAllocator_test.go index 6ff534ff221..ab8eb0de2b4 100644 --- a/fvm/evm/handler/addressAllocator_test.go +++ b/fvm/evm/handler/addressAllocator_test.go @@ -14,7 +14,7 @@ import ( func TestAddressAllocator(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(root flow.Address) { aa, err := handler.NewAddressAllocator(backend, root) require.NoError(t, err) diff --git a/fvm/evm/handler/blockstore_test.go b/fvm/evm/handler/blockstore_test.go index 77f80d947ff..77720b143a2 100644 --- a/fvm/evm/handler/blockstore_test.go +++ b/fvm/evm/handler/blockstore_test.go @@ -13,7 +13,7 @@ import ( func TestBlockStore(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(root flow.Address) { bs, err := handler.NewBlockStore(backend, root) require.NoError(t, err) diff --git a/fvm/evm/handler/handler.go b/fvm/evm/handler/handler.go index b11ef08b7ee..4ed802e08a8 100644 --- a/fvm/evm/handler/handler.go +++ b/fvm/evm/handler/handler.go @@ -5,6 +5,7 @@ import ( gethTypes "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/rlp" + "github.com/onflow/cadence/runtime/common" "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/errors" @@ -22,21 +23,28 @@ import ( // in the future we might benefit from a view style of access to db passed as // a param to the emulator. type ContractHandler struct { + flowTokenAddress common.Address blockstore types.BlockStore + addressAllocator types.AddressAllocator backend types.Backend emulator types.Emulator - addressAllocator types.AddressAllocator +} + +func (h *ContractHandler) FlowTokenAddress() common.Address { + return h.flowTokenAddress } var _ types.ContractHandler = &ContractHandler{} func NewContractHandler( + flowTokenAddress common.Address, blockstore types.BlockStore, addressAllocator types.AddressAllocator, backend types.Backend, emulator types.Emulator, ) *ContractHandler { return &ContractHandler{ + flowTokenAddress: flowTokenAddress, blockstore: blockstore, addressAllocator: addressAllocator, backend: backend, @@ -58,7 +66,7 @@ func (h *ContractHandler) AccountByAddress(addr types.Address, isAuthorized bool } // LastExecutedBlock returns the last executed block -func (h ContractHandler) LastExecutedBlock() *types.Block { +func (h *ContractHandler) LastExecutedBlock() *types.Block { block, err := h.blockstore.LatestBlock() handleError(err) return block @@ -66,7 +74,7 @@ func (h ContractHandler) LastExecutedBlock() *types.Block { // Run runs an rlpencoded evm transaction and // collects the gas fees and pay it to the coinbase address provided. -func (h ContractHandler) Run(rlpEncodedTx []byte, coinbase types.Address) { +func (h *ContractHandler) Run(rlpEncodedTx []byte, coinbase types.Address) { // step 1 - transaction decoding encodedLen := uint(len(rlpEncodedTx)) err := h.backend.MeterComputation(environment.ComputationKindRLPDecoding, encodedLen) @@ -113,14 +121,14 @@ func (h ContractHandler) Run(rlpEncodedTx []byte, coinbase types.Address) { handleError(err) } -func (h ContractHandler) checkGasLimit(limit types.GasLimit) { +func (h *ContractHandler) checkGasLimit(limit types.GasLimit) { // check gas limit against what has been left on the transaction side if !h.backend.ComputationAvailable(environment.ComputationKindEVMGasUsage, uint(limit)) { handleError(types.ErrInsufficientComputation) } } -func (h ContractHandler) meterGasUsage(res *types.Result) { +func (h *ContractHandler) meterGasUsage(res *types.Result) { if res != nil { err := h.backend.MeterComputation(environment.ComputationKindEVMGasUsage, uint(res.GasConsumed)) handleError(err) @@ -128,10 +136,10 @@ func (h ContractHandler) meterGasUsage(res *types.Result) { } func (h *ContractHandler) emitEvent(event *types.Event) { - // TODO add extra metering for rlp encoding - encoded, err := event.Payload.Encode() + ev, err := event.Payload.CadenceEvent() handleError(err) - err = h.backend.EmitFlowEvent(event.Etype, encoded) + + err = h.backend.EmitEvent(ev) handleError(err) } diff --git a/fvm/evm/handler/handler_benchmark_test.go b/fvm/evm/handler/handler_benchmark_test.go new file mode 100644 index 00000000000..73f0f0ed59d --- /dev/null +++ b/fvm/evm/handler/handler_benchmark_test.go @@ -0,0 +1,82 @@ +package handler_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/fvm/evm/testutils" + "github.com/onflow/flow-go/fvm/evm/types" + "github.com/onflow/flow-go/model/flow" +) + +func BenchmarkStorage(b *testing.B) { benchmarkStorageGrowth(b, 100, 100) } + +// benchmark +func benchmarkStorageGrowth(b *testing.B, accountCount, setupKittyCount int) { + testutils.RunWithTestBackend(b, func(backend *testutils.TestBackend) { + testutils.RunWithTestFlowEVMRootAddress(b, backend, func(rootAddr flow.Address) { + testutils.RunWithDeployedContract(b, + testutils.GetDummyKittyTestContract(b), + backend, + rootAddr, + func(tc *testutils.TestContract) { + db, handler := SetupHandler(b, backend, rootAddr) + numOfAccounts := 100000 + accounts := make([]types.Account, numOfAccounts) + // setup several of accounts + // note that trie growth is the function of number of accounts + for i := 0; i < numOfAccounts; i++ { + account := handler.AccountByAddress(handler.AllocateAddress(), true) + account.Deposit(types.NewFlowTokenVault(types.Balance(100))) + accounts[i] = account + } + backend.DropEvents() + // mint kitties + for i := 0; i < setupKittyCount; i++ { + account := accounts[i%accountCount] + matronId := testutils.RandomBigInt(1000) + sireId := testutils.RandomBigInt(1000) + generation := testutils.RandomBigInt(1000) + genes := testutils.RandomBigInt(1000) + require.NotNil(b, account) + account.Call( + tc.DeployedAt, + tc.MakeCallData(b, + "CreateKitty", + matronId, + sireId, + generation, + genes, + ), + 300_000_000, + types.Balance(0), + ) + require.Equal(b, 2, len(backend.Events())) + backend.DropEvents() // this would make things lighter + } + + // measure the impact of mint after the setup phase + db.ResetReporter() + db.DropCache() + + accounts[0].Call( + tc.DeployedAt, + tc.MakeCallData(b, + "CreateKitty", + testutils.RandomBigInt(1000), + testutils.RandomBigInt(1000), + testutils.RandomBigInt(1000), + testutils.RandomBigInt(1000), + ), + 300_000_000, + types.Balance(0), + ) + + b.ReportMetric(float64(db.BytesRetrieved()), "bytes_read") + b.ReportMetric(float64(db.BytesStored()), "bytes_written") + b.ReportMetric(float64(backend.TotalStorageSize()), "total_storage_size") + }) + }) + }) +} diff --git a/fvm/evm/handler/handler_test.go b/fvm/evm/handler/handler_test.go index 29fae749a74..db78cf2d827 100644 --- a/fvm/evm/handler/handler_test.go +++ b/fvm/evm/handler/handler_test.go @@ -1,12 +1,17 @@ package handler_test import ( - "bytes" + "encoding/hex" "fmt" "math" "math/big" + "strings" "testing" + "github.com/onflow/cadence" + + jsoncdc "github.com/onflow/cadence/encoding/json" + gethCommon "github.com/ethereum/go-ethereum/common" gethTypes "github.com/ethereum/go-ethereum/core/types" gethParams "github.com/ethereum/go-ethereum/params" @@ -14,22 +19,29 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/onflow/cadence/runtime/common" + "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/fvm/evm/emulator" "github.com/onflow/flow-go/fvm/evm/emulator/database" "github.com/onflow/flow-go/fvm/evm/handler" "github.com/onflow/flow-go/fvm/evm/testutils" "github.com/onflow/flow-go/fvm/evm/types" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/model/flow" ) // TODO add test for fatal errors +var flowTokenAddress = common.MustBytesToAddress(systemcontracts.SystemContractsForChain(flow.Emulator).FlowToken.Address.Bytes()) + func TestHandler_TransactionRun(t *testing.T) { t.Parallel() t.Run("test - transaction run (happy case)", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + t.Parallel() + + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { testutils.RunWithEOATestAccount(t, backend, rootAddr, func(eoa *testutils.EOATestAccount) { @@ -55,8 +67,7 @@ func TestHandler_TransactionRun(t *testing.T) { return result, nil }, } - - handler := handler.NewContractHandler(bs, aa, backend, em) + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) coinbase := types.NewAddress(gethCommon.Address{}) @@ -85,18 +96,31 @@ func TestHandler_TransactionRun(t *testing.T) { event := events[0] assert.Equal(t, event.Type, types.EventTypeTransactionExecuted) - ev := types.TransactionExecutedPayload{} - err = rlp.Decode(bytes.NewReader(event.Payload), &ev) + ev, err := jsoncdc.Decode(nil, event.Payload) require.NoError(t, err) - for i, l := range result.Logs { - assert.Equal(t, l, ev.Result.Logs[i]) + cadenceEvent, ok := ev.(cadence.Event) + require.True(t, ok) + for j, f := range cadenceEvent.GetFields() { + // todo add an event decoder in types.event + if f.Identifier == "logs" { + cadenceLogs := cadenceEvent.GetFieldValues()[j] + encodedLogs, err := hex.DecodeString(strings.ReplaceAll(cadenceLogs.String(), "\"", "")) + require.NoError(t, err) + + var logs []*gethTypes.Log + err = rlp.DecodeBytes(encodedLogs, &logs) + require.NoError(t, err) + + for i, l := range result.Logs { + assert.Equal(t, l, logs[i]) + } + } } // check block event event = events[1] assert.Equal(t, event.Type, types.EventTypeBlockExecuted) - payload := types.BlockExecutedEventPayload{} - err = rlp.Decode(bytes.NewReader(event.Payload), &payload) + _, err = jsoncdc.Decode(nil, event.Payload) require.NoError(t, err) }) }) @@ -104,7 +128,9 @@ func TestHandler_TransactionRun(t *testing.T) { }) t.Run("test - transaction run (unhappy cases)", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + t.Parallel() + + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { testutils.RunWithEOATestAccount(t, backend, rootAddr, func(eoa *testutils.EOATestAccount) { @@ -119,7 +145,7 @@ func TestHandler_TransactionRun(t *testing.T) { return &types.Result{}, types.NewEVMExecutionError(fmt.Errorf("some sort of error")) }, } - handler := handler.NewContractHandler(bs, aa, backend, em) + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) coinbase := types.NewAddress(gethCommon.Address{}) @@ -164,21 +190,11 @@ func TestHandler_TransactionRun(t *testing.T) { }) t.Run("test running transaction (with integrated emulator)", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { - testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { - - bs, err := handler.NewBlockStore(backend, rootAddr) - require.NoError(t, err) - - aa, err := handler.NewAddressAllocator(backend, rootAddr) - require.NoError(t, err) - - db, err := database.NewDatabase(backend, rootAddr) - require.NoError(t, err) - - emulator := emulator.NewEmulator(db) + t.Parallel() - handler := handler.NewContractHandler(bs, aa, backend, emulator) + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { + testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { + _, handler := SetupHandler(t, backend, rootAddr) eoa := testutils.GetTestEOAAccount(t, testutils.EOATestAccount1KeyHex) @@ -230,19 +246,12 @@ func TestHandler_OpsWithoutEmulator(t *testing.T) { t.Parallel() t.Run("test last executed block call", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { - testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { - bs, err := handler.NewBlockStore(backend, rootAddr) - require.NoError(t, err) - - aa, err := handler.NewAddressAllocator(backend, rootAddr) - require.NoError(t, err) + t.Parallel() - db, err := database.NewDatabase(backend, testutils.TestFlowEVMRootAddress) - require.NoError(t, err) - emulator := emulator.NewEmulator(db) + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { + testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { + _, handler := SetupHandler(t, backend, rootAddr) - handler := handler.NewContractHandler(bs, aa, backend, emulator) // test call last executed block without initialization b := handler.LastExecutedBlock() require.Equal(t, types.GenesisBlock, b) @@ -262,7 +271,9 @@ func TestHandler_OpsWithoutEmulator(t *testing.T) { }) t.Run("test address allocation", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + t.Parallel() + + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { blockchain, err := handler.NewBlockStore(backend, rootAddr) require.NoError(t, err) @@ -270,7 +281,8 @@ func TestHandler_OpsWithoutEmulator(t *testing.T) { aa, err := handler.NewAddressAllocator(backend, rootAddr) require.NoError(t, err) - handler := handler.NewContractHandler(blockchain, aa, backend, nil) + handler := handler.NewContractHandler(flowTokenAddress, blockchain, aa, backend, nil) + foa := handler.AllocateAddress() require.NotNil(t, foa) @@ -284,20 +296,12 @@ func TestHandler_OpsWithoutEmulator(t *testing.T) { func TestHandler_BridgedAccount(t *testing.T) { t.Run("test deposit/withdraw (with integrated emulator)", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { - testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { - bs, err := handler.NewBlockStore(backend, rootAddr) - require.NoError(t, err) - - aa, err := handler.NewAddressAllocator(backend, rootAddr) - require.NoError(t, err) - - db, err := database.NewDatabase(backend, rootAddr) - require.NoError(t, err) + t.Parallel() - emulator := emulator.NewEmulator(db) + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { + testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { + _, handler := SetupHandler(t, backend, rootAddr) - handler := handler.NewContractHandler(bs, aa, backend, emulator) foa := handler.AccountByAddress(handler.AllocateAddress(), true) require.NotNil(t, foa) @@ -326,12 +330,6 @@ func TestHandler_BridgedAccount(t *testing.T) { // transaction event event := events[0] assert.Equal(t, event.Type, types.EventTypeTransactionExecuted) - ret := types.TransactionExecutedPayload{} - err = rlp.Decode(bytes.NewReader(event.Payload), &ret) - require.NoError(t, err) - // TODO: decode encoded tx and check for the amount and value - // assert.Equal(t, foa.Address(), ret.Address) - // assert.Equal(t, balance, ret.Amount) // block event event = events[1] @@ -340,8 +338,7 @@ func TestHandler_BridgedAccount(t *testing.T) { // transaction event event = events[2] assert.Equal(t, event.Type, types.EventTypeTransactionExecuted) - ret = types.TransactionExecutedPayload{} - err = rlp.Decode(bytes.NewReader(event.Payload), &ret) + _, err = jsoncdc.Decode(nil, event.Payload) require.NoError(t, err) // TODO: decode encoded tx and check for the amount and value // assert.Equal(t, foa.Address(), ret.Address) @@ -360,7 +357,9 @@ func TestHandler_BridgedAccount(t *testing.T) { }) t.Run("test withdraw (unhappy case)", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + t.Parallel() + + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { testutils.RunWithEOATestAccount(t, backend, rootAddr, func(eoa *testutils.EOATestAccount) { bs, err := handler.NewBlockStore(backend, rootAddr) @@ -372,7 +371,8 @@ func TestHandler_BridgedAccount(t *testing.T) { // Withdraw calls are only possible within FOA accounts assertPanic(t, types.IsAUnAuthroizedMethodCallError, func() { em := &testutils.TestEmulator{} - handler := handler.NewContractHandler(bs, aa, backend, em) + + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) account := handler.AccountByAddress(testutils.RandomAddress(t), false) account.Withdraw(types.Balance(1)) @@ -385,8 +385,10 @@ func TestHandler_BridgedAccount(t *testing.T) { return &types.Result{}, types.NewEVMExecutionError(fmt.Errorf("some sort of error")) }, } - handler := handler.NewContractHandler(bs, aa, backend, em) + + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) account := handler.AccountByAddress(testutils.RandomAddress(t), true) + account.Withdraw(types.Balance(1)) }) @@ -397,8 +399,10 @@ func TestHandler_BridgedAccount(t *testing.T) { return &types.Result{}, types.NewEVMExecutionError(fmt.Errorf("some sort of error")) }, } - handler := handler.NewContractHandler(bs, aa, backend, em) + + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) account := handler.AccountByAddress(testutils.RandomAddress(t), true) + account.Withdraw(types.Balance(0)) }) @@ -409,8 +413,10 @@ func TestHandler_BridgedAccount(t *testing.T) { return &types.Result{}, types.NewFatalError(fmt.Errorf("some sort of fatal error")) }, } - handler := handler.NewContractHandler(bs, aa, backend, em) + + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) account := handler.AccountByAddress(testutils.RandomAddress(t), true) + account.Withdraw(types.Balance(0)) }) }) @@ -419,7 +425,9 @@ func TestHandler_BridgedAccount(t *testing.T) { }) t.Run("test deposit (unhappy case)", func(t *testing.T) { - testutils.RunWithTestBackend(t, func(backend types.Backend) { + t.Parallel() + + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { testutils.RunWithEOATestAccount(t, backend, rootAddr, func(eoa *testutils.EOATestAccount) { bs, err := handler.NewBlockStore(backend, rootAddr) @@ -435,8 +443,10 @@ func TestHandler_BridgedAccount(t *testing.T) { return &types.Result{}, types.NewEVMExecutionError(fmt.Errorf("some sort of error")) }, } - handler := handler.NewContractHandler(bs, aa, backend, em) + + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) account := handler.AccountByAddress(testutils.RandomAddress(t), true) + account.Deposit(types.NewFlowTokenVault(1)) }) @@ -447,8 +457,10 @@ func TestHandler_BridgedAccount(t *testing.T) { return &types.Result{}, types.NewFatalError(fmt.Errorf("some sort of fatal error")) }, } - handler := handler.NewContractHandler(bs, aa, backend, em) + + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, em) account := handler.AccountByAddress(testutils.RandomAddress(t), true) + account.Deposit(types.NewFlowTokenVault(1)) }) }) @@ -457,21 +469,13 @@ func TestHandler_BridgedAccount(t *testing.T) { }) t.Run("test deploy/call (with integrated emulator)", func(t *testing.T) { + t.Parallel() + // TODO update this test with events, gas metering, etc - testutils.RunWithTestBackend(t, func(backend types.Backend) { + testutils.RunWithTestBackend(t, func(backend *testutils.TestBackend) { testutils.RunWithTestFlowEVMRootAddress(t, backend, func(rootAddr flow.Address) { - bs, err := handler.NewBlockStore(backend, rootAddr) - require.NoError(t, err) - - aa, err := handler.NewAddressAllocator(backend, rootAddr) - require.NoError(t, err) - - db, err := database.NewDatabase(backend, rootAddr) - require.NoError(t, err) - - emulator := emulator.NewEmulator(db) + _, handler := SetupHandler(t, backend, rootAddr) - handler := handler.NewContractHandler(bs, aa, backend, emulator) foa := handler.AccountByAddress(handler.AllocateAddress(), true) require.NotNil(t, foa) @@ -481,7 +485,7 @@ func TestHandler_BridgedAccount(t *testing.T) { vault := types.NewFlowTokenVault(orgBalance) foa.Deposit(vault) - testContract := testutils.GetTestContract(t) + testContract := testutils.GetStorageTestContract(t) addr := foa.Deploy(testContract.ByteCode, math.MaxUint64, types.Balance(0)) require.NotNil(t, addr) @@ -489,13 +493,13 @@ func TestHandler_BridgedAccount(t *testing.T) { _ = foa.Call( addr, - testContract.MakeStoreCallData(t, num), + testContract.MakeCallData(t, "store", num), math.MaxUint64, types.Balance(0)) ret := foa.Call( addr, - testContract.MakeRetrieveCallData(t), + testContract.MakeCallData(t, "retrieve"), math.MaxUint64, types.Balance(0)) @@ -528,3 +532,19 @@ func assertPanic(t *testing.T, check checkError, f func()) { }() f() } + +func SetupHandler(t testing.TB, backend types.Backend, rootAddr flow.Address) (*database.Database, *handler.ContractHandler) { + bs, err := handler.NewBlockStore(backend, rootAddr) + require.NoError(t, err) + + aa, err := handler.NewAddressAllocator(backend, rootAddr) + require.NoError(t, err) + + db, err := database.NewDatabase(backend, rootAddr) + require.NoError(t, err) + + emulator := emulator.NewEmulator(db) + + handler := handler.NewContractHandler(flowTokenAddress, bs, aa, backend, emulator) + return db, handler +} diff --git a/fvm/evm/stdlib/contract.cdc b/fvm/evm/stdlib/contract.cdc new file mode 100644 index 00000000000..60f544a68b0 --- /dev/null +++ b/fvm/evm/stdlib/contract.cdc @@ -0,0 +1,134 @@ +import "FlowToken" + +access(all) +contract EVM { + + /// EVMAddress is an EVM-compatible address + access(all) + struct EVMAddress { + + /// Bytes of the address + access(all) + let bytes: [UInt8; 20] + + /// Constructs a new EVM address from the given byte representation + init(bytes: [UInt8; 20]) { + self.bytes = bytes + } + + /// Deposits the given vault into the EVM account with the given address + access(all) + fun deposit(from: @FlowToken.Vault) { + InternalEVM.deposit( + from: <-from, + to: self.bytes + ) + } + } + + access(all) + struct Balance { + + /// The balance in FLOW + access(all) + let flow: UFix64 + + /// Constructs a new balance, given the balance in FLOW + init(flow: UFix64) { + self.flow = flow + } + + // TODO: + // /// Returns the balance in terms of atto-FLOW. + // /// Atto-FLOW is the smallest denomination of FLOW inside EVM + // access(all) + // fun toAttoFlow(): UInt64 + } + + access(all) + resource BridgedAccount { + + access(self) + let addressBytes: [UInt8; 20] + + init(addressBytes: [UInt8; 20]) { + self.addressBytes = addressBytes + } + + /// The EVM address of the bridged account + access(all) + fun address(): EVMAddress { + // Always create a new EVMAddress instance + return EVMAddress(bytes: self.addressBytes) + } + + /// Deposits the given vault into the bridged account's balance + access(all) + fun deposit(from: @FlowToken.Vault) { + self.address().deposit(from: <-from) + } + + /// Withdraws the balance from the bridged account's balance + access(all) + fun withdraw(balance: Balance): @FlowToken.Vault { + let vault <- InternalEVM.withdraw( + from: self.addressBytes, + amount: balance.flow + ) as! @FlowToken.Vault + return <-vault + } + + /// Deploys a contract to the EVM environment. + /// Returns the address of the newly deployed contract + access(all) + fun deploy( + code: [UInt8], + gasLimit: UInt64, + value: Balance + ): EVMAddress { + let addressBytes = InternalEVM.deploy( + from: self.addressBytes, + code: code, + gasLimit: gasLimit, + value: value.flow + ) + return EVMAddress(bytes: addressBytes) + } + + /// Calls a function with the given data. + /// The execution is limited by the given amount of gas + access(all) + fun call( + to: EVMAddress, + data: [UInt8], + gasLimit: UInt64, + value: Balance + ): [UInt8] { + return InternalEVM.call( + from: self.addressBytes, + to: to.bytes, + data: data, + gasLimit: gasLimit, + value: value.flow + ) + } + } + + /// Creates a new bridged account + access(all) + fun createBridgedAccount(): @BridgedAccount { + return <-create BridgedAccount( + addressBytes: InternalEVM.createBridgedAccount() + ) + } + + /// Runs an a RLP-encoded EVM transaction, deducts the gas fees, + /// and deposits the gas fees into the provided coinbase address. + /// + /// Returns true if the transaction was successful, + /// and returns false otherwise + access(all) + fun run(tx: [UInt8], coinbase: EVMAddress) { + InternalEVM.run(tx: tx, coinbase: coinbase.bytes) + } +} diff --git a/fvm/evm/stdlib/contract.go b/fvm/evm/stdlib/contract.go new file mode 100644 index 00000000000..e66fa8a6787 --- /dev/null +++ b/fvm/evm/stdlib/contract.go @@ -0,0 +1,658 @@ +package stdlib + +import ( + _ "embed" + "fmt" + "regexp" + + "github.com/onflow/cadence" + "github.com/onflow/cadence/runtime" + "github.com/onflow/cadence/runtime/common" + "github.com/onflow/cadence/runtime/errors" + "github.com/onflow/cadence/runtime/interpreter" + "github.com/onflow/cadence/runtime/sema" + "github.com/onflow/cadence/runtime/stdlib" + + "github.com/onflow/flow-go/fvm/evm/types" + "github.com/onflow/flow-go/model/flow" +) + +//go:embed contract.cdc +var contractCode string + +var flowTokenImportPattern = regexp.MustCompile(`^import "FlowToken"\n`) + +func ContractCode(flowTokenAddress flow.Address) []byte { + return []byte(flowTokenImportPattern.ReplaceAllString( + contractCode, + fmt.Sprintf("import FlowToken from %s", flowTokenAddress.HexWithPrefix()), + )) +} + +const ContractName = "EVM" + +var EVMTransactionBytesCadenceType = cadence.NewVariableSizedArrayType(cadence.TheUInt8Type) +var evmTransactionBytesType = sema.NewVariableSizedType(nil, sema.UInt8Type) + +var evmAddressBytesType = sema.NewConstantSizedType(nil, sema.UInt8Type, types.AddressLength) +var evmAddressBytesStaticType = interpreter.ConvertSemaArrayTypeToStaticArrayType(nil, evmAddressBytesType) +var EVMAddressBytesCadenceType = cadence.NewConstantSizedArrayType(types.AddressLength, cadence.TheUInt8Type) + +const internalEVMTypeRunFunctionName = "run" + +var internalEVMTypeRunFunctionType = &sema.FunctionType{ + Parameters: []sema.Parameter{ + { + Label: "tx", + TypeAnnotation: sema.NewTypeAnnotation(evmTransactionBytesType), + }, + { + Label: "coinbase", + TypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), + }, + }, + ReturnTypeAnnotation: sema.NewTypeAnnotation(sema.BoolType), +} + +func newInternalEVMTypeRunFunction( + gauge common.MemoryGauge, + handler types.ContractHandler, +) *interpreter.HostFunctionValue { + return interpreter.NewHostFunctionValue( + gauge, + internalEVMTypeRunFunctionType, + func(invocation interpreter.Invocation) interpreter.Value { + inter := invocation.Interpreter + locationRange := invocation.LocationRange + + // Get transaction argument + + transactionValue, ok := invocation.Arguments[0].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + transaction, err := interpreter.ByteArrayValueToByteSlice(inter, transactionValue, locationRange) + if err != nil { + panic(err) + } + + // Get coinbase argument + + coinbaseValue, ok := invocation.Arguments[1].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + coinbase, err := interpreter.ByteArrayValueToByteSlice(inter, coinbaseValue, locationRange) + if err != nil { + panic(err) + } + + // Run + + cb := types.NewAddressFromBytes(coinbase) + handler.Run(transaction, cb) + + return interpreter.Void + }, + ) +} + +func EVMAddressToAddressBytesArrayValue( + inter *interpreter.Interpreter, + address types.Address, +) *interpreter.ArrayValue { + var index int + return interpreter.NewArrayValueWithIterator( + inter, + evmAddressBytesStaticType, + common.ZeroAddress, + types.AddressLength, + func() interpreter.Value { + if index >= types.AddressLength { + return nil + } + result := interpreter.NewUInt8Value(inter, func() uint8 { + return address[index] + }) + index++ + return result + }, + ) +} + +const internalEVMTypeCallFunctionName = "call" + +var internalEVMTypeCallFunctionType = &sema.FunctionType{ + Parameters: []sema.Parameter{ + { + Label: "from", + TypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), + }, + { + Label: "to", + TypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), + }, + { + Label: "data", + TypeAnnotation: sema.NewTypeAnnotation(sema.ByteArrayType), + }, + { + Label: "gasLimit", + TypeAnnotation: sema.NewTypeAnnotation(sema.UInt64Type), + }, + { + Label: "value", + TypeAnnotation: sema.NewTypeAnnotation(sema.UFix64Type), + }, + }, + ReturnTypeAnnotation: sema.NewTypeAnnotation(sema.ByteArrayType), +} + +func AddressBytesArrayValueToEVMAddress( + inter *interpreter.Interpreter, + locationRange interpreter.LocationRange, + addressBytesValue *interpreter.ArrayValue, +) ( + result types.Address, + err error, +) { + // Convert + + var bytes []byte + bytes, err = interpreter.ByteArrayValueToByteSlice( + inter, + addressBytesValue, + locationRange, + ) + if err != nil { + return result, err + } + + // Check length + + length := len(bytes) + const expectedLength = types.AddressLength + if length != expectedLength { + return result, errors.NewDefaultUserError( + "invalid address length: got %d, expected %d", + length, + expectedLength, + ) + } + + copy(result[:], bytes) + + return result, nil +} + +func newInternalEVMTypeCallFunction( + gauge common.MemoryGauge, + handler types.ContractHandler, +) *interpreter.HostFunctionValue { + return interpreter.NewHostFunctionValue( + gauge, + internalEVMTypeCallFunctionType, + func(invocation interpreter.Invocation) interpreter.Value { + inter := invocation.Interpreter + locationRange := invocation.LocationRange + + // Get from address + + fromAddressValue, ok := invocation.Arguments[0].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + fromAddress, err := AddressBytesArrayValueToEVMAddress(inter, locationRange, fromAddressValue) + if err != nil { + panic(err) + } + + // Get to address + + toAddressValue, ok := invocation.Arguments[1].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + toAddress, err := AddressBytesArrayValueToEVMAddress(inter, locationRange, toAddressValue) + if err != nil { + panic(err) + } + + // Get data + + dataValue, ok := invocation.Arguments[2].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + data, err := interpreter.ByteArrayValueToByteSlice(inter, dataValue, locationRange) + if err != nil { + panic(err) + } + + // Get gas limit + + gasLimitValue, ok := invocation.Arguments[3].(interpreter.UInt64Value) + if !ok { + panic(errors.NewUnreachableError()) + } + + gasLimit := types.GasLimit(gasLimitValue) + + // Get balance + + balanceValue, ok := invocation.Arguments[4].(interpreter.UFix64Value) + if !ok { + panic(errors.NewUnreachableError()) + } + + balance := types.Balance(balanceValue) + + // Call + + const isAuthorized = true + account := handler.AccountByAddress(fromAddress, isAuthorized) + result := account.Call(toAddress, data, gasLimit, balance) + + return interpreter.ByteSliceToByteArrayValue(inter, result) + }, + ) +} + +const internalEVMTypeCreateBridgedAccountFunctionName = "createBridgedAccount" + +var internalEVMTypeCreateBridgedAccountFunctionType = &sema.FunctionType{ + ReturnTypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), +} + +func newInternalEVMTypeCreateBridgedAccountFunction( + gauge common.MemoryGauge, + handler types.ContractHandler, +) *interpreter.HostFunctionValue { + return interpreter.NewHostFunctionValue( + gauge, + internalEVMTypeCreateBridgedAccountFunctionType, + func(invocation interpreter.Invocation) interpreter.Value { + inter := invocation.Interpreter + address := handler.AllocateAddress() + return EVMAddressToAddressBytesArrayValue(inter, address) + }, + ) +} + +const internalEVMTypeDepositFunctionName = "deposit" + +var internalEVMTypeDepositFunctionType = &sema.FunctionType{ + Parameters: []sema.Parameter{ + { + Label: "from", + TypeAnnotation: sema.NewTypeAnnotation(sema.AnyResourceType), + }, + { + Label: "to", + TypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), + }, + }, + ReturnTypeAnnotation: sema.NewTypeAnnotation(sema.VoidType), +} + +const fungibleTokenVaultTypeBalanceFieldName = "balance" + +func newInternalEVMTypeDepositFunction( + gauge common.MemoryGauge, + handler types.ContractHandler, +) *interpreter.HostFunctionValue { + return interpreter.NewHostFunctionValue( + gauge, + internalEVMTypeCallFunctionType, + func(invocation interpreter.Invocation) interpreter.Value { + inter := invocation.Interpreter + locationRange := invocation.LocationRange + + // Get from vault + + fromValue, ok := invocation.Arguments[0].(*interpreter.CompositeValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + amountValue, ok := fromValue.GetField( + inter, + locationRange, + fungibleTokenVaultTypeBalanceFieldName, + ).(interpreter.UFix64Value) + if !ok { + panic(errors.NewUnreachableError()) + } + + amount := types.Balance(amountValue) + + // Get to address + + toAddressValue, ok := invocation.Arguments[1].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + toAddress, err := AddressBytesArrayValueToEVMAddress(inter, locationRange, toAddressValue) + if err != nil { + panic(err) + } + + // NOTE: We're intentionally not destroying the vault here, + // because the value of it is supposed to be "kept alive". + // Destroying would incorrectly be equivalent to a burn and decrease the total supply, + // and a withdrawal would then have to perform an actual mint of new tokens. + + // Deposit + + const isAuthorized = false + account := handler.AccountByAddress(toAddress, isAuthorized) + account.Deposit(types.NewFlowTokenVault(amount)) + + return interpreter.Void + }, + ) +} + +const internalEVMTypeWithdrawFunctionName = "withdraw" + +var internalEVMTypeWithdrawFunctionType = &sema.FunctionType{ + Parameters: []sema.Parameter{ + { + Label: "from", + TypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), + }, + { + Label: "amount", + TypeAnnotation: sema.NewTypeAnnotation(sema.UFix64Type), + }, + }, + ReturnTypeAnnotation: sema.NewTypeAnnotation(sema.AnyResourceType), +} + +func newInternalEVMTypeWithdrawFunction( + gauge common.MemoryGauge, + handler types.ContractHandler, +) *interpreter.HostFunctionValue { + return interpreter.NewHostFunctionValue( + gauge, + internalEVMTypeCallFunctionType, + func(invocation interpreter.Invocation) interpreter.Value { + inter := invocation.Interpreter + locationRange := invocation.LocationRange + + // Get from address + + fromAddressValue, ok := invocation.Arguments[0].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + fromAddress, err := AddressBytesArrayValueToEVMAddress(inter, locationRange, fromAddressValue) + if err != nil { + panic(err) + } + + // Get amount + + amountValue, ok := invocation.Arguments[1].(interpreter.UFix64Value) + if !ok { + panic(errors.NewUnreachableError()) + } + + amount := types.Balance(amountValue) + + // Withdraw + + const isAuthorized = true + account := handler.AccountByAddress(fromAddress, isAuthorized) + vault := account.Withdraw(amount) + + // TODO: improve: maybe call actual constructor + return interpreter.NewCompositeValue( + inter, + locationRange, + common.NewAddressLocation(gauge, handler.FlowTokenAddress(), "FlowToken"), + "FlowToken.Vault", + common.CompositeKindResource, + []interpreter.CompositeField{ + { + Name: "balance", + Value: interpreter.NewUFix64Value(gauge, func() uint64 { + return uint64(vault.Balance()) + }), + }, + }, + common.ZeroAddress, + ) + }, + ) +} + +const internalEVMTypeDeployFunctionName = "deploy" + +var internalEVMTypeDeployFunctionType = &sema.FunctionType{ + Parameters: []sema.Parameter{ + { + Label: "from", + TypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), + }, + { + Label: "code", + TypeAnnotation: sema.NewTypeAnnotation(sema.ByteArrayType), + }, + { + Label: "gasLimit", + TypeAnnotation: sema.NewTypeAnnotation(sema.UInt64Type), + }, + { + Label: "value", + TypeAnnotation: sema.NewTypeAnnotation(sema.UFix64Type), + }, + }, + ReturnTypeAnnotation: sema.NewTypeAnnotation(evmAddressBytesType), +} + +func newInternalEVMTypeDeployFunction( + gauge common.MemoryGauge, + handler types.ContractHandler, +) *interpreter.HostFunctionValue { + return interpreter.NewHostFunctionValue( + gauge, + internalEVMTypeCallFunctionType, + func(invocation interpreter.Invocation) interpreter.Value { + inter := invocation.Interpreter + locationRange := invocation.LocationRange + + // Get from address + + fromAddressValue, ok := invocation.Arguments[0].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + fromAddress, err := AddressBytesArrayValueToEVMAddress(inter, locationRange, fromAddressValue) + if err != nil { + panic(err) + } + + // Get code + + codeValue, ok := invocation.Arguments[1].(*interpreter.ArrayValue) + if !ok { + panic(errors.NewUnreachableError()) + } + + code, err := interpreter.ByteArrayValueToByteSlice(inter, codeValue, locationRange) + if err != nil { + panic(err) + } + + // Get gas limit + + gasLimitValue, ok := invocation.Arguments[2].(interpreter.UInt64Value) + if !ok { + panic(errors.NewUnreachableError()) + } + + gasLimit := types.GasLimit(gasLimitValue) + + // Get value + + amountValue, ok := invocation.Arguments[3].(interpreter.UFix64Value) + if !ok { + panic(errors.NewUnreachableError()) + } + + amount := types.Balance(amountValue) + + // Deploy + + const isAuthorized = true + account := handler.AccountByAddress(fromAddress, isAuthorized) + address := account.Deploy(code, gasLimit, amount) + + return EVMAddressToAddressBytesArrayValue(inter, address) + }, + ) +} + +func NewInternalEVMContractValue( + gauge common.MemoryGauge, + handler types.ContractHandler, +) *interpreter.SimpleCompositeValue { + return interpreter.NewSimpleCompositeValue( + gauge, + InternalEVMContractType.ID(), + internalEVMContractStaticType, + InternalEVMContractType.Fields, + map[string]interpreter.Value{ + internalEVMTypeRunFunctionName: newInternalEVMTypeRunFunction(gauge, handler), + internalEVMTypeCreateBridgedAccountFunctionName: newInternalEVMTypeCreateBridgedAccountFunction(gauge, handler), + internalEVMTypeCallFunctionName: newInternalEVMTypeCallFunction(gauge, handler), + internalEVMTypeDepositFunctionName: newInternalEVMTypeDepositFunction(gauge, handler), + internalEVMTypeWithdrawFunctionName: newInternalEVMTypeWithdrawFunction(gauge, handler), + internalEVMTypeDeployFunctionName: newInternalEVMTypeDeployFunction(gauge, handler), + }, + nil, + nil, + nil, + ) +} + +const InternalEVMContractName = "InternalEVM" + +var InternalEVMContractType = func() *sema.CompositeType { + ty := &sema.CompositeType{ + Identifier: InternalEVMContractName, + Kind: common.CompositeKindContract, + } + + ty.Members = sema.MembersAsMap([]*sema.Member{ + sema.NewUnmeteredPublicFunctionMember( + ty, + internalEVMTypeRunFunctionName, + internalEVMTypeRunFunctionType, + "", + ), + sema.NewUnmeteredPublicFunctionMember( + ty, + internalEVMTypeCreateBridgedAccountFunctionName, + internalEVMTypeCreateBridgedAccountFunctionType, + "", + ), + sema.NewUnmeteredPublicFunctionMember( + ty, + internalEVMTypeCallFunctionName, + internalEVMTypeCallFunctionType, + "", + ), + sema.NewUnmeteredPublicFunctionMember( + ty, + internalEVMTypeDepositFunctionName, + internalEVMTypeDepositFunctionType, + "", + ), + sema.NewUnmeteredPublicFunctionMember( + ty, + internalEVMTypeWithdrawFunctionName, + internalEVMTypeWithdrawFunctionType, + "", + ), + sema.NewUnmeteredPublicFunctionMember( + ty, + internalEVMTypeDeployFunctionName, + internalEVMTypeDeployFunctionType, + "", + ), + }) + return ty +}() + +var internalEVMContractStaticType = interpreter.ConvertSemaCompositeTypeToStaticCompositeType( + nil, + InternalEVMContractType, +) + +func newInternalEVMStandardLibraryValue( + gauge common.MemoryGauge, + handler types.ContractHandler, +) stdlib.StandardLibraryValue { + return stdlib.StandardLibraryValue{ + Name: InternalEVMContractName, + Type: InternalEVMContractType, + Value: NewInternalEVMContractValue(gauge, handler), + Kind: common.DeclarationKindContract, + } +} + +var internalEVMStandardLibraryType = stdlib.StandardLibraryType{ + Name: InternalEVMContractName, + Type: InternalEVMContractType, + Kind: common.DeclarationKindContract, +} + +func SetupEnvironment(env runtime.Environment, handler types.ContractHandler, service flow.Address) { + location := common.NewAddressLocation(nil, common.Address(service), ContractName) + env.DeclareType( + internalEVMStandardLibraryType, + location, + ) + env.DeclareValue( + newInternalEVMStandardLibraryValue(nil, handler), + location, + ) +} + +func NewEVMAddressCadenceType(address common.Address) *cadence.StructType { + return cadence.NewStructType( + common.NewAddressLocation(nil, address, ContractName), + "EVM.EVMAddress", + []cadence.Field{ + { + Identifier: "bytes", + Type: EVMAddressBytesCadenceType, + }, + }, + nil, + ) +} + +func NewBalanceCadenceType(address common.Address) *cadence.StructType { + return cadence.NewStructType( + common.NewAddressLocation(nil, address, ContractName), + "EVM.Balance", + []cadence.Field{ + { + Identifier: "flow", + Type: cadence.UFix64Type{}, + }, + }, + nil, + ) +} diff --git a/fvm/evm/stdlib/contract_test.go b/fvm/evm/stdlib/contract_test.go new file mode 100644 index 00000000000..2ba15c3eb16 --- /dev/null +++ b/fvm/evm/stdlib/contract_test.go @@ -0,0 +1,1145 @@ +package stdlib_test + +import ( + "encoding/binary" + "testing" + + "github.com/onflow/cadence" + "github.com/onflow/cadence/encoding/json" + "github.com/onflow/cadence/runtime" + "github.com/onflow/cadence/runtime/common" + contracts2 "github.com/onflow/flow-core-contracts/lib/go/contracts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/fvm/blueprints" + "github.com/onflow/flow-go/fvm/evm/stdlib" + . "github.com/onflow/flow-go/fvm/evm/testutils" + "github.com/onflow/flow-go/fvm/evm/types" + "github.com/onflow/flow-go/model/flow" +) + +type testContractHandler struct { + flowTokenAddress common.Address + allocateAddress func() types.Address + addressIndex uint64 + accountByAddress func(types.Address, bool) types.Account + lastExecutedBlock func() *types.Block + run func(tx []byte, coinbase types.Address) +} + +func (t *testContractHandler) FlowTokenAddress() common.Address { + return t.flowTokenAddress +} + +var _ types.ContractHandler = &testContractHandler{} + +func (t *testContractHandler) AllocateAddress() types.Address { + if t.allocateAddress == nil { + t.addressIndex++ + var address types.Address + binary.LittleEndian.PutUint64(address[:], t.addressIndex) + return address + } + return t.allocateAddress() +} + +func (t *testContractHandler) AccountByAddress(addr types.Address, isAuthorized bool) types.Account { + if t.accountByAddress == nil { + panic("unexpected AccountByAddress") + } + return t.accountByAddress(addr, isAuthorized) +} + +func (t *testContractHandler) LastExecutedBlock() *types.Block { + if t.lastExecutedBlock == nil { + panic("unexpected LastExecutedBlock") + } + return t.lastExecutedBlock() +} + +func (t *testContractHandler) Run(tx []byte, coinbase types.Address) { + if t.run == nil { + panic("unexpected Run") + } + t.run(tx, coinbase) +} + +type testFlowAccount struct { + address types.Address + balance func() types.Balance + transfer func(address types.Address, balance types.Balance) + deposit func(vault *types.FLOWTokenVault) + withdraw func(balance types.Balance) *types.FLOWTokenVault + deploy func(code types.Code, limit types.GasLimit, balance types.Balance) types.Address + call func(address types.Address, data types.Data, limit types.GasLimit, balance types.Balance) types.Data +} + +var _ types.Account = &testFlowAccount{} + +func (t *testFlowAccount) Address() types.Address { + return t.address +} + +func (t *testFlowAccount) Balance() types.Balance { + if t.balance == nil { + return types.Balance(0) + } + return t.balance() +} + +func (t *testFlowAccount) Transfer(address types.Address, balance types.Balance) { + if t.transfer == nil { + panic("unexpected Transfer") + } + t.transfer(address, balance) +} + +func (t *testFlowAccount) Deposit(vault *types.FLOWTokenVault) { + if t.deposit == nil { + panic("unexpected Deposit") + } + t.deposit(vault) +} + +func (t *testFlowAccount) Withdraw(balance types.Balance) *types.FLOWTokenVault { + if t.withdraw == nil { + panic("unexpected Withdraw") + } + return t.withdraw(balance) +} + +func (t *testFlowAccount) Deploy(code types.Code, limit types.GasLimit, balance types.Balance) types.Address { + if t.deploy == nil { + panic("unexpected Deploy") + } + return t.deploy(code, limit, balance) +} + +func (t *testFlowAccount) Call(address types.Address, data types.Data, limit types.GasLimit, balance types.Balance) types.Data { + if t.call == nil { + panic("unexpected Call") + } + return t.call(address, data, limit, balance) +} + +func deployContracts( + t *testing.T, + rt runtime.Runtime, + contractsAddress flow.Address, + runtimeInterface *TestRuntimeInterface, + transactionEnvironment runtime.Environment, + nextTransactionLocation func() common.TransactionLocation, +) { + + contractsAddressHex := contractsAddress.Hex() + + contracts := []struct { + name string + code []byte + deployTx []byte + }{ + { + name: "FungibleToken", + code: contracts2.FungibleToken(), + }, + { + name: "NonFungibleToken", + code: contracts2.NonFungibleToken(), + }, + { + name: "MetadataViews", + code: contracts2.MetadataViews( + contractsAddressHex, + contractsAddressHex, + ), + }, + { + name: "FungibleTokenMetadataViews", + code: contracts2.FungibleTokenMetadataViews( + contractsAddressHex, + contractsAddressHex, + ), + }, + { + name: "ViewResolver", + code: contracts2.ViewResolver(), + }, + { + name: "FlowToken", + code: contracts2.FlowToken( + contractsAddressHex, + contractsAddressHex, + contractsAddressHex, + ), + deployTx: []byte(` + transaction(name: String, code: String) { + prepare(signer: AuthAccount) { + signer.contracts.add(name: name, code: code.utf8, signer) + } + } + `), + }, + { + name: stdlib.ContractName, + code: stdlib.ContractCode(contractsAddress), + }, + } + + for _, contract := range contracts { + + deployTx := contract.deployTx + if len(deployTx) == 0 { + deployTx = blueprints.DeployContractTransactionTemplate + } + + err := rt.ExecuteTransaction( + runtime.Script{ + Source: deployTx, + Arguments: EncodeArgs([]cadence.Value{ + cadence.String(contract.name), + cadence.String(contract.code), + }), + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: transactionEnvironment, + Location: nextTransactionLocation(), + }, + ) + require.NoError(t, err) + } + +} + +func newEVMTransactionEnvironment(handler types.ContractHandler, service flow.Address) runtime.Environment { + transactionEnvironment := runtime.NewBaseInterpreterEnvironment(runtime.Config{}) + + stdlib.SetupEnvironment( + transactionEnvironment, + handler, + service, + ) + + return transactionEnvironment +} + +func newEVMScriptEnvironment(handler types.ContractHandler, service flow.Address) runtime.Environment { + scriptEnvironment := runtime.NewScriptInterpreterEnvironment(runtime.Config{}) + + stdlib.SetupEnvironment( + scriptEnvironment, + handler, + service, + ) + + return scriptEnvironment +} + +func TestEVMAddressConstructionAndReturn(t *testing.T) { + + t.Parallel() + + handler := &testContractHandler{} + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + + access(all) + fun main(_ bytes: [UInt8; 20]): EVM.EVMAddress { + return EVM.EVMAddress(bytes: bytes) + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + addressBytesArray := cadence.NewArray([]cadence.Value{ + cadence.UInt8(1), cadence.UInt8(1), + cadence.UInt8(2), cadence.UInt8(2), + cadence.UInt8(3), cadence.UInt8(3), + cadence.UInt8(4), cadence.UInt8(4), + cadence.UInt8(5), cadence.UInt8(5), + cadence.UInt8(6), cadence.UInt8(6), + cadence.UInt8(7), cadence.UInt8(7), + cadence.UInt8(8), cadence.UInt8(8), + cadence.UInt8(9), cadence.UInt8(9), + cadence.UInt8(10), cadence.UInt8(10), + }).WithType(stdlib.EVMAddressBytesCadenceType) + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + result, err := rt.ExecuteScript( + runtime.Script{ + Source: script, + Arguments: EncodeArgs([]cadence.Value{ + addressBytesArray, + }), + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + evmAddressCadenceType := stdlib.NewEVMAddressCadenceType(common.Address(contractsAddress)) + + assert.Equal(t, + cadence.Struct{ + StructType: evmAddressCadenceType, + Fields: []cadence.Value{ + addressBytesArray, + }, + }, + result, + ) +} + +func TestBalanceConstructionAndReturn(t *testing.T) { + + t.Parallel() + + handler := &testContractHandler{} + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + + access(all) + fun main(_ flow: UFix64): EVM.Balance { + return EVM.Balance(flow: flow) + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + flowValue, err := cadence.NewUFix64FromParts(1, 23000000) + require.NoError(t, err) + + result, err := rt.ExecuteScript( + runtime.Script{ + Source: script, + Arguments: EncodeArgs([]cadence.Value{ + flowValue, + }), + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + evmBalanceCadenceType := stdlib.NewBalanceCadenceType(common.Address(contractsAddress)) + + assert.Equal(t, + cadence.Struct{ + StructType: evmBalanceCadenceType, + Fields: []cadence.Value{ + flowValue, + }, + }, + result, + ) +} + +func TestEVMRun(t *testing.T) { + + t.Parallel() + + evmTx := cadence.NewArray([]cadence.Value{ + cadence.UInt8(1), + cadence.UInt8(2), + cadence.UInt8(3), + }).WithType(stdlib.EVMTransactionBytesCadenceType) + + coinbase := cadence.NewArray([]cadence.Value{ + cadence.UInt8(1), cadence.UInt8(1), + cadence.UInt8(2), cadence.UInt8(2), + cadence.UInt8(3), cadence.UInt8(3), + cadence.UInt8(4), cadence.UInt8(4), + cadence.UInt8(5), cadence.UInt8(5), + cadence.UInt8(6), cadence.UInt8(6), + cadence.UInt8(7), cadence.UInt8(7), + cadence.UInt8(8), cadence.UInt8(8), + cadence.UInt8(9), cadence.UInt8(9), + cadence.UInt8(10), cadence.UInt8(10), + }).WithType(stdlib.EVMAddressBytesCadenceType) + + runCalled := false + + handler := &testContractHandler{ + run: func(tx []byte, coinbase types.Address) { + runCalled = true + + assert.Equal(t, []byte{1, 2, 3}, tx) + assert.Equal(t, + types.Address{ + 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + }, + coinbase, + ) + + }, + } + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + + access(all) + fun main(tx: [UInt8], coinbaseBytes: [UInt8; 20]) { + let coinbase = EVM.EVMAddress(bytes: coinbaseBytes) + EVM.run(tx: tx, coinbase: coinbase) + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + _, err := rt.ExecuteScript( + runtime.Script{ + Source: script, + Arguments: EncodeArgs([]cadence.Value{evmTx, coinbase}), + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + assert.True(t, runCalled) +} + +func TestEVMCreateBridgedAccount(t *testing.T) { + + t.Parallel() + + handler := &testContractHandler{} + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + + access(all) + fun main(): [UInt8; 20] { + let bridgedAccount1 <- EVM.createBridgedAccount() + destroy bridgedAccount1 + + let bridgedAccount2 <- EVM.createBridgedAccount() + let bytes = bridgedAccount2.address().bytes + destroy bridgedAccount2 + + return bytes + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + actual, err := rt.ExecuteScript( + runtime.Script{ + Source: script, + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + expected := cadence.NewArray([]cadence.Value{ + cadence.UInt8(2), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + }).WithType(cadence.NewConstantSizedArrayType( + types.AddressLength, + cadence.UInt8Type{}, + )) + + require.Equal(t, expected, actual) +} + +func TestBridgedAccountCall(t *testing.T) { + + t.Parallel() + + expectedBalance, err := cadence.NewUFix64FromParts(1, 23000000) + require.NoError(t, err) + + handler := &testContractHandler{ + accountByAddress: func(fromAddress types.Address, isAuthorized bool) types.Account { + assert.Equal(t, types.Address{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, fromAddress) + assert.True(t, isAuthorized) + + return &testFlowAccount{ + address: fromAddress, + call: func( + toAddress types.Address, + data types.Data, + limit types.GasLimit, + balance types.Balance, + ) types.Data { + assert.Equal(t, types.Address{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, toAddress) + assert.Equal(t, types.Data{4, 5, 6}, data) + assert.Equal(t, types.GasLimit(9999), limit) + assert.Equal(t, types.Balance(expectedBalance), balance) + + return types.Data{3, 1, 4} + }, + } + }, + } + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + + access(all) + fun main(): [UInt8] { + let bridgedAccount <- EVM.createBridgedAccount() + let response = bridgedAccount.call( + to: EVM.EVMAddress( + bytes: [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ), + data: [4, 5, 6], + gasLimit: 9999, + value: EVM.Balance(flow: 1.23) + ) + destroy bridgedAccount + return response + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + actual, err := rt.ExecuteScript( + runtime.Script{ + Source: script, + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + expected := cadence.NewArray([]cadence.Value{ + cadence.UInt8(3), + cadence.UInt8(1), + cadence.UInt8(4), + }).WithType(cadence.NewVariableSizedArrayType(cadence.UInt8Type{})) + + require.Equal(t, expected, actual) +} + +func TestEVMAddressDeposit(t *testing.T) { + + t.Parallel() + + expectedBalance, err := cadence.NewUFix64FromParts(1, 23000000) + require.NoError(t, err) + + var deposited bool + + handler := &testContractHandler{ + + accountByAddress: func(fromAddress types.Address, isAuthorized bool) types.Account { + assert.Equal(t, types.Address{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, fromAddress) + assert.False(t, isAuthorized) + + return &testFlowAccount{ + address: fromAddress, + deposit: func(vault *types.FLOWTokenVault) { + deposited = true + assert.Equal( + t, + types.Balance(expectedBalance), + vault.Balance(), + ) + }, + } + }, + } + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + import FlowToken from 0x1 + + access(all) + fun main() { + let admin = getAuthAccount(0x1) + .borrow<&FlowToken.Administrator>(from: /storage/flowTokenAdmin)! + let minter <- admin.createNewMinter(allowedAmount: 1.23) + let vault <- minter.mintTokens(amount: 1.23) + destroy minter + + let address = EVM.EVMAddress( + bytes: [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + address.deposit(from: <-vault) + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + _, err = rt.ExecuteScript( + runtime.Script{ + Source: script, + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + require.True(t, deposited) +} + +func TestBridgedAccountWithdraw(t *testing.T) { + + t.Parallel() + + expectedDepositBalance, err := cadence.NewUFix64FromParts(2, 34000000) + require.NoError(t, err) + + expectedWithdrawBalance, err := cadence.NewUFix64FromParts(1, 23000000) + require.NoError(t, err) + + var deposited bool + var withdrew bool + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + handler := &testContractHandler{ + flowTokenAddress: common.Address(contractsAddress), + accountByAddress: func(fromAddress types.Address, isAuthorized bool) types.Account { + assert.Equal(t, types.Address{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, fromAddress) + assert.Equal(t, deposited, isAuthorized) + + return &testFlowAccount{ + address: fromAddress, + deposit: func(vault *types.FLOWTokenVault) { + deposited = true + assert.Equal(t, + types.Balance(expectedDepositBalance), + vault.Balance(), + ) + }, + withdraw: func(balance types.Balance) *types.FLOWTokenVault { + assert.Equal(t, + types.Balance(expectedWithdrawBalance), + balance, + ) + withdrew = true + return types.NewFlowTokenVault(balance) + }, + } + }, + } + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + import FlowToken from 0x1 + + access(all) + fun main(): UFix64 { + let admin = getAuthAccount(0x1) + .borrow<&FlowToken.Administrator>(from: /storage/flowTokenAdmin)! + let minter <- admin.createNewMinter(allowedAmount: 2.34) + let vault <- minter.mintTokens(amount: 2.34) + destroy minter + + let bridgedAccount <- EVM.createBridgedAccount() + bridgedAccount.address().deposit(from: <-vault) + + let vault2 <- bridgedAccount.withdraw(balance: EVM.Balance(flow: 1.23)) + let balance = vault2.balance + destroy bridgedAccount + destroy vault2 + + return balance + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + result, err := rt.ExecuteScript( + runtime.Script{ + Source: script, + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + assert.True(t, deposited) + assert.True(t, withdrew) + assert.Equal(t, expectedWithdrawBalance, result) +} + +func TestBridgedAccountDeploy(t *testing.T) { + + t.Parallel() + + var deployed bool + + contractsAddress := flow.BytesToAddress([]byte{0x1}) + + expectedBalance, err := cadence.NewUFix64FromParts(1, 23000000) + require.NoError(t, err) + + var handler *testContractHandler + handler = &testContractHandler{ + flowTokenAddress: common.Address(contractsAddress), + accountByAddress: func(fromAddress types.Address, isAuthorized bool) types.Account { + assert.Equal(t, types.Address{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, fromAddress) + assert.True(t, isAuthorized) + + return &testFlowAccount{ + address: fromAddress, + deploy: func(code types.Code, limit types.GasLimit, balance types.Balance) types.Address { + deployed = true + assert.Equal(t, types.Code{4, 5, 6}, code) + assert.Equal(t, types.GasLimit(9999), limit) + assert.Equal(t, types.Balance(expectedBalance), balance) + + return handler.AllocateAddress() + }, + } + }, + } + + transactionEnvironment := newEVMTransactionEnvironment(handler, contractsAddress) + scriptEnvironment := newEVMScriptEnvironment(handler, contractsAddress) + + rt := runtime.NewInterpreterRuntime(runtime.Config{}) + + script := []byte(` + import EVM from 0x1 + import FlowToken from 0x1 + + access(all) + fun main(): [UInt8; 20] { + let bridgedAccount <- EVM.createBridgedAccount() + let address = bridgedAccount.deploy( + code: [4, 5, 6], + gasLimit: 9999, + value: EVM.Balance(flow: 1.23) + ) + destroy bridgedAccount + return address.bytes + } + `) + + accountCodes := map[common.Location][]byte{} + var events []cadence.Event + + runtimeInterface := &TestRuntimeInterface{ + Storage: NewTestLedger(nil, nil), + OnGetSigningAccounts: func() ([]runtime.Address, error) { + return []runtime.Address{runtime.Address(contractsAddress)}, nil + }, + OnResolveLocation: SingleIdentifierLocationResolver(t), + OnUpdateAccountContractCode: func(location common.AddressLocation, code []byte) error { + accountCodes[location] = code + return nil + }, + OnGetAccountContractCode: func(location common.AddressLocation) (code []byte, err error) { + code = accountCodes[location] + return code, nil + }, + OnEmitEvent: func(event cadence.Event) error { + events = append(events, event) + return nil + }, + OnDecodeArgument: func(b []byte, t cadence.Type) (cadence.Value, error) { + return json.Decode(nil, b) + }, + } + + nextTransactionLocation := NewTransactionLocationGenerator() + nextScriptLocation := NewScriptLocationGenerator() + + // Deploy contracts + + deployContracts( + t, + rt, + contractsAddress, + runtimeInterface, + transactionEnvironment, + nextTransactionLocation, + ) + + // Run script + + actual, err := rt.ExecuteScript( + runtime.Script{ + Source: script, + }, + runtime.Context{ + Interface: runtimeInterface, + Environment: scriptEnvironment, + Location: nextScriptLocation(), + }, + ) + require.NoError(t, err) + + expected := cadence.NewArray([]cadence.Value{ + cadence.UInt8(2), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + cadence.UInt8(0), cadence.UInt8(0), + }).WithType(cadence.NewConstantSizedArrayType( + types.AddressLength, + cadence.UInt8Type{}, + )) + + require.Equal(t, expected, actual) + + require.True(t, deployed) +} diff --git a/fvm/evm/testutils/accounts.go b/fvm/evm/testutils/accounts.go index 237474da400..34487b8e91f 100644 --- a/fvm/evm/testutils/accounts.go +++ b/fvm/evm/testutils/accounts.go @@ -98,6 +98,13 @@ func (a *EOATestAccount) signTx( return tx } +func (a *EOATestAccount) SetNonce(nonce uint64) { + a.lock.Lock() + defer a.lock.Unlock() + + a.nonce = nonce +} + func GetTestEOAAccount(t testing.TB, keyHex string) *EOATestAccount { key, _ := gethCrypto.HexToECDSA(keyHex) address := gethCrypto.PubkeyToAddress(key.PublicKey) @@ -110,7 +117,12 @@ func GetTestEOAAccount(t testing.TB, keyHex string) *EOATestAccount { } } -func RunWithEOATestAccount(t *testing.T, led atree.Ledger, flowEVMRootAddress flow.Address, f func(*EOATestAccount)) { +func RunWithEOATestAccount(t testing.TB, led atree.Ledger, flowEVMRootAddress flow.Address, f func(*EOATestAccount)) { + account := FundAndGetEOATestAccount(t, led, flowEVMRootAddress) + f(account) +} + +func FundAndGetEOATestAccount(t testing.TB, led atree.Ledger, flowEVMRootAddress flow.Address) *EOATestAccount { account := GetTestEOAAccount(t, EOATestAccount1KeyHex) // fund account @@ -131,5 +143,12 @@ func RunWithEOATestAccount(t *testing.T, led atree.Ledger, flowEVMRootAddress fl ) require.NoError(t, err) - f(account) + blk2, err := e.NewReadOnlyBlockView(types.NewDefaultBlockContext(2)) + require.NoError(t, err) + + bal, err := blk2.BalanceOf(account.Address()) + require.NoError(t, err) + require.Greater(t, bal.Uint64(), uint64(0)) + + return account } diff --git a/fvm/evm/testutils/backend.go b/fvm/evm/testutils/backend.go index 477f1dc89fb..4625cf44e10 100644 --- a/fvm/evm/testutils/backend.go +++ b/fvm/evm/testutils/backend.go @@ -6,13 +6,14 @@ import ( "math" "testing" + jsoncdc "github.com/onflow/cadence/encoding/json" + "github.com/onflow/atree" "github.com/onflow/cadence" "github.com/onflow/cadence/runtime/common" "github.com/stretchr/testify/require" "github.com/onflow/flow-go/fvm/environment" - "github.com/onflow/flow-go/fvm/evm/types" "github.com/onflow/flow-go/fvm/meter" "github.com/onflow/flow-go/model/flow" ) @@ -27,8 +28,8 @@ func RunWithTestFlowEVMRootAddress(t testing.TB, backend atree.Ledger, f func(fl f(TestFlowEVMRootAddress) } -func RunWithTestBackend(t testing.TB, f func(types.Backend)) { - tb := &testBackend{ +func RunWithTestBackend(t testing.TB, f func(*TestBackend)) { + tb := &TestBackend{ TestValueStore: GetSimpleValueStore(), testEventEmitter: getSimpleEventEmitter(), testMeter: getSimpleMeter(), @@ -71,19 +72,37 @@ func GetSimpleValueStore() *TestValueStore { binary.BigEndian.PutUint64(data[:], index) return atree.StorageIndex(data), nil }, + TotalStorageSizeFunc: func() int { + sum := 0 + for key, value := range data { + sum += len(key) + len(value) + } + for key := range allocator { + sum += len(key) + 8 + } + return sum + }, } } func getSimpleEventEmitter() *testEventEmitter { events := make(flow.EventsList, 0) return &testEventEmitter{ - emitFlowEvent: func(etype flow.EventType, payload []byte) error { - events = append(events, flow.Event{Type: etype, Payload: payload}) + emitEvent: func(event cadence.Event) error { + payload, err := jsoncdc.Encode(event) + if err != nil { + return err + } + + events = append(events, flow.Event{Type: flow.EventType(event.EventType.QualifiedIdentifier), Payload: payload}) return nil }, events: func() flow.EventsList { return events }, + reset: func() { + events = make(flow.EventsList, 0) + }, } } @@ -107,17 +126,32 @@ func getSimpleMeter() *testMeter { } } -type testBackend struct { +type TestBackend struct { *TestValueStore *testMeter *testEventEmitter } +func (tb *TestBackend) TotalStorageSize() int { + if tb.TotalStorageSizeFunc == nil { + panic("method not set") + } + return tb.TotalStorageSizeFunc() +} + +func (tb *TestBackend) DropEvents() { + if tb.reset == nil { + panic("method not set") + } + tb.reset() +} + type TestValueStore struct { GetValueFunc func(owner, key []byte) ([]byte, error) SetValueFunc func(owner, key, value []byte) error ValueExistsFunc func(owner, key []byte) (bool, error) AllocateStorageIndexFunc func(owner []byte) (atree.StorageIndex, error) + TotalStorageSizeFunc func() int } var _ environment.ValueStore = &TestValueStore{} @@ -150,6 +184,13 @@ func (vs *TestValueStore) AllocateStorageIndex(owner []byte) (atree.StorageIndex return vs.AllocateStorageIndexFunc(owner) } +func (vs *TestValueStore) TotalStorageSize() int { + if vs.TotalStorageSizeFunc == nil { + panic("method not set") + } + return vs.TotalStorageSizeFunc() +} + type testMeter struct { meterComputation func(common.ComputationKind, uint) error hasComputationCapacity func(common.ComputationKind, uint) bool @@ -238,7 +279,6 @@ func (m *testMeter) TotalEmittedEventBytes() uint64 { type testEventEmitter struct { emitEvent func(event cadence.Event) error - emitFlowEvent func(etype flow.EventType, payload []byte) error events func() flow.EventsList serviceEvents func() flow.EventsList convertedServiceEvents func() flow.ServiceEventList @@ -254,13 +294,6 @@ func (vs *testEventEmitter) EmitEvent(event cadence.Event) error { return vs.emitEvent(event) } -func (vs *testEventEmitter) EmitFlowEvent(etype flow.EventType, payload []byte) error { - if vs.emitFlowEvent == nil { - panic("method not set") - } - return vs.emitFlowEvent(etype, payload) -} - func (vs *testEventEmitter) Events() flow.EventsList { if vs.events == nil { panic("method not set") diff --git a/fvm/evm/testutils/cadence.go b/fvm/evm/testutils/cadence.go new file mode 100644 index 00000000000..a35070c3f69 --- /dev/null +++ b/fvm/evm/testutils/cadence.go @@ -0,0 +1,691 @@ +package testutils + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "errors" + "fmt" + "strconv" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/onflow/atree" + "github.com/onflow/cadence" + "github.com/onflow/cadence/encoding/json" + "github.com/onflow/cadence/runtime" + "github.com/onflow/cadence/runtime/common" + "github.com/onflow/cadence/runtime/interpreter" + "github.com/onflow/cadence/runtime/sema" + cadenceStdlib "github.com/onflow/cadence/runtime/stdlib" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" +) + +// TODO: replace with Cadence runtime testing utils once available https://github.com/onflow/cadence/pull/2800 + +func SingleIdentifierLocationResolver(t testing.TB) func( + identifiers []runtime.Identifier, + location runtime.Location, +) ( + []runtime.ResolvedLocation, + error, +) { + return func(identifiers []runtime.Identifier, location runtime.Location) ([]runtime.ResolvedLocation, error) { + require.Len(t, identifiers, 1) + require.IsType(t, common.AddressLocation{}, location) + + return []runtime.ResolvedLocation{ + { + Location: common.AddressLocation{ + Address: location.(common.AddressLocation).Address, + Name: identifiers[0].Identifier, + }, + Identifiers: identifiers, + }, + }, nil + } +} + +func newLocationGenerator[T ~[32]byte]() func() T { + var count uint64 + return func() T { + t := T{} + newCount := atomic.AddUint64(&count, 1) + binary.LittleEndian.PutUint64(t[:], newCount) + return t + } +} + +func NewTransactionLocationGenerator() func() common.TransactionLocation { + return newLocationGenerator[common.TransactionLocation]() +} + +func NewScriptLocationGenerator() func() common.ScriptLocation { + return newLocationGenerator[common.ScriptLocation]() +} + +func EncodeArgs(argValues []cadence.Value) [][]byte { + args := make([][]byte, len(argValues)) + for i, arg := range argValues { + var err error + args[i], err = json.Encode(arg) + if err != nil { + panic(fmt.Errorf("broken test: invalid argument: %w", err)) + } + } + return args +} + +type TestLedger struct { + StoredValues map[string][]byte + OnValueExists func(owner, key []byte) (exists bool, err error) + OnGetValue func(owner, key []byte) (value []byte, err error) + OnSetValue func(owner, key, value []byte) (err error) + OnAllocateStorageIndex func(owner []byte) (atree.StorageIndex, error) +} + +var _ atree.Ledger = TestLedger{} + +func (s TestLedger) GetValue(owner, key []byte) (value []byte, err error) { + return s.OnGetValue(owner, key) +} + +func (s TestLedger) SetValue(owner, key, value []byte) (err error) { + return s.OnSetValue(owner, key, value) +} + +func (s TestLedger) ValueExists(owner, key []byte) (exists bool, err error) { + return s.OnValueExists(owner, key) +} + +func (s TestLedger) AllocateStorageIndex(owner []byte) (atree.StorageIndex, error) { + return s.OnAllocateStorageIndex(owner) +} + +func (s TestLedger) Dump() { + // Only used for testing/debugging purposes + for key, data := range s.StoredValues { //nolint:maprange + fmt.Printf("%s:\n", strconv.Quote(key)) + fmt.Printf("%s\n", hex.Dump(data)) + println() + } +} + +func NewTestLedger( + onRead func(owner, key, value []byte), + onWrite func(owner, key, value []byte), +) TestLedger { + + storageKey := func(owner, key string) string { + return strings.Join([]string{owner, key}, "|") + } + + storedValues := map[string][]byte{} + + storageIndices := map[string]uint64{} + + return TestLedger{ + StoredValues: storedValues, + OnValueExists: func(owner, key []byte) (bool, error) { + value := storedValues[storageKey(string(owner), string(key))] + return len(value) > 0, nil + }, + OnGetValue: func(owner, key []byte) (value []byte, err error) { + value = storedValues[storageKey(string(owner), string(key))] + if onRead != nil { + onRead(owner, key, value) + } + return value, nil + }, + OnSetValue: func(owner, key, value []byte) (err error) { + storedValues[storageKey(string(owner), string(key))] = value + if onWrite != nil { + onWrite(owner, key, value) + } + return nil + }, + OnAllocateStorageIndex: func(owner []byte) (result atree.StorageIndex, err error) { + index := storageIndices[string(owner)] + 1 + storageIndices[string(owner)] = index + binary.BigEndian.PutUint64(result[:], index) + return + }, + } +} + +type TestRuntimeInterface struct { + Storage atree.Ledger + + OnResolveLocation func( + identifiers []runtime.Identifier, + location runtime.Location, + ) ( + []runtime.ResolvedLocation, + error, + ) + OnGetCode func(_ runtime.Location) ([]byte, error) + OnGetAndSetProgram func( + location runtime.Location, + load func() (*interpreter.Program, error), + ) (*interpreter.Program, error) + OnSetInterpreterSharedState func(state *interpreter.SharedState) + OnGetInterpreterSharedState func() *interpreter.SharedState + OnCreateAccount func(payer runtime.Address) (address runtime.Address, err error) + OnAddEncodedAccountKey func(address runtime.Address, publicKey []byte) error + OnRemoveEncodedAccountKey func(address runtime.Address, index int) (publicKey []byte, err error) + OnAddAccountKey func( + address runtime.Address, + publicKey *cadenceStdlib.PublicKey, + hashAlgo runtime.HashAlgorithm, + weight int, + ) (*cadenceStdlib.AccountKey, error) + OnGetAccountKey func(address runtime.Address, index int) (*cadenceStdlib.AccountKey, error) + OnRemoveAccountKey func(address runtime.Address, index int) (*cadenceStdlib.AccountKey, error) + OnAccountKeysCount func(address runtime.Address) (uint64, error) + OnUpdateAccountContractCode func(location common.AddressLocation, code []byte) error + OnGetAccountContractCode func(location common.AddressLocation) (code []byte, err error) + OnRemoveAccountContractCode func(location common.AddressLocation) (err error) + OnGetSigningAccounts func() ([]runtime.Address, error) + OnProgramLog func(string) + OnEmitEvent func(cadence.Event) error + OnResourceOwnerChanged func( + interpreter *interpreter.Interpreter, + resource *interpreter.CompositeValue, + oldAddress common.Address, + newAddress common.Address, + ) + OnGenerateUUID func() (uint64, error) + OnMeterComputation func(compKind common.ComputationKind, intensity uint) error + OnDecodeArgument func(b []byte, t cadence.Type) (cadence.Value, error) + OnProgramParsed func(location runtime.Location, duration time.Duration) + OnProgramChecked func(location runtime.Location, duration time.Duration) + OnProgramInterpreted func(location runtime.Location, duration time.Duration) + OnReadRandom func([]byte) error + OnVerifySignature func( + signature []byte, + tag string, + signedData []byte, + publicKey []byte, + signatureAlgorithm runtime.SignatureAlgorithm, + hashAlgorithm runtime.HashAlgorithm, + ) (bool, error) + OnHash func( + data []byte, + tag string, + hashAlgorithm runtime.HashAlgorithm, + ) ([]byte, error) + OnSetCadenceValue func(owner runtime.Address, key string, value cadence.Value) (err error) + OnGetAccountBalance func(_ runtime.Address) (uint64, error) + OnGetAccountAvailableBalance func(_ runtime.Address) (uint64, error) + OnGetStorageUsed func(_ runtime.Address) (uint64, error) + OnGetStorageCapacity func(_ runtime.Address) (uint64, error) + Programs map[runtime.Location]*interpreter.Program + OnImplementationDebugLog func(message string) error + OnValidatePublicKey func(publicKey *cadenceStdlib.PublicKey) error + OnBLSVerifyPOP func(pk *cadenceStdlib.PublicKey, s []byte) (bool, error) + OnBLSAggregateSignatures func(sigs [][]byte) ([]byte, error) + OnBLSAggregatePublicKeys func(keys []*cadenceStdlib.PublicKey) (*cadenceStdlib.PublicKey, error) + OnGetAccountContractNames func(address runtime.Address) ([]string, error) + OnRecordTrace func( + operation string, + location runtime.Location, + duration time.Duration, + attrs []attribute.KeyValue, + ) + OnMeterMemory func(usage common.MemoryUsage) error + OnComputationUsed func() (uint64, error) + OnMemoryUsed func() (uint64, error) + OnInteractionUsed func() (uint64, error) + OnGenerateAccountID func(address common.Address) (uint64, error) + + lastUUID uint64 + accountIDs map[common.Address]uint64 + updatedContractCode bool +} + +// TestRuntimeInterface should implement Interface +var _ runtime.Interface = &TestRuntimeInterface{} + +func (i *TestRuntimeInterface) ResolveLocation( + identifiers []runtime.Identifier, + location runtime.Location, +) ([]runtime.ResolvedLocation, error) { + if i.OnResolveLocation == nil { + return []runtime.ResolvedLocation{ + { + Location: location, + Identifiers: identifiers, + }, + }, nil + } + return i.OnResolveLocation(identifiers, location) +} + +func (i *TestRuntimeInterface) GetCode(location runtime.Location) ([]byte, error) { + if i.OnGetCode == nil { + return nil, nil + } + return i.OnGetCode(location) +} + +func (i *TestRuntimeInterface) GetOrLoadProgram( + location runtime.Location, + load func() (*interpreter.Program, error), +) ( + program *interpreter.Program, + err error, +) { + if i.OnGetAndSetProgram == nil { + if i.Programs == nil { + i.Programs = map[runtime.Location]*interpreter.Program{} + } + + var ok bool + program, ok = i.Programs[location] + if ok { + return + } + + program, err = load() + + // NOTE: important: still set empty program, + // even if error occurred + + i.Programs[location] = program + + return + } + + return i.OnGetAndSetProgram(location, load) +} + +func (i *TestRuntimeInterface) SetInterpreterSharedState(state *interpreter.SharedState) { + if i.OnSetInterpreterSharedState == nil { + return + } + + i.OnSetInterpreterSharedState(state) +} + +func (i *TestRuntimeInterface) GetInterpreterSharedState() *interpreter.SharedState { + if i.OnGetInterpreterSharedState == nil { + return nil + } + + return i.OnGetInterpreterSharedState() +} + +func (i *TestRuntimeInterface) ValueExists(owner, key []byte) (exists bool, err error) { + return i.Storage.ValueExists(owner, key) +} + +func (i *TestRuntimeInterface) GetValue(owner, key []byte) (value []byte, err error) { + return i.Storage.GetValue(owner, key) +} + +func (i *TestRuntimeInterface) SetValue(owner, key, value []byte) (err error) { + return i.Storage.SetValue(owner, key, value) +} + +func (i *TestRuntimeInterface) AllocateStorageIndex(owner []byte) (atree.StorageIndex, error) { + return i.Storage.AllocateStorageIndex(owner) +} + +func (i *TestRuntimeInterface) CreateAccount(payer runtime.Address) (address runtime.Address, err error) { + if i.OnCreateAccount == nil { + panic("must specify TestRuntimeInterface.OnCreateAccount") + } + return i.OnCreateAccount(payer) +} + +func (i *TestRuntimeInterface) AddEncodedAccountKey(address runtime.Address, publicKey []byte) error { + if i.OnAddEncodedAccountKey == nil { + panic("must specify TestRuntimeInterface.OnAddEncodedAccountKey") + } + return i.OnAddEncodedAccountKey(address, publicKey) +} + +func (i *TestRuntimeInterface) RevokeEncodedAccountKey(address runtime.Address, index int) ([]byte, error) { + if i.OnRemoveEncodedAccountKey == nil { + panic("must specify TestRuntimeInterface.OnRemoveEncodedAccountKey") + } + return i.OnRemoveEncodedAccountKey(address, index) +} + +func (i *TestRuntimeInterface) AddAccountKey( + address runtime.Address, + publicKey *cadenceStdlib.PublicKey, + hashAlgo runtime.HashAlgorithm, + weight int, +) (*cadenceStdlib.AccountKey, error) { + if i.OnAddAccountKey == nil { + panic("must specify TestRuntimeInterface.OnAddAccountKey") + } + return i.OnAddAccountKey(address, publicKey, hashAlgo, weight) +} + +func (i *TestRuntimeInterface) GetAccountKey(address runtime.Address, index int) (*cadenceStdlib.AccountKey, error) { + if i.OnGetAccountKey == nil { + panic("must specify TestRuntimeInterface.OnGetAccountKey") + } + return i.OnGetAccountKey(address, index) +} + +func (i *TestRuntimeInterface) AccountKeysCount(address runtime.Address) (uint64, error) { + if i.OnAccountKeysCount == nil { + panic("must specify TestRuntimeInterface.OnAccountKeysCount") + } + return i.OnAccountKeysCount(address) +} + +func (i *TestRuntimeInterface) RevokeAccountKey(address runtime.Address, index int) (*cadenceStdlib.AccountKey, error) { + if i.OnRemoveAccountKey == nil { + panic("must specify TestRuntimeInterface.OnRemoveAccountKey") + } + return i.OnRemoveAccountKey(address, index) +} + +func (i *TestRuntimeInterface) UpdateAccountContractCode(location common.AddressLocation, code []byte) (err error) { + if i.OnUpdateAccountContractCode == nil { + panic("must specify TestRuntimeInterface.OnUpdateAccountContractCode") + } + + err = i.OnUpdateAccountContractCode(location, code) + if err != nil { + return err + } + + i.updatedContractCode = true + + return nil +} + +func (i *TestRuntimeInterface) GetAccountContractCode(location common.AddressLocation) (code []byte, err error) { + if i.OnGetAccountContractCode == nil { + panic("must specify TestRuntimeInterface.OnGetAccountContractCode") + } + return i.OnGetAccountContractCode(location) +} + +func (i *TestRuntimeInterface) RemoveAccountContractCode(location common.AddressLocation) (err error) { + if i.OnRemoveAccountContractCode == nil { + panic("must specify TestRuntimeInterface.OnRemoveAccountContractCode") + } + return i.OnRemoveAccountContractCode(location) +} + +func (i *TestRuntimeInterface) GetSigningAccounts() ([]runtime.Address, error) { + if i.OnGetSigningAccounts == nil { + return nil, nil + } + return i.OnGetSigningAccounts() +} + +func (i *TestRuntimeInterface) ProgramLog(message string) error { + i.OnProgramLog(message) + return nil +} + +func (i *TestRuntimeInterface) EmitEvent(event cadence.Event) error { + return i.OnEmitEvent(event) +} + +func (i *TestRuntimeInterface) ResourceOwnerChanged( + interpreter *interpreter.Interpreter, + resource *interpreter.CompositeValue, + oldOwner common.Address, + newOwner common.Address, +) { + if i.OnResourceOwnerChanged != nil { + i.OnResourceOwnerChanged( + interpreter, + resource, + oldOwner, + newOwner, + ) + } +} + +func (i *TestRuntimeInterface) GenerateUUID() (uint64, error) { + if i.OnGenerateUUID == nil { + i.lastUUID++ + return i.lastUUID, nil + } + return i.OnGenerateUUID() +} + +func (i *TestRuntimeInterface) MeterComputation(compKind common.ComputationKind, intensity uint) error { + if i.OnMeterComputation == nil { + return nil + } + return i.OnMeterComputation(compKind, intensity) +} + +func (i *TestRuntimeInterface) DecodeArgument(b []byte, t cadence.Type) (cadence.Value, error) { + if i.OnDecodeArgument == nil { + panic("must specify TestRuntimeInterface.OnDecodeArgument") + } + return i.OnDecodeArgument(b, t) +} + +func (i *TestRuntimeInterface) ProgramParsed(location runtime.Location, duration time.Duration) { + if i.OnProgramParsed == nil { + return + } + i.OnProgramParsed(location, duration) +} + +func (i *TestRuntimeInterface) ProgramChecked(location runtime.Location, duration time.Duration) { + if i.OnProgramChecked == nil { + return + } + i.OnProgramChecked(location, duration) +} + +func (i *TestRuntimeInterface) ProgramInterpreted(location runtime.Location, duration time.Duration) { + if i.OnProgramInterpreted == nil { + return + } + i.OnProgramInterpreted(location, duration) +} + +func (i *TestRuntimeInterface) GetCurrentBlockHeight() (uint64, error) { + return 1, nil +} + +func (i *TestRuntimeInterface) GetBlockAtHeight(height uint64) (block cadenceStdlib.Block, exists bool, err error) { + + buf := new(bytes.Buffer) + err = binary.Write(buf, binary.BigEndian, height) + if err != nil { + panic(err) + } + + encoded := buf.Bytes() + var hash cadenceStdlib.BlockHash + copy(hash[sema.BlockTypeIdFieldType.Size-int64(len(encoded)):], encoded) + + block = cadenceStdlib.Block{ + Height: height, + View: height, + Hash: hash, + Timestamp: time.Unix(int64(height), 0).UnixNano(), + } + return block, true, nil +} + +func (i *TestRuntimeInterface) ReadRandom(buffer []byte) error { + if i.OnReadRandom == nil { + return nil + } + return i.OnReadRandom(buffer) +} + +func (i *TestRuntimeInterface) VerifySignature( + signature []byte, + tag string, + signedData []byte, + publicKey []byte, + signatureAlgorithm runtime.SignatureAlgorithm, + hashAlgorithm runtime.HashAlgorithm, +) (bool, error) { + if i.OnVerifySignature == nil { + return false, nil + } + return i.OnVerifySignature( + signature, + tag, + signedData, + publicKey, + signatureAlgorithm, + hashAlgorithm, + ) +} + +func (i *TestRuntimeInterface) Hash(data []byte, tag string, hashAlgorithm runtime.HashAlgorithm) ([]byte, error) { + if i.OnHash == nil { + return nil, nil + } + return i.OnHash(data, tag, hashAlgorithm) +} + +func (i *TestRuntimeInterface) SetCadenceValue(owner common.Address, key string, value cadence.Value) (err error) { + if i.OnSetCadenceValue == nil { + panic("must specify TestRuntimeInterface.OnSetCadenceValue") + } + return i.OnSetCadenceValue(owner, key, value) +} + +func (i *TestRuntimeInterface) GetAccountBalance(address runtime.Address) (uint64, error) { + if i.OnGetAccountBalance == nil { + panic("must specify TestRuntimeInterface.OnGetAccountBalance") + } + return i.OnGetAccountBalance(address) +} + +func (i *TestRuntimeInterface) GetAccountAvailableBalance(address runtime.Address) (uint64, error) { + if i.OnGetAccountAvailableBalance == nil { + panic("must specify TestRuntimeInterface.OnGetAccountAvailableBalance") + } + return i.OnGetAccountAvailableBalance(address) +} + +func (i *TestRuntimeInterface) GetStorageUsed(address runtime.Address) (uint64, error) { + if i.OnGetStorageUsed == nil { + panic("must specify TestRuntimeInterface.OnGetStorageUsed") + } + return i.OnGetStorageUsed(address) +} + +func (i *TestRuntimeInterface) GetStorageCapacity(address runtime.Address) (uint64, error) { + if i.OnGetStorageCapacity == nil { + panic("must specify TestRuntimeInterface.OnGetStorageCapacity") + } + return i.OnGetStorageCapacity(address) +} + +func (i *TestRuntimeInterface) ImplementationDebugLog(message string) error { + if i.OnImplementationDebugLog == nil { + return nil + } + return i.OnImplementationDebugLog(message) +} + +func (i *TestRuntimeInterface) ValidatePublicKey(key *cadenceStdlib.PublicKey) error { + if i.OnValidatePublicKey == nil { + return errors.New("mock defaults to public key validation failure") + } + + return i.OnValidatePublicKey(key) +} + +func (i *TestRuntimeInterface) BLSVerifyPOP(key *cadenceStdlib.PublicKey, s []byte) (bool, error) { + if i.OnBLSVerifyPOP == nil { + return false, nil + } + + return i.OnBLSVerifyPOP(key, s) +} + +func (i *TestRuntimeInterface) BLSAggregateSignatures(sigs [][]byte) ([]byte, error) { + if i.OnBLSAggregateSignatures == nil { + return []byte{}, nil + } + + return i.OnBLSAggregateSignatures(sigs) +} + +func (i *TestRuntimeInterface) BLSAggregatePublicKeys(keys []*cadenceStdlib.PublicKey) (*cadenceStdlib.PublicKey, error) { + if i.OnBLSAggregatePublicKeys == nil { + return nil, nil + } + + return i.OnBLSAggregatePublicKeys(keys) +} + +func (i *TestRuntimeInterface) GetAccountContractNames(address runtime.Address) ([]string, error) { + if i.OnGetAccountContractNames == nil { + return []string{}, nil + } + + return i.OnGetAccountContractNames(address) +} + +func (i *TestRuntimeInterface) GenerateAccountID(address common.Address) (uint64, error) { + if i.OnGenerateAccountID == nil { + if i.accountIDs == nil { + i.accountIDs = map[common.Address]uint64{} + } + i.accountIDs[address]++ + return i.accountIDs[address], nil + } + + return i.OnGenerateAccountID(address) +} + +func (i *TestRuntimeInterface) RecordTrace( + operation string, + location runtime.Location, + duration time.Duration, + attrs []attribute.KeyValue, +) { + if i.OnRecordTrace == nil { + return + } + i.OnRecordTrace(operation, location, duration, attrs) +} + +func (i *TestRuntimeInterface) MeterMemory(usage common.MemoryUsage) error { + if i.OnMeterMemory == nil { + return nil + } + + return i.OnMeterMemory(usage) +} + +func (i *TestRuntimeInterface) ComputationUsed() (uint64, error) { + if i.OnComputationUsed == nil { + return 0, nil + } + + return i.OnComputationUsed() +} + +func (i *TestRuntimeInterface) MemoryUsed() (uint64, error) { + if i.OnMemoryUsed == nil { + return 0, nil + } + + return i.OnMemoryUsed() +} + +func (i *TestRuntimeInterface) InteractionUsed() (uint64, error) { + if i.OnInteractionUsed == nil { + return 0, nil + } + + return i.OnInteractionUsed() +} diff --git a/fvm/evm/testutils/contract.go b/fvm/evm/testutils/contract.go index a4984974455..78316f44cd2 100644 --- a/fvm/evm/testutils/contract.go +++ b/fvm/evm/testutils/contract.go @@ -26,37 +26,21 @@ type TestContract struct { DeployedAt types.Address } -func (tc *TestContract) MakeStoreCallData(t *testing.T, num *big.Int) []byte { +func (tc *TestContract) MakeCallData(t testing.TB, name string, args ...interface{}) []byte { abi, err := gethABI.JSON(strings.NewReader(tc.ABI)) require.NoError(t, err) - store, err := abi.Pack("store", num) + call, err := abi.Pack(name, args...) require.NoError(t, err) - return store -} - -func (tc *TestContract) MakeRetrieveCallData(t *testing.T) []byte { - abi, err := gethABI.JSON(strings.NewReader(tc.ABI)) - require.NoError(t, err) - retrieve, err := abi.Pack("retrieve") - require.NoError(t, err) - return retrieve -} - -func (tc *TestContract) MakeBlockNumberCallData(t *testing.T) []byte { - abi, err := gethABI.JSON(strings.NewReader(tc.ABI)) - require.NoError(t, err) - blockNum, err := abi.Pack("blockNumber") - require.NoError(t, err) - return blockNum + return call } func (tc *TestContract) SetDeployedAt(deployedAt types.Address) { tc.DeployedAt = deployedAt } -func GetTestContract(t *testing.T) *TestContract { +func GetStorageTestContract(tb testing.TB) *TestContract { byteCodes, err := hex.DecodeString("608060405261022c806100136000396000f3fe608060405234801561001057600080fd5b50600436106100575760003560e01c80632e64cec11461005c57806348b151661461007a57806357e871e7146100985780636057361d146100b657806385df51fd146100d2575b600080fd5b610064610102565b6040516100719190610149565b60405180910390f35b61008261010b565b60405161008f9190610149565b60405180910390f35b6100a0610113565b6040516100ad9190610149565b60405180910390f35b6100d060048036038101906100cb9190610195565b61011b565b005b6100ec60048036038101906100e79190610195565b610125565b6040516100f991906101db565b60405180910390f35b60008054905090565b600042905090565b600043905090565b8060008190555050565b600081409050919050565b6000819050919050565b61014381610130565b82525050565b600060208201905061015e600083018461013a565b92915050565b600080fd5b61017281610130565b811461017d57600080fd5b50565b60008135905061018f81610169565b92915050565b6000602082840312156101ab576101aa610164565b5b60006101b984828501610180565b91505092915050565b6000819050919050565b6101d5816101c2565b82525050565b60006020820190506101f060008301846101cc565b9291505056fea26469706673582212203ee61567a25f0b1848386ae6b8fdbd7733c8a502c83b5ed305b921b7933f4e8164736f6c63430008120033") - require.NoError(t, err) + require.NoError(tb, err) return &TestContract{ Code: ` contract Storage { @@ -165,8 +149,243 @@ func GetTestContract(t *testing.T) *TestContract { } } -func RunWithDeployedContract(t *testing.T, led atree.Ledger, flowEVMRootAddress flow.Address, f func(*TestContract)) { - tc := GetTestContract(t) +func GetDummyKittyTestContract(t testing.TB) *TestContract { + byteCodes, err := hex.DecodeString("608060405234801561001057600080fd5b506107dd806100206000396000f3fe608060405234801561001057600080fd5b50600436106100415760003560e01c8063a45f4bfc14610046578063d0b169d114610076578063ddf252ad146100a6575b600080fd5b610060600480360381019061005b91906104e4565b6100c2565b60405161006d9190610552565b60405180910390f35b610090600480360381019061008b919061056d565b6100f5565b60405161009d91906105e3565b60405180910390f35b6100c060048036038101906100bb919061062a565b610338565b005b60026020528060005260406000206000915054906101000a900473ffffffffffffffffffffffffffffffffffffffff1681565b60008463ffffffff16851461010957600080fd5b8363ffffffff16841461011b57600080fd5b8261ffff16831461012b57600080fd5b60006040518060a001604052808481526020014267ffffffffffffffff1681526020018763ffffffff1681526020018663ffffffff1681526020018561ffff16815250905060018190806001815401808255809150506001900390600052602060002090600202016000909190919091506000820151816000015560208201518160010160006101000a81548167ffffffffffffffff021916908367ffffffffffffffff16021790555060408201518160010160086101000a81548163ffffffff021916908363ffffffff160217905550606082015181600101600c6101000a81548163ffffffff021916908363ffffffff16021790555060808201518160010160106101000a81548161ffff021916908361ffff16021790555050507fc1e409485f45287e73ab1623a8f2ef17af5eac1b4c792ee9ec466e8795e7c09133600054836040015163ffffffff16846060015163ffffffff16856000015160405161029995949392919061067d565b60405180910390a13073ffffffffffffffffffffffffffffffffffffffff1663ddf252ad6000336000546040518463ffffffff1660e01b81526004016102e1939291906106d0565b600060405180830381600087803b1580156102fb57600080fd5b505af115801561030f573d6000803e3d6000fd5b5050505060008081548092919061032590610736565b9190505550600054915050949350505050565b600360008373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600081548092919061038890610736565b9190505550816002600083815260200190815260200160002060006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550600073ffffffffffffffffffffffffffffffffffffffff168373ffffffffffffffffffffffffffffffffffffffff161461046957600360008473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060008154809291906104639061077e565b91905055505b7feaf1c4b3ce0f4f62a2bae7eb3e68225c75f7e6ff4422073b7437b9a78d25f17083838360405161049c939291906106d0565b60405180910390a1505050565b600080fd5b6000819050919050565b6104c1816104ae565b81146104cc57600080fd5b50565b6000813590506104de816104b8565b92915050565b6000602082840312156104fa576104f96104a9565b5b6000610508848285016104cf565b91505092915050565b600073ffffffffffffffffffffffffffffffffffffffff82169050919050565b600061053c82610511565b9050919050565b61054c81610531565b82525050565b60006020820190506105676000830184610543565b92915050565b60008060008060808587031215610587576105866104a9565b5b6000610595878288016104cf565b94505060206105a6878288016104cf565b93505060406105b7878288016104cf565b92505060606105c8878288016104cf565b91505092959194509250565b6105dd816104ae565b82525050565b60006020820190506105f860008301846105d4565b92915050565b61060781610531565b811461061257600080fd5b50565b600081359050610624816105fe565b92915050565b600080600060608486031215610643576106426104a9565b5b600061065186828701610615565b935050602061066286828701610615565b9250506040610673868287016104cf565b9150509250925092565b600060a0820190506106926000830188610543565b61069f60208301876105d4565b6106ac60408301866105d4565b6106b960608301856105d4565b6106c660808301846105d4565b9695505050505050565b60006060820190506106e56000830186610543565b6106f26020830185610543565b6106ff60408301846105d4565b949350505050565b7f4e487b7100000000000000000000000000000000000000000000000000000000600052601160045260246000fd5b6000610741826104ae565b91507fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff820361077357610772610707565b5b600182019050919050565b6000610789826104ae565b91506000820361079c5761079b610707565b5b60018203905091905056fea2646970667358221220ab35c07ec72cc064a663de06ec7f5f919b1a499a25cf6ef0c63a45fdd4a1e91e64736f6c63430008120033") + require.NoError(t, err) + return &TestContract{ + Code: ` + contract DummyKitty { + + event BirthEvent(address owner, uint256 kittyId, uint256 matronId, uint256 sireId, uint256 genes); + event TransferEvent(address from, address to, uint256 tokenId); + + struct Kitty { + uint256 genes; + uint64 birthTime; + uint32 matronId; + uint32 sireId; + uint16 generation; + } + + uint256 idCounter; + + // @dev all kitties + Kitty[] kitties; + + /// @dev a mapping from cat IDs to the address that owns them. + mapping (uint256 => address) public kittyIndexToOwner; + + // @dev a mapping from owner address to count of tokens that address owns. + mapping (address => uint256) ownershipTokenCount; + + /// @dev a method to transfer kitty + function Transfer(address _from, address _to, uint256 _tokenId) external { + // Since the number of kittens is capped to 2^32 we can't overflow this + ownershipTokenCount[_to]++; + // transfer ownership + kittyIndexToOwner[_tokenId] = _to; + // When creating new kittens _from is 0x0, but we can't account that address. + if (_from != address(0)) { + ownershipTokenCount[_from]--; + } + // Emit the transfer event. + emit TransferEvent(_from, _to, _tokenId); + } + + /// @dev a method callable by anyone to create a kitty + function CreateKitty( + uint256 _matronId, + uint256 _sireId, + uint256 _generation, + uint256 _genes + ) + external + returns (uint) + { + + require(_matronId == uint256(uint32(_matronId))); + require(_sireId == uint256(uint32(_sireId))); + require(_generation == uint256(uint16(_generation))); + + Kitty memory _kitty = Kitty({ + genes: _genes, + birthTime: uint64(block.timestamp), + matronId: uint32(_matronId), + sireId: uint32(_sireId), + generation: uint16(_generation) + }); + + kitties.push(_kitty); + + emit BirthEvent( + msg.sender, + idCounter, + uint256(_kitty.matronId), + uint256(_kitty.sireId), + _kitty.genes + ); + + this.Transfer(address(0), msg.sender, idCounter); + + idCounter++; + + return idCounter; + } + } + `, + + ABI: ` + [ + { + "anonymous": false, + "inputs": [ + { + "indexed": false, + "internalType": "address", + "name": "owner", + "type": "address" + }, + { + "indexed": false, + "internalType": "uint256", + "name": "kittyId", + "type": "uint256" + }, + { + "indexed": false, + "internalType": "uint256", + "name": "matronId", + "type": "uint256" + }, + { + "indexed": false, + "internalType": "uint256", + "name": "sireId", + "type": "uint256" + }, + { + "indexed": false, + "internalType": "uint256", + "name": "genes", + "type": "uint256" + } + ], + "name": "BirthEvent", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": false, + "internalType": "address", + "name": "from", + "type": "address" + }, + { + "indexed": false, + "internalType": "address", + "name": "to", + "type": "address" + }, + { + "indexed": false, + "internalType": "uint256", + "name": "tokenId", + "type": "uint256" + } + ], + "name": "TransferEvent", + "type": "event" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "_matronId", + "type": "uint256" + }, + { + "internalType": "uint256", + "name": "_sireId", + "type": "uint256" + }, + { + "internalType": "uint256", + "name": "_generation", + "type": "uint256" + }, + { + "internalType": "uint256", + "name": "_genes", + "type": "uint256" + } + ], + "name": "CreateKitty", + "outputs": [ + { + "internalType": "uint256", + "name": "", + "type": "uint256" + } + ], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "address", + "name": "_from", + "type": "address" + }, + { + "internalType": "address", + "name": "_to", + "type": "address" + }, + { + "internalType": "uint256", + "name": "_tokenId", + "type": "uint256" + } + ], + "name": "Transfer", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "", + "type": "uint256" + } + ], + "name": "kittyIndexToOwner", + "outputs": [ + { + "internalType": "address", + "name": "", + "type": "address" + } + ], + "stateMutability": "view", + "type": "function" + } + ] + `, + ByteCode: byteCodes, + } +} + +func RunWithDeployedContract(t testing.TB, tc *TestContract, led atree.Ledger, flowEVMRootAddress flow.Address, f func(*TestContract)) { + DeployContract(t, tc, led, flowEVMRootAddress) + f(tc) +} + +func DeployContract(t testing.TB, tc *TestContract, led atree.Ledger, flowEVMRootAddress flow.Address) { // deploy contract db, err := database.NewDatabase(led, flowEVMRootAddress) require.NoError(t, err) @@ -185,7 +404,10 @@ func RunWithDeployedContract(t *testing.T, led atree.Ledger, flowEVMRootAddress ) require.NoError(t, err) - res, err := blk.DirectCall( + blk2, err := e.NewBlockView(types.NewDefaultBlockContext(3)) + require.NoError(t, err) + + res, err := blk2.DirectCall( types.NewDeployCall( caller, tc.ByteCode, @@ -196,5 +418,4 @@ func RunWithDeployedContract(t *testing.T, led atree.Ledger, flowEVMRootAddress require.NoError(t, err) tc.SetDeployedAt(res.DeployedContractAddress) - f(tc) } diff --git a/fvm/evm/testutils/emulator.go b/fvm/evm/testutils/emulator.go index 48b3e2218d7..5f7f2ce3068 100644 --- a/fvm/evm/testutils/emulator.go +++ b/fvm/evm/testutils/emulator.go @@ -80,6 +80,10 @@ func RandomCommonHash(t testing.TB) gethCommon.Hash { return ret } +func RandomBigInt(limit int64) *big.Int { + return big.NewInt(rand.Int63n(limit) + 1) +} + func RandomAddress(t testing.TB) types.Address { return types.NewAddress(RandomCommonAddress(t)) } diff --git a/fvm/evm/types/address.go b/fvm/evm/types/address.go index afcaa72e246..134ae6c6cf8 100644 --- a/fvm/evm/types/address.go +++ b/fvm/evm/types/address.go @@ -17,6 +17,9 @@ func NewAddress(addr gethCommon.Address) Address { return Address(addr) } +// EmptyAddress is an empty evm address +var EmptyAddress = Address(gethCommon.Address{}) + // Bytes returns a byte slice for the address func (fa Address) Bytes() []byte { return fa[:] @@ -27,14 +30,16 @@ func (fa Address) ToCommon() gethCommon.Address { return gethCommon.Address(fa) } +// NewAddressFromBytes constructs a new address from bytes +func NewAddressFromBytes(inp []byte) Address { + return Address(gethCommon.BytesToAddress(inp)) +} + // NewAddressFromString constructs a new address from an string func NewAddressFromString(str string) Address { - return Address(gethCommon.BytesToAddress([]byte(str))) + return NewAddressFromBytes([]byte(str)) } -// EmptyAddress is an empty evm address -var EmptyAddress = Address(gethCommon.Address{}) - type GasLimit uint64 type Code []byte diff --git a/fvm/evm/types/events.go b/fvm/evm/types/events.go index fb3e802bb83..148c3f59ede 100644 --- a/fvm/evm/types/events.go +++ b/fvm/evm/types/events.go @@ -1,8 +1,12 @@ package types import ( + "encoding/hex" + gethCommon "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/rlp" + "github.com/onflow/cadence" + "github.com/onflow/cadence/runtime/stdlib" "github.com/onflow/flow-go/model/flow" ) @@ -13,7 +17,8 @@ const ( ) type EventPayload interface { - Encode() ([]byte, error) + // CadenceEvent creates a Cadence event type + CadenceEvent() (cadence.Event, error) } type Event struct { @@ -21,6 +26,7 @@ type Event struct { Payload EventPayload } +// we might break this event into two (tx included /tx executed) if size becomes an issue type TransactionExecutedPayload struct { BlockHeight uint64 TxEncoded []byte @@ -28,8 +34,51 @@ type TransactionExecutedPayload struct { Result *Result } -func (p *TransactionExecutedPayload) Encode() ([]byte, error) { - return rlp.EncodeToBytes(p) +var transactionExecutedEventCadenceType = &cadence.EventType{ + Location: stdlib.FlowLocation{}, + QualifiedIdentifier: string(EventTypeTransactionExecuted), + Fields: []cadence.Field{ + cadence.NewField("blockHeight", cadence.UInt64Type{}), + cadence.NewField("transactionHash", cadence.StringType{}), + cadence.NewField("transaction", cadence.StringType{}), + cadence.NewField("failed", cadence.BoolType{}), + cadence.NewField("transactionType", cadence.UInt8Type{}), + cadence.NewField("gasConsumed", cadence.UInt64Type{}), + cadence.NewField("stateRootHash", cadence.StringType{}), + cadence.NewField("deployedContractAddress", cadence.StringType{}), + cadence.NewField("returnedValue", cadence.StringType{}), + cadence.NewField("logs", cadence.StringType{}), + }, +} + +// todo add decoder for events from cadence to evm payload + +func (p *TransactionExecutedPayload) CadenceEvent() (cadence.Event, error) { + var encodedLogs []byte + var err error + if len(p.Result.Logs) > 0 { + encodedLogs, err = rlp.EncodeToBytes(p.Result.Logs) + if err != nil { + return cadence.Event{}, err + } + } + + fields := []cadence.Value{ + cadence.NewUInt64(p.BlockHeight), + cadence.String(p.TxHash.String()), + cadence.String(hex.EncodeToString(p.TxEncoded)), + cadence.NewBool(p.Result.Failed), + cadence.NewUInt8(p.Result.TxType), + cadence.NewUInt64(p.Result.GasConsumed), + cadence.String(p.Result.StateRootHash.String()), + cadence.String(hex.EncodeToString(p.Result.DeployedContractAddress.Bytes())), + cadence.String(hex.EncodeToString(p.Result.ReturnedValue)), + cadence.String(hex.EncodeToString(encodedLogs)), + } + + return cadence. + NewEvent(fields). + WithType(transactionExecutedEventCadenceType), nil } func NewTransactionExecutedEvent( @@ -49,12 +98,44 @@ func NewTransactionExecutedEvent( } } +var blockExecutedEventCadenceType = &cadence.EventType{ + Location: stdlib.FlowLocation{}, // todo create evm custom location + QualifiedIdentifier: string(EventTypeBlockExecuted), + Fields: []cadence.Field{ + cadence.NewField("height", cadence.UInt64Type{}), + cadence.NewField("totalSupply", cadence.UInt64Type{}), + cadence.NewField("parentHash", cadence.StringType{}), + cadence.NewField("stateRoot", cadence.StringType{}), + cadence.NewField("receiptRoot", cadence.StringType{}), + cadence.NewField( + "transactionHashes", + cadence.NewVariableSizedArrayType(cadence.StringType{}), + ), + }, +} + type BlockExecutedEventPayload struct { Block *Block } -func (p *BlockExecutedEventPayload) Encode() ([]byte, error) { - return rlp.EncodeToBytes(p) +func (p *BlockExecutedEventPayload) CadenceEvent() (cadence.Event, error) { + hashes := make([]cadence.Value, len(p.Block.TransactionHashes)) + for i, hash := range p.Block.TransactionHashes { + hashes[i] = cadence.String(hash.String()) + } + + fields := []cadence.Value{ + cadence.NewUInt64(p.Block.Height), + cadence.NewUInt64(p.Block.TotalSupply), + cadence.String(p.Block.ReceiptRoot.String()), + cadence.String(p.Block.ParentBlockHash.String()), + cadence.String(p.Block.StateRoot.String()), + cadence.NewArray(hashes).WithType(cadence.NewVariableSizedArrayType(cadence.StringType{})), + } + + return cadence. + NewEvent(fields). + WithType(blockExecutedEventCadenceType), nil } func NewBlockExecutedEvent(block *Block) *Event { diff --git a/fvm/evm/types/handler.go b/fvm/evm/types/handler.go index d3775f7987a..3badb5c6175 100644 --- a/fvm/evm/types/handler.go +++ b/fvm/evm/types/handler.go @@ -2,6 +2,7 @@ package types import ( gethCommon "github.com/ethereum/go-ethereum/common" + "github.com/onflow/cadence/runtime/common" "github.com/onflow/flow-go/fvm/environment" ) @@ -37,6 +38,8 @@ type ContractHandler interface { // Run runs a transaction in the evm environment, // collects the gas fees, and transfers the gas fees to the given coinbase account. Run(tx []byte, coinbase Address) + + FlowTokenAddress() common.Address } // Backend passes the FVM functionality needed inside the handler @@ -57,7 +60,7 @@ type BlockStore interface { // LatestBlock returns the latest appended block LatestBlock() (*Block, error) - // returns the hash of the block at the given height + // BlockHash returns the hash of the block at the given height BlockHash(height int) (gethCommon.Hash, error) // BlockProposal returns the block proposal @@ -66,6 +69,6 @@ type BlockStore interface { // CommitBlockProposal commits the block proposal and update the chain of blocks CommitBlockProposal() error - // Resets the block proposal + // ResetBlockProposal resets the block proposal ResetBlockProposal() error } diff --git a/fvm/evm/types/result.go b/fvm/evm/types/result.go index fb6f4087210..6e4248b2d58 100644 --- a/fvm/evm/types/result.go +++ b/fvm/evm/types/result.go @@ -5,7 +5,9 @@ import ( gethTypes "github.com/ethereum/go-ethereum/core/types" ) -// Result captures the result of an interaction with the emulator (direct call or evm tx) +// Result captures the result of an interaction to the emulator +// it could be the out put of a direct call or output of running an +// evm transaction. // Its more comprehensive than typical evm receipt, usually // the receipt generation requires some extra calculation (e.g. Deployed contract address) // but we take a different apporach here and include more data so that diff --git a/fvm/fvm_bench_test.go b/fvm/fvm_bench_test.go index 276c8cb69b8..880f86720ee 100644 --- a/fvm/fvm_bench_test.go +++ b/fvm/fvm_bench_test.go @@ -2,27 +2,28 @@ package fvm_test import ( "context" + "encoding/hex" "encoding/json" "fmt" "io" + "math/big" "strings" "testing" "github.com/ipfs/go-datastore" dssync "github.com/ipfs/go-datastore/sync" blockstore "github.com/ipfs/go-ipfs-blockstore" - "github.com/rs/zerolog" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - + "github.com/onflow/atree" "github.com/onflow/cadence" "github.com/onflow/cadence/encoding/ccf" jsoncdc "github.com/onflow/cadence/encoding/json" "github.com/onflow/cadence/runtime" + "github.com/rs/zerolog" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" flow2 "github.com/onflow/flow-go-sdk" "github.com/onflow/flow-go-sdk/templates" - "github.com/onflow/flow-go/engine/execution" "github.com/onflow/flow-go/engine/execution/computation" "github.com/onflow/flow-go/engine/execution/computation/committer" @@ -31,9 +32,14 @@ import ( bootstrapexec "github.com/onflow/flow-go/engine/execution/state/bootstrap" "github.com/onflow/flow-go/engine/execution/testutil" "github.com/onflow/flow-go/fvm" + "github.com/onflow/flow-go/fvm/environment" + "github.com/onflow/flow-go/fvm/evm/testutils" reusableRuntime "github.com/onflow/flow-go/fvm/runtime" "github.com/onflow/flow-go/fvm/storage/derived" "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/fvm/storage/state" + "github.com/onflow/flow-go/fvm/systemcontracts" + "github.com/onflow/flow-go/fvm/tracing" completeLedger "github.com/onflow/flow-go/ledger/complete" "github.com/onflow/flow-go/ledger/complete/wal/fixtures" "github.com/onflow/flow-go/model/flow" @@ -148,7 +154,9 @@ func NewBasicBlockExecutor(tb testing.TB, chain flow.Chain, logger zerolog.Logge opts := []fvm.Option{ fvm.WithTransactionFeesEnabled(true), - fvm.WithAccountStorageLimit(true), + // TODO (JanezP): enable storage feee once we figure out how storage limits work + // with the EVM account + fvm.WithAccountStorageLimit(false), fvm.WithChain(chain), fvm.WithLogger(logger), fvm.WithMaxStateInteractionSize(interactionLimit), @@ -158,6 +166,7 @@ func NewBasicBlockExecutor(tb testing.TB, chain flow.Chain, logger zerolog.Logge runtime.Config{}, ), ), + fvm.WithEVMEnabled(true), } fvmContext := fvm.NewContext(opts...) @@ -194,6 +203,7 @@ func NewBasicBlockExecutor(tb testing.TB, chain flow.Chain, logger zerolog.Logge fvm.WithMinimumStorageReservation(fvm.DefaultMinimumStorageReservation), fvm.WithTransactionFee(fvm.DefaultTransactionFees), fvm.WithStorageMBPerFLOW(fvm.DefaultStorageMBPerFLOW), + fvm.WithSetupEVMEnabled(true), ) require.NoError(tb, err) @@ -225,7 +235,7 @@ func NewBasicBlockExecutor(tb testing.TB, chain flow.Chain, logger zerolog.Logge me, prov, nil, - nil, + testutil.ProtocolStateWithSourceFixture(nil), 1) // We're interested in fvm's serial execution time require.NoError(tb, err) @@ -280,6 +290,26 @@ func (b *BasicBlockExecutor) ExecuteCollections(tb testing.TB, collections [][]* return computationResult } +func (b *BasicBlockExecutor) RunWithLedger(tb testing.TB, f func(ledger atree.Ledger)) { + ts := state.NewTransactionState(b.activeSnapshot, state.DefaultParameters()) + + accounts := environment.NewAccounts(ts) + meter := environment.NewMeter(ts) + + valueStore := environment.NewValueStore( + tracing.NewMockTracerSpan(), + meter, + accounts, + ) + + f(valueStore) + + newSnapshot, err := ts.FinalizeMainTransaction() + require.NoError(tb, err) + + b.activeSnapshot = b.activeSnapshot.Append(newSnapshot) +} + func (b *BasicBlockExecutor) SetupAccounts(tb testing.TB, privateKeys []flow.AccountPrivateKey) []TestBenchAccount { accounts := make([]TestBenchAccount, 0) serviceAddress := b.Chain(tb).ServiceAddress() @@ -374,6 +404,11 @@ func (l *logExtractor) Write(p []byte) (n int, err error) { var _ io.Writer = &logExtractor{} +type benchTransactionContext struct { + EvmTestContract *testutils.TestContract + EvmTestAccount *testutils.EOATestAccount +} + // BenchmarkRuntimeEmptyTransaction simulates executing blocks with `transactionsPerBlock` // where each transaction is an empty transaction func BenchmarkRuntimeTransaction(b *testing.B) { @@ -388,8 +423,15 @@ func BenchmarkRuntimeTransaction(b *testing.B) { TimeSpent: map[string]uint64{}, InteractionUsed: map[string]uint64{}, } + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + + testContractAddress, err := chain.AddressAtIndex(systemcontracts.EVMAccountIndex + 1) + require.NoError(b, err) - benchTransaction := func(b *testing.B, tx string) { + benchTransaction := func( + b *testing.B, + txStringFunc func(b *testing.B, context benchTransactionContext) string, + ) { logger := zerolog.New(logE).Level(zerolog.DebugLevel) @@ -408,8 +450,13 @@ func BenchmarkRuntimeTransaction(b *testing.B) { for _, account := range accounts { addrs = append(addrs, account.Address) } + // TODO (JanezP): fix when the evm account has a receiver + //evmAddress, err := chain.AddressAtIndex(environment.EVMAccountIndex) + //require.NoError(b, err) + //addrs = append(addrs, evmAddress) + // fund all accounts so not to run into storage problems - fundAccounts(b, blockExecutor, cadence.UFix64(10_000_000_000), addrs...) + fundAccounts(b, blockExecutor, cadence.UFix64(1_000_000_000_000), addrs...) accounts[0].DeployContract(b, blockExecutor, "TestContract", ` access(all) contract TestContract { @@ -423,17 +470,33 @@ func BenchmarkRuntimeTransaction(b *testing.B) { } } `) + require.Equal(b, testContractAddress, accounts[0].Address, + "test contract should be deployed to first available account index") accounts[0].AddArrayToStorage(b, blockExecutor, []string{longString, longString, longString, longString, longString}) - btx := []byte(tx) + tc := testutils.GetStorageTestContract(b) + var evmTestAccount *testutils.EOATestAccount + blockExecutor.RunWithLedger(b, func(ledger atree.Ledger) { + testutils.DeployContract(b, tc, ledger, chain.ServiceAddress()) + evmTestAccount = testutils.FundAndGetEOATestAccount(b, ledger, chain.ServiceAddress()) + }) + + benchTransactionContext := benchTransactionContext{ + EvmTestContract: tc, + EvmTestAccount: evmTestAccount, + } benchmarkAccount := &accounts[0] b.ResetTimer() // setup done, lets start measuring + b.StopTimer() for i := 0; i < b.N; i++ { transactions := make([]*flow.TransactionBody, transactionsPerBlock) for j := 0; j < transactionsPerBlock; j++ { + tx := txStringFunc(b, benchTransactionContext) + + btx := []byte(tx) txBody := flow.NewTransactionBody(). SetScript(btx). AddAuthorizer(benchmarkAccount.Address). @@ -445,8 +508,9 @@ func BenchmarkRuntimeTransaction(b *testing.B) { transactions[j] = txBody } - + b.StartTimer() computationResult := blockExecutor.ExecuteCollections(b, [][]*flow.TransactionBody{transactions}) + b.StopTimer() totalInteractionUsed := uint64(0) totalComputationUsed := uint64(0) results := computationResult.AllTransactionResults() @@ -462,130 +526,265 @@ func BenchmarkRuntimeTransaction(b *testing.B) { } templateTx := func(rep int, prepare string) string { - return fmt.Sprintf(` + return fmt.Sprintf( + ` import FungibleToken from 0x%s import FlowToken from 0x%s import TestContract from 0x%s + import EVM from 0x%s transaction(){ prepare(signer: AuthAccount){ var i = 0 while i < %d { - i = i + 1 + i = i + 1 %s } } - }`, fvm.FungibleTokenAddress(chain), fvm.FlowTokenAddress(chain), "754aed9de6197641", rep, prepare) + }`, + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), + testContractAddress, + sc.FlowServiceAccount.Address.Hex(), + rep, + prepare, + ) } b.Run("reference tx", func(b *testing.B) { - benchTransaction(b, templateTx(100, "")) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, "") + }, + ) }) b.Run("convert int to string", func(b *testing.B) { - benchTransaction(b, templateTx(100, `i.toString()`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `i.toString()`) + }, + ) }) b.Run("convert int to string and concatenate it", func(b *testing.B) { - benchTransaction(b, templateTx(100, `"x".concat(i.toString())`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `"x".concat(i.toString())`) + }, + ) }) b.Run("get signer address", func(b *testing.B) { - benchTransaction(b, templateTx(100, `signer.address`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `signer.address`) + }, + ) }) b.Run("get public account", func(b *testing.B) { - benchTransaction(b, templateTx(100, `getAccount(signer.address)`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `getAccount(signer.address)`) + }, + ) }) b.Run("get account and get balance", func(b *testing.B) { - benchTransaction(b, templateTx(100, `getAccount(signer.address).balance`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `getAccount(signer.address).balance`) + }, + ) }) b.Run("get account and get available balance", func(b *testing.B) { - benchTransaction(b, templateTx(100, `getAccount(signer.address).availableBalance`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `getAccount(signer.address).availableBalance`) + }, + ) }) b.Run("get account and get storage used", func(b *testing.B) { - benchTransaction(b, templateTx(100, `getAccount(signer.address).storageUsed`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `getAccount(signer.address).storageUsed`) + }, + ) }) b.Run("get account and get storage capacity", func(b *testing.B) { - benchTransaction(b, templateTx(100, `getAccount(signer.address).storageCapacity`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `getAccount(signer.address).storageCapacity`) + }, + ) }) b.Run("get signer vault", func(b *testing.B) { benchTransaction( b, - templateTx(100, `let vaultRef = signer.borrow<&FlowToken.Vault>(from: /storage/flowTokenVault)!`), + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `let vaultRef = signer.borrow<&FlowToken.Vault>(from: /storage/flowTokenVault)!`) + }, ) }) b.Run("get signer receiver", func(b *testing.B) { benchTransaction( b, - templateTx(100, `let receiverRef = getAccount(signer.address) - .getCapability(/public/flowTokenReceiver) - .borrow<&{FungibleToken.Receiver}>()!`), + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, + `let receiverRef = getAccount(signer.address) + .getCapability(/public/flowTokenReceiver) + .borrow<&{FungibleToken.Receiver}>()!`) + }, ) }) b.Run("transfer tokens", func(b *testing.B) { benchTransaction( b, - templateTx(100, ` - let receiverRef = getAccount(signer.address) - .getCapability(/public/flowTokenReceiver) - .borrow<&{FungibleToken.Receiver}>()! - - let vaultRef = signer.borrow<&FlowToken.Vault>(from: /storage/flowTokenVault)! - - receiverRef.deposit(from: <-vaultRef.withdraw(amount: 0.00001)) - `), + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, ` + let receiverRef = getAccount(signer.address) + .getCapability(/public/flowTokenReceiver) + .borrow<&{FungibleToken.Receiver}>()! + + let vaultRef = signer.borrow<&FlowToken.Vault>(from: /storage/flowTokenVault)! + + receiverRef.deposit(from: <-vaultRef.withdraw(amount: 0.00001)) + `) + }, ) }) b.Run("load and save empty string on signers address", func(b *testing.B) { benchTransaction( b, - templateTx(100, ` - signer.load(from: /storage/testpath) - signer.save("", to: /storage/testpath) - `), + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, ` + signer.load(from: /storage/testpath) + signer.save("", to: /storage/testpath) + `) + }, ) }) b.Run("load and save long string on signers address", func(b *testing.B) { benchTransaction( b, - templateTx(100, fmt.Sprintf(` - signer.load(from: /storage/testpath) - signer.save("%s", to: /storage/testpath) - `, longString)), + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, fmt.Sprintf(` + signer.load(from: /storage/testpath) + signer.save("%s", to: /storage/testpath) + `, longString)) + }, ) }) b.Run("create new account", func(b *testing.B) { - benchTransaction(b, templateTx(50, `let acct = AuthAccount(payer: signer)`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(50, `let acct = AuthAccount(payer: signer)`) + }, + ) }) b.Run("call empty contract function", func(b *testing.B) { - benchTransaction(b, templateTx(100, `TestContract.empty()`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `TestContract.empty()`) + }, + ) }) b.Run("emit event", func(b *testing.B) { - benchTransaction(b, templateTx(100, `TestContract.emit()`)) + benchTransaction(b, + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, `TestContract.emit()`) + }, + ) }) b.Run("borrow array from storage", func(b *testing.B) { benchTransaction( b, - templateTx(100, ` - let strings = signer.borrow<&[String]>(from: /storage/test)! - var i = 0 - while (i < strings.length) { - log(strings[i]) - i = i +1 - } - `), + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, ` + let strings = signer.borrow<&[String]>(from: /storage/test)! + var i = 0 + while (i < strings.length) { + log(strings[i]) + i = i +1 + } + `) + }, ) }) b.Run("copy array from storage", func(b *testing.B) { benchTransaction( b, - templateTx(100, ` - let strings = signer.copy<[String]>(from: /storage/test)! - var i = 0 - while (i < strings.length) { - log(strings[i]) - i = i +1 + func(b *testing.B, context benchTransactionContext) string { + return templateTx(100, ` + let strings = signer.copy<[String]>(from: /storage/test)! + var i = 0 + while (i < strings.length) { + log(strings[i]) + i = i +1 + } + `) + }, + ) + }) + + benchEvm := func(b *testing.B, control bool) { + // This is the same as the evm benchmark but without the EVM.run call + // This way we can observe the cost of just the EVM.run call + benchTransaction( + b, + func(b *testing.B, context benchTransactionContext) string { + coinbaseBytes := context.EvmTestAccount.Address().Bytes() + transactionBody := fmt.Sprintf(` + let coinbaseBytesRaw = "%s".decodeHex() + let coinbaseBytes: [UInt8; 20] = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] + for j, v in coinbaseBytesRaw { + coinbaseBytes[j] = v + } + let coinbase = EVM.EVMAddress(bytes: coinbaseBytes) + `, hex.EncodeToString(coinbaseBytes)) + + num := int64(12) + gasLimit := uint64(100_000) + + // add 10 EVM transactions to the Flow transaction body + for i := 0; i < 100; i++ { + txBytes := context.EvmTestAccount.PrepareSignAndEncodeTx(b, + context.EvmTestContract.DeployedAt.ToCommon(), + context.EvmTestContract.MakeCallData(b, "store", big.NewInt(num)), + big.NewInt(0), + gasLimit, + big.NewInt(0), + ) + if control { + transactionBody += fmt.Sprintf(` + let txBytes%[1]d = "%[2]s".decodeHex() + EVM.run(tx: txBytes%[1]d, coinbase: coinbase) + `, + i, + hex.EncodeToString(txBytes), + ) + } else { + // don't run the EVM transaction but do the hex conversion + transactionBody += fmt.Sprintf(` + let txBytes%[1]d = "%[2]s".decodeHex() + //EVM.run(tx: txBytes%[1]d, coinbase: coinbase) + `, + i, + hex.EncodeToString(txBytes), + ) + } + } - `), + + return templateTx(1, transactionBody) + }, ) + } + + b.Run("evm", func(b *testing.B) { + benchEvm(b, false) + }) + + b.Run("evm control", func(b *testing.B) { + benchEvm(b, true) }) + } const TransferTxTemplate = ` diff --git a/fvm/fvm_blockcontext_test.go b/fvm/fvm_blockcontext_test.go index 7d68ec0bb62..dcf73b9280b 100644 --- a/fvm/fvm_blockcontext_test.go +++ b/fvm/fvm_blockcontext_test.go @@ -24,13 +24,16 @@ import ( envMock "github.com/onflow/flow-go/fvm/environment/mock" errors "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/utils/unittest" ) func transferTokensTx(chain flow.Chain) *flow.TransactionBody { + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) return flow.NewTransactionBody(). - SetScript([]byte(fmt.Sprintf(` + SetScript([]byte(fmt.Sprintf( + ` // This transaction is a template for a transaction that // could be used by anyone to send tokens to another account // that has been set up to receive tokens. @@ -69,7 +72,10 @@ func transferTokensTx(chain flow.Chain) *flow.TransactionBody { // Deposit the withdrawn tokens in the recipient's receiver receiverRef.deposit(from: <-self.sentVault) } - }`, fvm.FungibleTokenAddress(chain), fvm.FlowTokenAddress(chain))), + }`, + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), + )), ) } @@ -1009,9 +1015,11 @@ func TestBlockContext_ExecuteTransaction_StorageLimit(t *testing.T) { chain) require.NoError(t, err) + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) // deposit more flow to increase capacity txBody := flow.NewTransactionBody(). - SetScript([]byte(fmt.Sprintf(` + SetScript([]byte(fmt.Sprintf( + ` import FungibleToken from %s import FlowToken from %s @@ -1027,10 +1035,12 @@ func TestBlockContext_ExecuteTransaction_StorageLimit(t *testing.T) { ?? panic("Could not borrow receiver reference to the recipient's Vault") receiver.deposit(from: <-payment) } - }`, fvm.FungibleTokenAddress(chain).HexWithPrefix(), - fvm.FlowTokenAddress(chain).HexWithPrefix(), + }`, + sc.FungibleToken.Address.HexWithPrefix(), + sc.FlowToken.Address.HexWithPrefix(), "Container", - hex.EncodeToString([]byte(script))))). + hex.EncodeToString([]byte(script)), + ))). AddAuthorizer(accounts[0]). AddAuthorizer(chain.ServiceAddress()). SetProposalKey(chain.ServiceAddress(), 0, 0). @@ -1816,7 +1826,9 @@ func TestBlockContext_ExecuteTransaction_FailingTransactions(t *testing.T) { address flow.Address, ) uint64 { - code := []byte(fmt.Sprintf(` + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + code := []byte(fmt.Sprintf( + ` import FungibleToken from 0x%s import FlowToken from 0x%s @@ -1828,7 +1840,10 @@ func TestBlockContext_ExecuteTransaction_FailingTransactions(t *testing.T) { return vaultRef.balance } - `, fvm.FungibleTokenAddress(chain), fvm.FlowTokenAddress(chain))) + `, + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), + )) script := fvm.Script(code).WithArguments( jsoncdc.MustEncode(cadence.NewAddress(address)), ) diff --git a/fvm/fvm_fuzz_test.go b/fvm/fvm_fuzz_test.go index 8877540b362..a34e78a95b3 100644 --- a/fvm/fvm_fuzz_test.go +++ b/fvm/fvm_fuzz_test.go @@ -13,10 +13,10 @@ import ( "github.com/onflow/flow-go/engine/execution/testutil" "github.com/onflow/flow-go/fvm" - "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/errors" "github.com/onflow/flow-go/fvm/meter" "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/utils/unittest" ) @@ -220,10 +220,12 @@ const fuzzTestsInclusionFees = uint64(1_000) func getDeductedFees(tb testing.TB, tctx transactionTypeContext, results fuzzResults) (fees cadence.UFix64, deducted bool) { tb.Helper() + sc := systemcontracts.SystemContractsForChain(tctx.chain.ChainID()) + var ok bool var feesDeductedEvent cadence.Event for _, e := range results.output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowFees.FeesDeducted", environment.FlowFeesAddress(tctx.chain)) { + if string(e.Type) == fmt.Sprintf("A.%s.FlowFees.FeesDeducted", sc.FlowFees.Address.Hex()) { data, err := ccf.Decode(nil, e.Payload) require.NoError(tb, err) feesDeductedEvent, ok = data.(cadence.Event) diff --git a/fvm/fvm_test.go b/fvm/fvm_test.go index fcf44bcff2b..4017ec1f638 100644 --- a/fvm/fvm_test.go +++ b/fvm/fvm_test.go @@ -8,6 +8,8 @@ import ( "strings" "testing" + "github.com/onflow/flow-go/fvm/evm/stdlib" + "github.com/onflow/cadence" "github.com/onflow/cadence/encoding/ccf" jsoncdc "github.com/onflow/cadence/encoding/json" @@ -27,6 +29,8 @@ import ( "github.com/onflow/flow-go/fvm/meter" reusableRuntime "github.com/onflow/flow-go/fvm/runtime" "github.com/onflow/flow-go/fvm/storage/snapshot" + "github.com/onflow/flow-go/fvm/storage/testutils" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/utils/unittest" ) @@ -597,8 +601,9 @@ func TestHappyPathTransactionSigning(t *testing.T) { func TestTransactionFeeDeduction(t *testing.T) { getBalance := func(vm fvm.VM, chain flow.Chain, ctx fvm.Context, snapshotTree snapshot.SnapshotTree, address flow.Address) uint64 { - - code := []byte(fmt.Sprintf(` + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + code := []byte(fmt.Sprintf( + ` import FungibleToken from 0x%s import FlowToken from 0x%s @@ -610,7 +615,10 @@ func TestTransactionFeeDeduction(t *testing.T) { return vaultRef.balance } - `, fvm.FungibleTokenAddress(chain), fvm.FlowTokenAddress(chain))) + `, + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), + )) script := fvm.Script(code).WithArguments( jsoncdc.MustEncode(cadence.NewAddress(address)), ) @@ -634,6 +642,12 @@ func TestTransactionFeeDeduction(t *testing.T) { transferAmount := uint64(123_456) minimumStorageReservation := fvm.DefaultMinimumStorageReservation.ToGoValue().(uint64) + chain := flow.Testnet.Chain() + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + depositedEvent := fmt.Sprintf("A.%s.FlowToken.TokensDeposited", sc.FlowToken.Address) + withdrawnEvent := fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", sc.FlowToken.Address) + feesDeductedEvent := fmt.Sprintf("A.%s.FlowFees.FeesDeducted", sc.FlowFees.Address) + testCases := []testCase{ { name: "Transaction fees are deducted", @@ -656,10 +670,10 @@ func TestTransactionFeeDeduction(t *testing.T) { chain := flow.Testnet.Chain() for _, e := range output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -688,7 +702,7 @@ func TestTransactionFeeDeduction(t *testing.T) { var feeDeduction flow.Event // fee deduction event for _, e := range output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowFees.FeesDeducted", environment.FlowFeesAddress(chain)) { + if string(e.Type) == feesDeductedEvent { feeDeduction = e break } @@ -765,10 +779,10 @@ func TestTransactionFeeDeduction(t *testing.T) { chain := flow.Testnet.Chain() for _, e := range output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -792,10 +806,10 @@ func TestTransactionFeeDeduction(t *testing.T) { chain := flow.Testnet.Chain() for _, e := range output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -830,10 +844,10 @@ func TestTransactionFeeDeduction(t *testing.T) { chain := flow.Testnet.Chain() for _, e := range output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -883,10 +897,10 @@ func TestTransactionFeeDeduction(t *testing.T) { chain := flow.Testnet.Chain() for _, e := range output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -910,10 +924,10 @@ func TestTransactionFeeDeduction(t *testing.T) { chain := flow.Testnet.Chain() for _, e := range output.Events { - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensDeposited", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == depositedEvent { deposits = append(deposits, e) } - if string(e.Type) == fmt.Sprintf("A.%s.FlowToken.TokensWithdrawn", fvm.FlowTokenAddress(chain)) { + if string(e.Type) == withdrawnEvent { withdraws = append(withdraws, e) } } @@ -1032,6 +1046,7 @@ func TestTransactionFeeDeduction(t *testing.T) { fvm.WithExecutionMemoryWeights(meter.DefaultMemoryWeights), ).withContextOptions( fvm.WithTransactionFeesEnabled(true), + fvm.WithChain(chain), ).run( runTx(tc)), ) @@ -1049,6 +1064,7 @@ func TestTransactionFeeDeduction(t *testing.T) { ).withContextOptions( fvm.WithTransactionFeesEnabled(true), fvm.WithAccountStorageLimit(true), + fvm.WithChain(chain), ).run( runTx(tc)), ) @@ -1704,9 +1720,11 @@ func TestStorageCapacity(t *testing.T) { snapshotTree = snapshotTree.Append(executionSnapshot) // Perform test + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) txBody := flow.NewTransactionBody(). - SetScript([]byte(fmt.Sprintf(` + SetScript([]byte(fmt.Sprintf( + ` import FungibleToken from 0x%s import FlowToken from 0x%s @@ -1730,8 +1748,8 @@ func TestStorageCapacity(t *testing.T) { log(cap0 - cap1) } }`, - fvm.FungibleTokenAddress(chain), - fvm.FlowTokenAddress(chain), + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), ))). AddArgument(jsoncdc.MustEncode(cadence.NewAddress(target))). AddAuthorizer(signer) @@ -2910,3 +2928,89 @@ func TestEntropyCallExpectsNoParameters(t *testing.T) { }, )(t) } + +func TestTransientNetworkCoreContractAddresses(t *testing.T) { + + // This test ensures that the transient networks have the correct core contract addresses. + newVMTest(). + run( + func( + t *testing.T, + vm fvm.VM, + chain flow.Chain, + ctx fvm.Context, + snapshotTree snapshot.SnapshotTree, + ) { + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + + for _, contract := range sc.All() { + txnState := testutils.NewSimpleTransaction(snapshotTree) + accounts := environment.NewAccounts(txnState) + + yes, err := accounts.ContractExists(contract.Name, contract.Address) + require.NoError(t, err) + require.True(t, yes, "contract %s does not exist", contract.Name) + } + }) +} + +func TestEVM(t *testing.T) { + + t.Run("successful transaction", newVMTest(). + withBootstrapProcedureOptions(fvm.WithSetupEVMEnabled(true)). + // we keep this dissabled during bootstrap and later overwrite in the test for test transaction + withContextOptions( + fvm.WithEVMEnabled(false), + fvm.WithCadenceLogging(true), + ). + run(func( + t *testing.T, + vm fvm.VM, + chain flow.Chain, + ctx fvm.Context, + snapshotTree snapshot.SnapshotTree, + ) { + // generate test address + genArr := make([]cadence.Value, 20) + for i := range genArr { + genArr[i] = cadence.UInt8(i) + } + addrBytes := cadence.NewArray(genArr).WithType(stdlib.EVMAddressBytesCadenceType) + encodedArg, err := jsoncdc.Encode(addrBytes) + require.NoError(t, err) + + txBody := flow.NewTransactionBody(). + SetScript([]byte(fmt.Sprintf(` + import EVM from %s + + transaction(bytes: [UInt8; 20]) { + execute { + let addr = EVM.EVMAddress(bytes: bytes) + log(addr) + } + } + `, chain.ServiceAddress().HexWithPrefix()))). + SetProposalKey(chain.ServiceAddress(), 0, 0). + SetPayer(chain.ServiceAddress()). + AddArgument(encodedArg) + + err = testutil.SignTransactionAsServiceAccount(txBody, 0, chain) + require.NoError(t, err) + + ctx = fvm.NewContextFromParent(ctx, fvm.WithEVMEnabled(true)) + _, output, err := vm.Run( + ctx, + fvm.Transaction(txBody, 0), + snapshotTree) + + require.NoError(t, err) + require.NoError(t, output.Err) + require.Len(t, output.Logs, 1) + require.Equal(t, output.Logs[0], fmt.Sprintf( + "A.%s.EVM.EVMAddress(bytes: %s)", + chain.ServiceAddress(), + addrBytes.String(), + )) + }), + ) +} diff --git a/fvm/runtime/reusable_cadence_runtime.go b/fvm/runtime/reusable_cadence_runtime.go index 307d6959bb2..f2e5c941d77 100644 --- a/fvm/runtime/reusable_cadence_runtime.go +++ b/fvm/runtime/reusable_cadence_runtime.go @@ -28,15 +28,17 @@ var randomSourceFunctionType = &sema.FunctionType{ type ReusableCadenceRuntime struct { runtime.Runtime - runtime.Environment + TxRuntimeEnv runtime.Environment + ScriptRuntimeEnv runtime.Environment fvmEnv Environment } func NewReusableCadenceRuntime(rt runtime.Runtime, config runtime.Config) *ReusableCadenceRuntime { reusable := &ReusableCadenceRuntime{ - Runtime: rt, - Environment: runtime.NewBaseInterpreterEnvironment(config), + Runtime: rt, + TxRuntimeEnv: runtime.NewBaseInterpreterEnvironment(config), + ScriptRuntimeEnv: runtime.NewScriptInterpreterEnvironment(config), } // Declare the `randomSourceHistory` function. This function is **only** used by the @@ -78,7 +80,7 @@ func NewReusableCadenceRuntime(rt runtime.Runtime, config runtime.Config) *Reusa ), } - reusable.DeclareValue(blockRandomSource, nil) + reusable.TxRuntimeEnv.DeclareValue(blockRandomSource, nil) return reusable } @@ -99,7 +101,7 @@ func (reusable *ReusableCadenceRuntime) ReadStored( path, runtime.Context{ Interface: reusable.fvmEnv, - Environment: reusable.Environment, + Environment: reusable.TxRuntimeEnv, }, ) } @@ -120,7 +122,7 @@ func (reusable *ReusableCadenceRuntime) InvokeContractFunction( argumentTypes, runtime.Context{ Interface: reusable.fvmEnv, - Environment: reusable.Environment, + Environment: reusable.TxRuntimeEnv, }, ) } @@ -134,7 +136,7 @@ func (reusable *ReusableCadenceRuntime) NewTransactionExecutor( runtime.Context{ Interface: reusable.fvmEnv, Location: location, - Environment: reusable.Environment, + Environment: reusable.TxRuntimeEnv, }, ) } @@ -149,8 +151,9 @@ func (reusable *ReusableCadenceRuntime) ExecuteScript( return reusable.Runtime.ExecuteScript( script, runtime.Context{ - Interface: reusable.fvmEnv, - Location: location, + Interface: reusable.fvmEnv, + Location: location, + Environment: reusable.ScriptRuntimeEnv, }, ) } diff --git a/fvm/script.go b/fvm/script.go index 23d89027835..c310c73ba00 100644 --- a/fvm/script.go +++ b/fvm/script.go @@ -10,8 +10,10 @@ import ( "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/errors" + "github.com/onflow/flow-go/fvm/evm" "github.com/onflow/flow-go/fvm/storage" "github.com/onflow/flow-go/fvm/storage/logical" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/model/hash" ) @@ -198,6 +200,21 @@ func (executor *scriptExecutor) executeScript() error { rt := executor.env.BorrowCadenceRuntime() defer executor.env.ReturnCadenceRuntime(rt) + if executor.ctx.EVMEnabled { + chain := executor.ctx.Chain + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + err := evm.SetupEnvironment( + chain.ChainID(), + executor.env, + rt.ScriptRuntimeEnv, + chain.ServiceAddress(), + sc.FlowToken.Address, + ) + if err != nil { + return err + } + } + value, err := rt.ExecuteScript( runtime.Script{ Source: executor.proc.Script, diff --git a/fvm/storage/snapshot/execution_snapshot.go b/fvm/storage/snapshot/execution_snapshot.go index 89cabec443a..420c4ffccb4 100644 --- a/fvm/storage/snapshot/execution_snapshot.go +++ b/fvm/storage/snapshot/execution_snapshot.go @@ -37,6 +37,11 @@ func (snapshot *ExecutionSnapshot) UpdatedRegisters() flow.RegisterEntries { return entries } +// UpdatedRegisterSet returns all registers that were updated by this view. +func (snapshot *ExecutionSnapshot) UpdatedRegisterSet() map[flow.RegisterID]flow.RegisterValue { + return snapshot.WriteSet +} + // UpdatedRegisterIDs returns all register ids that were updated by this // view. The returned ids are unsorted. func (snapshot *ExecutionSnapshot) UpdatedRegisterIDs() []flow.RegisterID { diff --git a/fvm/storage/state/execution_state_test.go b/fvm/storage/state/execution_state_test.go index 84184f1f4f7..d12a1f34b6c 100644 --- a/fvm/storage/state/execution_state_test.go +++ b/fvm/storage/state/execution_state_test.go @@ -39,7 +39,7 @@ func TestExecutionState_Finalize(t *testing.T) { require.Equal( t, map[flow.RegisterID]struct{}{ - readId: struct{}{}, + readId: {}, }, childSnapshot.ReadSet) diff --git a/fvm/systemcontracts/system_contracts.go b/fvm/systemcontracts/system_contracts.go index 40067b31ab8..192e5dd67cc 100644 --- a/fvm/systemcontracts/system_contracts.go +++ b/fvm/systemcontracts/system_contracts.go @@ -16,14 +16,16 @@ package systemcontracts import ( "fmt" + "github.com/onflow/flow-core-contracts/lib/go/templates" + "github.com/onflow/flow-go/model/flow" ) const ( - // Unqualified names of system smart contracts (not including address prefix) ContractNameEpoch = "FlowEpoch" + ContractNameIDTableStaking = "FlowIDTableStaking" ContractNameClusterQC = "FlowClusterQC" ContractNameDKG = "FlowDKG" ContractNameServiceAccount = "FlowServiceAccount" @@ -31,6 +33,12 @@ const ( ContractNameStorageFees = "FlowStorageFees" ContractNameNodeVersionBeacon = "NodeVersionBeacon" ContractNameRandomBeaconHistory = "RandomBeaconHistory" + ContractNameFungibleToken = "FungibleToken" + ContractNameFlowToken = "FlowToken" + ContractNameNonFungibleToken = "NonFungibleToken" + ContractNameMetadataViews = "MetadataViews" + ContractNameViewResolver = "ViewResolver" + ContractNameEVM = "EVM" // Unqualified names of service events (not including address prefix or contract name) @@ -47,6 +55,30 @@ const ( ContractStorageFeesFunction_calculateAccountCapacity = "calculateAccountCapacity" ContractStorageFeesFunction_getAccountsCapacityForTransactionStorageCheck = "getAccountsCapacityForTransactionStorageCheck" ContractStorageFeesFunction_defaultTokenAvailableBalance = "defaultTokenAvailableBalance" + + // Indexes of the system contracts that are deployed to an address at a specific index + + FungibleTokenAccountIndex = 2 + FlowTokenAccountIndex = 3 + FlowFeesAccountIndex = 4 + EVMAccountIndex = 5 +) + +// Well-known addresses for system contracts on long-running networks. +// For now, all system contracts tracked by this package are deployed to the same +// address (per chain) as the staking contract. +// +// Ref: https://docs.onflow.org/core-contracts/staking-contract-reference/ +var ( + // stakingContractAddressMainnet is the address of the FlowIDTableStaking contract on Mainnet + stakingContractAddressMainnet = flow.HexToAddress("8624b52f9ddcd04a") + // stakingContractAddressTestnet is the address of the FlowIDTableStaking contract on Testnet + stakingContractAddressTestnet = flow.HexToAddress("9eca2b38b18b5dfe") + + // nftTokenAddressTestnet is the address of the NonFungibleToken contract on Testnet + nftTokenAddressMainnet = flow.HexToAddress("1d7e57aa55817448") + // nftTokenAddressTestnet is the address of the NonFungibleToken contract on Testnet + nftTokenAddressTestnet = flow.HexToAddress("631e88ae7f1d7c20") ) // SystemContract represents a system contract on a particular chain. @@ -76,11 +108,84 @@ func (se ServiceEvent) EventType() flow.EventType { // SystemContracts is a container for all system contracts on a particular chain. type SystemContracts struct { - Epoch SystemContract - ClusterQC SystemContract - DKG SystemContract + // epoch related contracts + Epoch SystemContract + IDTableStaking SystemContract + ClusterQC SystemContract + DKG SystemContract + + // service account related contracts + FlowServiceAccount SystemContract NodeVersionBeacon SystemContract RandomBeaconHistory SystemContract + FlowStorageFees SystemContract + + // token related contracts + FlowFees SystemContract + FlowToken SystemContract + FungibleToken SystemContract + + // NFT related contracts + NonFungibleToken SystemContract + MetadataViews SystemContract + ViewResolver SystemContract + + // EVM related contracts + EVM SystemContract +} + +// AsTemplateEnv returns a template environment with all system contracts filled in. +// This is useful for generating Cadence code from templates. +func (c SystemContracts) AsTemplateEnv() templates.Environment { + return templates.Environment{ + EpochAddress: c.Epoch.Address.Hex(), + IDTableAddress: c.IDTableStaking.Address.Hex(), + QuorumCertificateAddress: c.ClusterQC.Address.Hex(), + DkgAddress: c.DKG.Address.Hex(), + + ServiceAccountAddress: c.FlowServiceAccount.Address.Hex(), + NodeVersionBeaconAddress: c.NodeVersionBeacon.Address.Hex(), + RandomBeaconHistoryAddress: c.RandomBeaconHistory.Address.Hex(), + StorageFeesAddress: c.FlowStorageFees.Address.Hex(), + + FlowFeesAddress: c.FlowFees.Address.Hex(), + FlowTokenAddress: c.FlowToken.Address.Hex(), + FungibleTokenAddress: c.FungibleToken.Address.Hex(), + + // The following contracts dont exist on the template env yet + // that is not a problem, but they are still listed here for completeness. + + // NonFungibleToken: c.NonFungibleToken.Address.Hex(), + // MetadataViews : c.MetadataViews.Address.Hex(), + // ViewResolver : c.ViewResolver.Address.Hex(), + + // EVMAddress: c.EVM.Address.Hex(), + } +} + +// All returns all system contracts as a slice. +func (c SystemContracts) All() []SystemContract { + return []SystemContract{ + c.Epoch, + c.IDTableStaking, + c.ClusterQC, + c.DKG, + + c.FlowServiceAccount, + c.NodeVersionBeacon, + c.RandomBeaconHistory, + c.FlowStorageFees, + + c.FlowFees, + c.FlowToken, + c.FungibleToken, + + c.NonFungibleToken, + c.MetadataViews, + c.ViewResolver, + + c.EVM, + } } // ServiceEvents is a container for all service events on a particular chain. @@ -100,134 +205,164 @@ func (se ServiceEvents) All() []ServiceEvent { } // SystemContractsForChain returns the system contract configuration for the given chain. -func SystemContractsForChain(chainID flow.ChainID) (*SystemContracts, error) { - addresses, ok := contractAddressesByChainID[chainID] +// Panics if the chain is unknown. +func SystemContractsForChain(chainID flow.ChainID) *SystemContracts { + contracts, ok := systemContractsForChain[chainID] if !ok { - return nil, fmt.Errorf("unknown chain id (%s)", chainID.String()) - } - - contracts := &SystemContracts{ - Epoch: SystemContract{ - Address: addresses[ContractNameEpoch], - Name: ContractNameEpoch, - }, - ClusterQC: SystemContract{ - Address: addresses[ContractNameClusterQC], - Name: ContractNameClusterQC, - }, - DKG: SystemContract{ - Address: addresses[ContractNameDKG], - Name: ContractNameDKG, - }, - NodeVersionBeacon: SystemContract{ - Address: addresses[ContractNameNodeVersionBeacon], - Name: ContractNameNodeVersionBeacon, - }, - RandomBeaconHistory: SystemContract{ - Address: addresses[ContractNameRandomBeaconHistory], - Name: ContractNameRandomBeaconHistory, - }, + // this is a panic, since it can only happen if the code is wrong + panic(fmt.Sprintf("unknown chain: %s", chainID)) } - - return contracts, nil + return contracts } +var systemContractsForChain = map[flow.ChainID]*SystemContracts{} + // ServiceEventsForChain returns the service event confirmation for the given chain. -func ServiceEventsForChain(chainID flow.ChainID) (*ServiceEvents, error) { - addresses, ok := contractAddressesByChainID[chainID] +// Panics if the chain is unknown. +func ServiceEventsForChain(chainID flow.ChainID) *ServiceEvents { + events, ok := serviceEventsForChain[chainID] if !ok { - return nil, fmt.Errorf("unknown chain id (%s)", chainID.String()) + // this is a panic, since it can only happen if the code is wrong + panic(fmt.Sprintf("unknown chain: %s", chainID)) } + return events +} + +var serviceEventsForChain = map[flow.ChainID]*ServiceEvents{} + +var contractAddressFunc = map[string]func(id flow.ChainID) flow.Address{} + +func init() { - events := &ServiceEvents{ - EpochSetup: ServiceEvent{ - Address: addresses[ContractNameEpoch], - ContractName: ContractNameEpoch, - Name: EventNameEpochSetup, - }, - EpochCommit: ServiceEvent{ - Address: addresses[ContractNameEpoch], - ContractName: ContractNameEpoch, - Name: EventNameEpochCommit, - }, - VersionBeacon: ServiceEvent{ - Address: addresses[ContractNameNodeVersionBeacon], - ContractName: ContractNameNodeVersionBeacon, - Name: EventNameVersionBeacon, - }, + serviceAddressFunc := func(chain flow.ChainID) flow.Address { + return chain.Chain().ServiceAddress() } - return events, nil -} + // epoch contracts are deployed on a separate account on mainnet and testnet + epochAddressFunc := func(chain flow.ChainID) flow.Address { + switch chain { + case flow.Mainnet: + return stakingContractAddressMainnet + case flow.Testnet: + return stakingContractAddressTestnet + default: + return chain.Chain().ServiceAddress() + } + } -// contractAddressesByChainID stores the default system smart contract -// addresses for each chain. -var contractAddressesByChainID map[flow.ChainID]map[string]flow.Address + // some contracts are always at an address with a a predetermined index + nthAddressFunc := func(index uint64) func(chain flow.ChainID) flow.Address { + return func(chain flow.ChainID) flow.Address { + address, err := chain.Chain().AddressAtIndex(index) + if err != nil { + // this can only happen if the code is wrong + panic(fmt.Sprintf("failed to get %d address: %v", FlowFeesAccountIndex, err)) + } + return address + } + } -// Well-known addresses for system contracts on long-running networks. -// For now, all system contracts tracked by this package are deployed to the same -// address (per chain) as the staking contract. -// -// Ref: https://docs.onflow.org/core-contracts/staking-contract-reference/ -var ( - // stakingContractAddressMainnet is the address of the FlowIDTableStaking contract on Mainnet - stakingContractAddressMainnet = flow.HexToAddress("8624b52f9ddcd04a") - // stakingContractAddressTestnet is the address of the FlowIDTableStaking contract on Testnet - stakingContractAddressTestnet = flow.HexToAddress("9eca2b38b18b5dfe") + nftTokenAddressFunc := func(chain flow.ChainID) flow.Address { + switch chain { + case flow.Mainnet: + return nftTokenAddressMainnet + case flow.Testnet: + return nftTokenAddressTestnet + default: + return chain.Chain().ServiceAddress() + } + } - serviceAddressMainnet = flow.Mainnet.Chain().ServiceAddress() - serviceAddressTestnet = flow.Testnet.Chain().ServiceAddress() - serviceAddressSandboxnet = flow.Sandboxnet.Chain().ServiceAddress() - serviceAddressEmulator = flow.Emulator.Chain().ServiceAddress() -) + contractAddressFunc = map[string]func(id flow.ChainID) flow.Address{ + ContractNameIDTableStaking: epochAddressFunc, + ContractNameEpoch: epochAddressFunc, + ContractNameClusterQC: epochAddressFunc, + ContractNameDKG: epochAddressFunc, -func init() { - contractAddressesByChainID = make(map[flow.ChainID]map[string]flow.Address) - - // Main Flow network - // All system contracts are deployed to the account of the staking contract - mainnet := map[string]flow.Address{ - ContractNameEpoch: stakingContractAddressMainnet, - ContractNameClusterQC: stakingContractAddressMainnet, - ContractNameDKG: stakingContractAddressMainnet, - ContractNameNodeVersionBeacon: serviceAddressMainnet, - ContractNameRandomBeaconHistory: serviceAddressMainnet, + ContractNameNodeVersionBeacon: serviceAddressFunc, + ContractNameRandomBeaconHistory: serviceAddressFunc, + ContractNameServiceAccount: serviceAddressFunc, + ContractNameStorageFees: serviceAddressFunc, + + ContractNameFlowFees: nthAddressFunc(FlowFeesAccountIndex), + ContractNameFungibleToken: nthAddressFunc(FungibleTokenAccountIndex), + ContractNameFlowToken: nthAddressFunc(FlowTokenAccountIndex), + + ContractNameNonFungibleToken: nftTokenAddressFunc, + ContractNameMetadataViews: nftTokenAddressFunc, + ContractNameViewResolver: nftTokenAddressFunc, + + ContractNameEVM: nthAddressFunc(EVMAccountIndex), } - contractAddressesByChainID[flow.Mainnet] = mainnet - - // Long-lived test networks - // All system contracts are deployed to the account of the staking contract - testnet := map[string]flow.Address{ - ContractNameEpoch: stakingContractAddressTestnet, - ContractNameClusterQC: stakingContractAddressTestnet, - ContractNameDKG: stakingContractAddressTestnet, - ContractNameNodeVersionBeacon: serviceAddressTestnet, - ContractNameRandomBeaconHistory: serviceAddressTestnet, + + getSystemContractsForChain := func(chainID flow.ChainID) *SystemContracts { + + contract := func(name string) SystemContract { + addressFunc, ok := contractAddressFunc[name] + if !ok { + // this is a panic, since it can only happen if the code is wrong + panic(fmt.Sprintf("unknown system contract name: %s", name)) + } + + return SystemContract{ + Address: addressFunc(chainID), + Name: name, + } + } + + contracts := &SystemContracts{ + Epoch: contract(ContractNameEpoch), + IDTableStaking: contract(ContractNameIDTableStaking), + ClusterQC: contract(ContractNameClusterQC), + DKG: contract(ContractNameDKG), + + FlowServiceAccount: contract(ContractNameServiceAccount), + NodeVersionBeacon: contract(ContractNameNodeVersionBeacon), + RandomBeaconHistory: contract(ContractNameRandomBeaconHistory), + FlowStorageFees: contract(ContractNameStorageFees), + + FlowFees: contract(ContractNameFlowFees), + FlowToken: contract(ContractNameFlowToken), + FungibleToken: contract(ContractNameFungibleToken), + + NonFungibleToken: contract(ContractNameNonFungibleToken), + MetadataViews: contract(ContractNameMetadataViews), + ViewResolver: contract(ContractNameViewResolver), + + EVM: contract(ContractNameEVM), + } + + return contracts } - contractAddressesByChainID[flow.Testnet] = testnet - - // Sandboxnet test network - // All system contracts are deployed to the service account - sandboxnet := map[string]flow.Address{ - ContractNameEpoch: serviceAddressSandboxnet, - ContractNameClusterQC: serviceAddressSandboxnet, - ContractNameDKG: serviceAddressSandboxnet, - ContractNameNodeVersionBeacon: serviceAddressSandboxnet, - ContractNameRandomBeaconHistory: serviceAddressSandboxnet, + + getServiceEventsForChain := func(chainID flow.ChainID) *ServiceEvents { + + event := func(contractName, eventName string) ServiceEvent { + addressFunc, ok := contractAddressFunc[contractName] + if !ok { + // this is a panic, since it can only happen if the code is wrong + panic(fmt.Sprintf("unknown system contract name: %s", contractName)) + } + + return ServiceEvent{ + Address: addressFunc(chainID), + ContractName: contractName, + Name: eventName, + } + } + + events := &ServiceEvents{ + EpochSetup: event(ContractNameEpoch, EventNameEpochSetup), + EpochCommit: event(ContractNameEpoch, EventNameEpochCommit), + VersionBeacon: event(ContractNameNodeVersionBeacon, EventNameVersionBeacon), + } + + return events } - contractAddressesByChainID[flow.Sandboxnet] = sandboxnet - - // Transient test networks - // All system contracts are deployed to the service account - transient := map[string]flow.Address{ - ContractNameEpoch: serviceAddressEmulator, - ContractNameClusterQC: serviceAddressEmulator, - ContractNameDKG: serviceAddressEmulator, - ContractNameNodeVersionBeacon: serviceAddressEmulator, - ContractNameRandomBeaconHistory: serviceAddressEmulator, + + // pre-populate the system contracts and service events for all chains for fast access + for _, chain := range flow.AllChainIDs() { + serviceEventsForChain[chain] = getServiceEventsForChain(chain) + systemContractsForChain[chain] = getSystemContractsForChain(chain) } - contractAddressesByChainID[flow.Emulator] = transient - contractAddressesByChainID[flow.Localnet] = transient - contractAddressesByChainID[flow.BftTestnet] = transient - contractAddressesByChainID[flow.Benchnet] = transient } diff --git a/fvm/systemcontracts/system_contracts_test.go b/fvm/systemcontracts/system_contracts_test.go index bae3308aac0..8352d1a049c 100644 --- a/fvm/systemcontracts/system_contracts_test.go +++ b/fvm/systemcontracts/system_contracts_test.go @@ -13,18 +13,10 @@ import ( // TestSystemContract_Address tests that we can retrieve a canonical address // for all accepted chains and contracts. func TestSystemContracts(t *testing.T) { - chains := []flow.ChainID{ - flow.Mainnet, - flow.Testnet, - flow.Sandboxnet, - flow.Benchnet, - flow.Localnet, - flow.Emulator, - } + chains := flow.AllChainIDs() for _, chain := range chains { - _, err := SystemContractsForChain(chain) - require.NoError(t, err) + require.NotPanics(t, func() { SystemContractsForChain(chain) }) checkSystemContracts(t, chain) } } @@ -34,47 +26,30 @@ func TestSystemContracts(t *testing.T) { func TestSystemContract_InvalidChainID(t *testing.T) { invalidChain := flow.ChainID("invalid-chain") - _, err := SystemContractsForChain(invalidChain) - assert.Error(t, err) + require.Panics(t, func() { SystemContractsForChain(invalidChain) }) } // TestServiceEvents tests that we can retrieve service events for all accepted // chains and contracts. func TestServiceEvents(t *testing.T) { - chains := []flow.ChainID{ - flow.Mainnet, - flow.Testnet, - flow.Sandboxnet, - flow.Benchnet, - flow.Localnet, - flow.Emulator, - } + chains := flow.AllChainIDs() for _, chain := range chains { - _, err := ServiceEventsForChain(chain) + require.NotPanics(t, func() { ServiceEventsForChain(chain) }) checkServiceEvents(t, chain) - require.NoError(t, err) } } // TestServiceEventLookup_Consistency sanity checks consistency of the lookup // method, in case an update to ServiceEvents forgets to update the lookup. func TestServiceEventAll_Consistency(t *testing.T) { - chains := []flow.ChainID{ - flow.Mainnet, - flow.Testnet, - flow.Sandboxnet, - flow.Benchnet, - flow.Localnet, - flow.Emulator, - } + chains := flow.AllChainIDs() fields := reflect.TypeOf(ServiceEvents{}).NumField() for _, chain := range chains { - events, err := ServiceEventsForChain(chain) - require.NoError(t, err) + events := ServiceEventsForChain(chain) - // ensure all events are returns + // ensure all events are present all := events.All() assert.Equal(t, fields, len(all)) } @@ -85,39 +60,42 @@ func TestServiceEventAll_Consistency(t *testing.T) { func TestServiceEvents_InvalidChainID(t *testing.T) { invalidChain := flow.ChainID("invalid-chain") - _, err := ServiceEventsForChain(invalidChain) - assert.Error(t, err) + require.Panics(t, func() { ServiceEventsForChain(invalidChain) }) } func checkSystemContracts(t *testing.T, chainID flow.ChainID) { - contracts, err := SystemContractsForChain(chainID) - require.NoError(t, err) + contracts := SystemContractsForChain(chainID) - addresses, ok := contractAddressesByChainID[chainID] - require.True(t, ok, "missing chain %s", chainID.String()) + address := func(name string) flow.Address { + f, ok := contractAddressFunc[name] + require.True(t, ok, "missing contract %s for chain %s", name, chainID.String()) + return f(chainID) + } // entries may not be empty - assert.NotEqual(t, flow.EmptyAddress, addresses[ContractNameEpoch]) - assert.NotEqual(t, flow.EmptyAddress, addresses[ContractNameClusterQC]) - assert.NotEqual(t, flow.EmptyAddress, addresses[ContractNameDKG]) - assert.NotEqual(t, flow.EmptyAddress, addresses[ContractNameNodeVersionBeacon]) + assert.NotEqual(t, flow.EmptyAddress, address(ContractNameEpoch)) + assert.NotEqual(t, flow.EmptyAddress, address(ContractNameClusterQC)) + assert.NotEqual(t, flow.EmptyAddress, address(ContractNameDKG)) + assert.NotEqual(t, flow.EmptyAddress, address(ContractNameNodeVersionBeacon)) // entries must match internal mapping - assert.Equal(t, addresses[ContractNameEpoch], contracts.Epoch.Address) - assert.Equal(t, addresses[ContractNameClusterQC], contracts.ClusterQC.Address) - assert.Equal(t, addresses[ContractNameDKG], contracts.DKG.Address) - assert.Equal(t, addresses[ContractNameNodeVersionBeacon], contracts.NodeVersionBeacon.Address) + assert.Equal(t, address(ContractNameEpoch), contracts.Epoch.Address) + assert.Equal(t, address(ContractNameClusterQC), contracts.ClusterQC.Address) + assert.Equal(t, address(ContractNameDKG), contracts.DKG.Address) + assert.Equal(t, address(ContractNameNodeVersionBeacon), contracts.NodeVersionBeacon.Address) } func checkServiceEvents(t *testing.T, chainID flow.ChainID) { - events, err := ServiceEventsForChain(chainID) - require.NoError(t, err) + events := ServiceEventsForChain(chainID) - addresses, ok := contractAddressesByChainID[chainID] - require.True(t, ok, "missing chain %w", chainID.String()) + address := func(name string) flow.Address { + f, ok := contractAddressFunc[name] + require.True(t, ok, "missing contract %s for chain %s", name, chainID.String()) + return f(chainID) + } - epochContractAddr := addresses[ContractNameEpoch] - versionContractAddr := addresses[ContractNameNodeVersionBeacon] + epochContractAddr := address(ContractNameEpoch) + versionContractAddr := address(ContractNameNodeVersionBeacon) // entries may not be empty assert.NotEqual(t, flow.EmptyAddress, epochContractAddr) assert.NotEqual(t, flow.EmptyAddress, versionContractAddr) diff --git a/fvm/transactionInvoker.go b/fvm/transactionInvoker.go index 68f3861c849..acd4088c4bd 100644 --- a/fvm/transactionInvoker.go +++ b/fvm/transactionInvoker.go @@ -12,11 +12,13 @@ import ( "github.com/onflow/flow-go/fvm/environment" "github.com/onflow/flow-go/fvm/errors" + "github.com/onflow/flow-go/fvm/evm" reusableRuntime "github.com/onflow/flow-go/fvm/runtime" "github.com/onflow/flow-go/fvm/storage" "github.com/onflow/flow-go/fvm/storage/derived" "github.com/onflow/flow-go/fvm/storage/snapshot" "github.com/onflow/flow-go/fvm/storage/state" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/module/trace" ) @@ -180,6 +182,22 @@ func (executor *transactionExecutor) preprocess() error { // infrequently modified and are expensive to compute. For now this includes // reading meter parameter overrides and parsing programs. func (executor *transactionExecutor) preprocessTransactionBody() error { + // setup evm + if executor.ctx.EVMEnabled { + chain := executor.ctx.Chain + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + err := evm.SetupEnvironment( + chain.ChainID(), + executor.env, + executor.cadenceRuntime.TxRuntimeEnv, + chain.ServiceAddress(), + sc.FlowToken.Address, + ) + if err != nil { + return err + } + } + meterParams, err := getBodyMeterParameters( executor.ctx, executor.proc, @@ -224,6 +242,22 @@ func (executor *transactionExecutor) execute() error { } func (executor *transactionExecutor) ExecuteTransactionBody() error { + // setup evm + if executor.ctx.EVMEnabled { + chain := executor.ctx.Chain + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + err := evm.SetupEnvironment( + chain.ChainID(), + executor.env, + executor.cadenceRuntime.TxRuntimeEnv, + chain.ServiceAddress(), + sc.FlowToken.Address, + ) + if err != nil { + return err + } + } + var invalidator derived.TransactionInvalidator if !executor.errs.CollectedError() { diff --git a/go.mod b/go.mod index 5480b46d6e8..6bc2aa92684 100644 --- a/go.mod +++ b/go.mod @@ -56,8 +56,8 @@ require ( github.com/onflow/flow-core-contracts/lib/go/contracts v0.14.0 github.com/onflow/flow-core-contracts/lib/go/templates v0.14.0 github.com/onflow/flow-go-sdk v0.41.16 - github.com/onflow/flow-go/crypto v0.24.9 - github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8 + github.com/onflow/flow-go/crypto v0.25.0 + github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9 github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/pierrec/lz4 v2.6.1+incompatible diff --git a/go.sum b/go.sum index 22bd56ca024..34f2ec3f24b 100644 --- a/go.sum +++ b/go.sum @@ -1339,13 +1339,13 @@ github.com/onflow/flow-go-sdk v0.24.0/go.mod h1:IoptMLPyFXWvyd9yYA6/4EmSeeozl6nJ github.com/onflow/flow-go-sdk v0.41.16 h1:HsmHwEVmj+iK+GszHbFseHh7Ii5W3PWOIRNAH/En08Q= github.com/onflow/flow-go-sdk v0.41.16/go.mod h1:bVrVNoJKiwB6vW5Qbm5tFAfJBQ5we4uSQWnn9gNAFhQ= github.com/onflow/flow-go/crypto v0.21.3/go.mod h1:vI6V4CY3R6c4JKBxdcRiR/AnjBfL8OSD97bJc60cLuQ= -github.com/onflow/flow-go/crypto v0.24.9 h1:0EQp+kSZYJepMIiSypfJVe7tzsPcb6UXOdOtsTCDhBs= -github.com/onflow/flow-go/crypto v0.24.9/go.mod h1:fqCzkIBBMRRkciVrvW21rECKq1oD7Q6u+bCI78lfNX0= +github.com/onflow/flow-go/crypto v0.25.0 h1:6lmoiAQ3APCF+nV7f4f2AXL3PuDKqQiWqRJXmjrMEq4= +github.com/onflow/flow-go/crypto v0.25.0/go.mod h1:OOb2vYcS8AOCajBClhHTJ0NKftFl1RQgTQ0+Vh4nbqk= github.com/onflow/flow-nft/lib/go/contracts v1.1.0 h1:rhUDeD27jhLwOqQKI/23008CYfnqXErrJvc4EFRP2a0= github.com/onflow/flow-nft/lib/go/contracts v1.1.0/go.mod h1:YsvzYng4htDgRB9sa9jxdwoTuuhjK8WYWXTyLkIigZY= github.com/onflow/flow/protobuf/go/flow v0.2.2/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= -github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8 h1:AsIyEDiwxpRAifgBK/0lsjEdNfqFtHqNHedpMeHoA4w= -github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8/go.mod h1:NA2pX2nw8zuaxfKphhKsk00kWLwfd+tv8mS23YXO4Sk= +github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9 h1:EjTkQnXvoH/yQLoGxVRZObuE8oxcanEu+IxwBwb/gSQ= +github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9/go.mod h1:NA2pX2nw8zuaxfKphhKsk00kWLwfd+tv8mS23YXO4Sk= github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d h1:QcOAeEyF3iAUHv21LQ12sdcsr0yFrJGoGLyCAzYYtvI= github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d/go.mod h1:GCPpiyRoHncdqPj++zPr9ZOYBX4hpJ0pYZRYqSE8VKk= github.com/onflow/sdks v0.5.0 h1:2HCRibwqDaQ1c9oUApnkZtEAhWiNY2GTpRD5+ftdkN8= @@ -1612,7 +1612,6 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8= github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= github.com/supranational/blst v0.3.4/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw= -github.com/supranational/blst v0.3.11-0.20230406105308-e9dfc5ee724b h1:u49mjRnygnB34h8OKbnNJFVUtWSKIKb1KukdV8bILUM= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY= diff --git a/insecure/Makefile b/insecure/Makefile index 72a38cf4b4d..f38a03381b3 100644 --- a/insecure/Makefile +++ b/insecure/Makefile @@ -8,7 +8,26 @@ else RACE_FLAG := endif +include ../crypto_adx_flag.mk + +CGO_FLAG := CGO_CFLAGS=$(CRYPTO_FLAG) + # runs all unit tests of the insecure module .PHONY: test test: - go test $(if $(VERBOSE),-v,) -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) --tags relic ./... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./... + +.PHONY: lint +lint: tidy + # revive -config revive.toml -exclude storage/ledger/trie ./... + golangci-lint run -v + +# this ensures there is no unused dependency being added by accident +.PHONY: tidy +tidy: + go mod tidy -v + cd integration; go mod tidy -v + cd crypto; go mod tidy -v + cd cmd/testclient; go mod tidy -v + cd insecure; go mod tidy -v + git diff --exit-code diff --git a/insecure/corruptlibp2p/fixtures.go b/insecure/corruptlibp2p/fixtures.go index 3be85acc05e..5821e7cd03d 100644 --- a/insecure/corruptlibp2p/fixtures.go +++ b/insecure/corruptlibp2p/fixtures.go @@ -7,110 +7,8 @@ import ( corrupt "github.com/yhassanzadeh13/go-libp2p-pubsub" "github.com/onflow/flow-go/network/p2p" - "github.com/onflow/flow-go/utils/unittest" ) -const ( - // topicIDFixtureLen is the length of the topic ID fixture for testing. - topicIDFixtureLen = 10 - // messageIDFixtureLen is the length of the message ID fixture for testing. - messageIDFixtureLen = 10 -) - -type GossipSubCtrlOption func(*pubsubpb.ControlMessage) - -// GossipSubCtrlFixture returns a ControlMessage with the given options. -func GossipSubCtrlFixture(opts ...GossipSubCtrlOption) *pubsubpb.ControlMessage { - msg := &pubsubpb.ControlMessage{} - for _, opt := range opts { - opt(msg) - } - return msg -} - -// WithIHave adds iHave control messages of the given size and number to the control message. -func WithIHave(msgCount, msgIDCount int, topicId string) GossipSubCtrlOption { - return func(msg *pubsubpb.ControlMessage) { - iHaves := make([]*pubsubpb.ControlIHave, msgCount) - for i := 0; i < msgCount; i++ { - iHaves[i] = &pubsubpb.ControlIHave{ - TopicID: &topicId, - MessageIDs: GossipSubMessageIdsFixture(msgIDCount), - } - } - msg.Ihave = iHaves - } -} - -// WithIWant adds iWant control messages of the given size and number to the control message. -// The message IDs are generated randomly. -// Args: -// -// msgCount: number of iWant messages to add. -// msgIdsPerIWant: number of message IDs to add to each iWant message. -// -// Returns: -// A GossipSubCtrlOption that adds iWant messages to the control message. -// Example: WithIWant(2, 3) will add 2 iWant messages, each with 3 message IDs. -func WithIWant(iWantCount int, msgIdsPerIWant int) GossipSubCtrlOption { - return func(msg *pubsubpb.ControlMessage) { - iWants := make([]*pubsubpb.ControlIWant, iWantCount) - for i := 0; i < iWantCount; i++ { - iWants[i] = &pubsubpb.ControlIWant{ - MessageIDs: GossipSubMessageIdsFixture(msgIdsPerIWant), - } - } - msg.Iwant = iWants - } -} - -// WithGraft adds GRAFT control messages with given topicID to the control message. -func WithGraft(msgCount int, topicId string) GossipSubCtrlOption { - return func(msg *pubsubpb.ControlMessage) { - grafts := make([]*pubsubpb.ControlGraft, msgCount) - for i := 0; i < msgCount; i++ { - grafts[i] = &pubsubpb.ControlGraft{ - TopicID: &topicId, - } - } - msg.Graft = grafts - } -} - -// WithPrune adds PRUNE control messages with given topicID to the control message. -func WithPrune(msgCount int, topicId string) GossipSubCtrlOption { - return func(msg *pubsubpb.ControlMessage) { - prunes := make([]*pubsubpb.ControlPrune, msgCount) - for i := 0; i < msgCount; i++ { - prunes[i] = &pubsubpb.ControlPrune{ - TopicID: &topicId, - } - } - msg.Prune = prunes - } -} - -// gossipSubMessageIdFixture returns a random gossipSub message ID. -func gossipSubMessageIdFixture() string { - // TODO: messageID length should be a parameter. - return unittest.GenerateRandomStringWithLen(messageIDFixtureLen) -} - -// GossipSubTopicIdFixture returns a random gossipSub topic ID. -func GossipSubTopicIdFixture() string { - // TODO: topicID length should be a parameter. - return unittest.GenerateRandomStringWithLen(topicIDFixtureLen) -} - -// GossipSubMessageIdsFixture returns a slice of random gossipSub message IDs of the given size. -func GossipSubMessageIdsFixture(count int) []string { - msgIds := make([]string, count) - for i := 0; i < count; i++ { - msgIds[i] = gossipSubMessageIdFixture() - } - return msgIds -} - // CorruptInspectorFunc wraps a normal RPC inspector with a corrupt inspector func by translating corrupt.RPC -> pubsubpb.RPC // before calling Inspect func. func CorruptInspectorFunc(inspector p2p.GossipSubRPCInspector) func(id peer.ID, rpc *corrupt.RPC) error { diff --git a/insecure/corruptlibp2p/gossipsub_spammer.go b/insecure/corruptlibp2p/gossipsub_spammer.go index f169183b2b8..94e936c9b87 100644 --- a/insecure/corruptlibp2p/gossipsub_spammer.go +++ b/insecure/corruptlibp2p/gossipsub_spammer.go @@ -51,7 +51,11 @@ func NewGossipSubRouterSpammer(t *testing.T, sporkId flow.Identifier, role flow. // - inspector: the RPC inspector. // Returns: // - the GossipSubRouterSpammer. -func NewGossipSubRouterSpammerWithRpcInspector(t *testing.T, sporkId flow.Identifier, role flow.Role, provider module.IdentityProvider, inspector func(id peer.ID, rpc *corrupt.RPC) error) *GossipSubRouterSpammer { +func NewGossipSubRouterSpammerWithRpcInspector(t *testing.T, + sporkId flow.Identifier, + role flow.Role, + provider module.IdentityProvider, + inspector func(id peer.ID, rpc *corrupt.RPC) error) *GossipSubRouterSpammer { spammerNode, spammerId, router := newSpammerNodeWithRpcInspector(t, sporkId, role, provider, inspector) return &GossipSubRouterSpammer{ router: router, @@ -70,10 +74,10 @@ func (s *GossipSubRouterSpammer) SpamControlMessage(t *testing.T, victim p2p.Lib // GenerateCtlMessages generates control messages before they are sent so the test can prepare // to expect receiving them before they are sent by the spammer. -func (s *GossipSubRouterSpammer) GenerateCtlMessages(msgCount int, opts ...GossipSubCtrlOption) []pb.ControlMessage { +func (s *GossipSubRouterSpammer) GenerateCtlMessages(msgCount int, opts ...p2ptest.GossipSubCtrlOption) []pb.ControlMessage { var ctlMgs []pb.ControlMessage for i := 0; i < msgCount; i++ { - ctlMsg := GossipSubCtrlFixture(opts...) + ctlMsg := p2ptest.GossipSubCtrlFixture(opts...) ctlMgs = append(ctlMgs, *ctlMsg) } return ctlMgs diff --git a/insecure/corruptlibp2p/libp2p_node_factory.go b/insecure/corruptlibp2p/libp2p_node_factory.go index 69c7b0cfba9..b3ad94599ef 100644 --- a/insecure/corruptlibp2p/libp2p_node_factory.go +++ b/insecure/corruptlibp2p/libp2p_node_factory.go @@ -91,7 +91,6 @@ func InitCorruptLibp2pNode( connGaterCfg, peerManagerCfg, &netConfig.GossipSubConfig, - &netConfig.GossipSubRPCInspectorsConfig, &netConfig.ResourceManager, uniCfg, &netConfig.ConnectionManagerConfig, diff --git a/insecure/corruptlibp2p/spam_test.go b/insecure/corruptlibp2p/spam_test.go index 1c48d9feca5..bc901cbc116 100644 --- a/insecure/corruptlibp2p/spam_test.go +++ b/insecure/corruptlibp2p/spam_test.go @@ -77,7 +77,7 @@ func TestSpam_IHave(t *testing.T) { }) // prepare to spam - generate iHAVE control messages - iHaveSentCtlMsgs := gsrSpammer.GenerateCtlMessages(messagesToSpam, corruptlibp2p.WithIHave(messagesToSpam, 5, fmt.Sprintf("%s/%s", channels.PushBlocks, sporkId))) + iHaveSentCtlMsgs := gsrSpammer.GenerateCtlMessages(messagesToSpam, p2ptest.WithIHave(messagesToSpam, 5, fmt.Sprintf("%s/%s", channels.PushBlocks, sporkId))) // start spamming the victim peer gsrSpammer.SpamControlMessage(t, victimNode, iHaveSentCtlMsgs) diff --git a/insecure/dependency_test.go b/insecure/dependency_test.go new file mode 100644 index 00000000000..a2375847be9 --- /dev/null +++ b/insecure/dependency_test.go @@ -0,0 +1,8 @@ +package insecure + +import "github.com/btcsuite/btcd/chaincfg/chainhash" + +// this is added to resolve the issue with chainhash ambiguous import, +// the code is not used, but it's needed to force go.mod specify and retain chainhash version +// workaround for issue: https://github.com/golang/go/issues/27899 +var _ = chainhash.Hash{} diff --git a/insecure/go.mod b/insecure/go.mod index c2485e59c9c..563107e28de 100644 --- a/insecure/go.mod +++ b/insecure/go.mod @@ -3,30 +3,33 @@ module github.com/onflow/flow-go/insecure go 1.20 require ( + github.com/btcsuite/btcd/chaincfg/chainhash v1.0.2 github.com/golang/protobuf v1.5.3 github.com/hashicorp/go-multierror v1.1.1 github.com/ipfs/go-datastore v0.6.0 github.com/libp2p/go-libp2p v0.28.1 github.com/libp2p/go-libp2p-pubsub v0.9.3 github.com/multiformats/go-multiaddr-dns v0.3.1 - github.com/onflow/flow-go v0.31.1-0.20230718164039-e3411eff1e9d - github.com/onflow/flow-go/crypto v0.24.9 + github.com/onflow/flow-go v0.32.4-0.20231130134727-3c01c7f8966c + github.com/onflow/flow-go/crypto v0.25.0 github.com/rs/zerolog v1.29.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 github.com/yhassanzadeh13/go-libp2p-pubsub v0.6.11-flow-expose-msg.0.20230703223453-544e2fe28a26 go.uber.org/atomic v1.11.0 - google.golang.org/grpc v1.58.3 + google.golang.org/grpc v1.59.0 google.golang.org/protobuf v1.31.0 ) require ( - cloud.google.com/go v0.110.4 // indirect - cloud.google.com/go/compute v1.21.0 // indirect + cloud.google.com/go v0.110.7 // indirect + cloud.google.com/go/compute v1.23.0 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v1.1.1 // indirect cloud.google.com/go/storage v1.30.1 // indirect github.com/DataDog/zstd v1.5.2 // indirect + github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 // indirect + github.com/VictoriaMetrics/fastcache v1.6.0 // indirect github.com/aws/aws-sdk-go-v2 v1.17.7 // indirect github.com/aws/aws-sdk-go-v2/config v1.18.19 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.13.18 // indirect @@ -61,6 +64,7 @@ require ( github.com/cskr/pubsub v1.0.2 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect + github.com/deckarep/golang-set/v2 v2.1.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f // indirect github.com/dgraph-io/badger/v2 v2.2007.4 // indirect @@ -89,30 +93,32 @@ require ( github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.14.1 // indirect + github.com/go-stack/stack v1.8.1 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect + github.com/gofrs/flock v0.8.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/glog v1.1.0 // indirect + github.com/golang/glog v1.1.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/mock v1.6.0 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect - github.com/google/go-cmp v0.5.9 // indirect github.com/google/gopacket v1.1.19 // indirect github.com/google/pprof v0.0.0-20230602150820-91b7bce49751 // indirect github.com/google/s2a-go v0.1.4 // indirect - github.com/google/uuid v1.3.0 // indirect + github.com/google/uuid v1.3.1 // indirect github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect github.com/googleapis/gax-go/v2 v2.11.0 // indirect - github.com/gorilla/mux v1.8.0 // indirect + github.com/gorilla/mux v1.8.1 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2 v2.0.0-rc.2 // indirect - github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-20200501113911-9a95f0fdbfea // indirect + github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/golang-lru/v2 v2.0.2 // indirect github.com/hashicorp/hcl v1.0.0 // indirect + github.com/holiman/bloomfilter/v2 v2.0.3 // indirect github.com/holiman/uint256 v1.2.2-0.20230321075855-87b91420868c // indirect github.com/huin/goupnp v1.2.0 // indirect github.com/improbable-eng/grpc-web v0.15.0 // indirect @@ -189,6 +195,8 @@ require ( github.com/multiformats/go-multihash v0.2.3 // indirect github.com/multiformats/go-multistream v0.4.1 // indirect github.com/multiformats/go-varint v0.0.7 // indirect + github.com/nxadm/tail v1.4.8 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/onflow/atree v0.6.0 // indirect github.com/onflow/cadence v0.42.5 // indirect github.com/onflow/flow-core-contracts/lib/go/contracts v1.2.4-0.20231016154253-a00dbf7c061f // indirect @@ -196,7 +204,7 @@ require ( github.com/onflow/flow-ft/lib/go/contracts v0.7.1-0.20230711213910-baad011d2b13 // indirect github.com/onflow/flow-go-sdk v0.41.16 // indirect github.com/onflow/flow-nft/lib/go/contracts v1.1.0 // indirect - github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8 // indirect + github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9 // indirect github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d // indirect github.com/onflow/sdks v0.5.0 // indirect github.com/onflow/wal v0.0.0-20230529184820-bc9f8244608d // indirect @@ -210,10 +218,10 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/polydawn/refmt v0.89.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect - github.com/prometheus/client_golang v1.14.0 // indirect + github.com/prometheus/client_golang v1.16.0 // indirect github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/common v0.42.0 // indirect - github.com/prometheus/procfs v0.9.0 // indirect + github.com/prometheus/procfs v0.10.1 // indirect github.com/psiemens/sconfig v0.1.0 // indirect github.com/quic-go/qpack v0.4.0 // indirect github.com/quic-go/qtls-go1-19 v0.3.2 // indirect @@ -226,17 +234,19 @@ require ( github.com/rs/cors v1.8.0 // indirect github.com/schollz/progressbar/v3 v3.13.1 // indirect github.com/sethvargo/go-retry v0.2.3 // indirect + github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible // indirect github.com/shirou/gopsutil/v3 v3.22.2 // indirect github.com/slok/go-http-metrics v0.10.0 // indirect github.com/sony/gobreaker v0.5.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/spf13/afero v1.9.3 // indirect github.com/spf13/cast v1.5.0 // indirect - github.com/spf13/cobra v1.7.0 // indirect + github.com/spf13/cobra v1.8.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/viper v1.15.0 // indirect github.com/stretchr/objx v0.5.0 // indirect github.com/subosito/gotenv v1.4.2 // indirect + github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 // indirect github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c // indirect github.com/tklauser/go-sysconf v0.3.9 // indirect github.com/tklauser/numcpus v0.3.0 // indirect @@ -264,8 +274,8 @@ require ( golang.org/x/crypto v0.12.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/mod v0.10.0 // indirect - golang.org/x/net v0.12.0 // indirect - golang.org/x/oauth2 v0.10.0 // indirect + golang.org/x/net v0.14.0 // indirect + golang.org/x/oauth2 v0.11.0 // indirect golang.org/x/sync v0.3.0 // indirect golang.org/x/sys v0.11.0 // indirect golang.org/x/term v0.11.0 // indirect @@ -276,11 +286,12 @@ require ( gonum.org/v1/gonum v0.13.0 // indirect google.golang.org/api v0.126.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect + google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.2.1 // indirect nhooyr.io/websocket v1.8.7 // indirect diff --git a/insecure/go.sum b/insecure/go.sum index c876aed50eb..050b66add32 100644 --- a/insecure/go.sum +++ b/insecure/go.sum @@ -29,16 +29,16 @@ cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aD cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= -cloud.google.com/go v0.110.4 h1:1JYyxKMN9hd5dR2MYTPWkGUgcoxVVhg0LKNKEo0qvmk= -cloud.google.com/go v0.110.4/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= +cloud.google.com/go v0.110.7 h1:rJyC7nWRg2jWGZ4wSJ5nY65GTdYJkg0cd/uXb+ACI6o= +cloud.google.com/go v0.110.7/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute v1.21.0 h1:JNBsyXVoOoNJtTQcnEY5uYpZIbeCTYIeDe0Xh1bySMk= -cloud.google.com/go/compute v1.21.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= +cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= +cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= @@ -96,8 +96,11 @@ github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdII github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 h1:fLjPD/aNc3UIOA6tDi6QXUemppXK3P9BI7mr2hd6gx8= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VictoriaMetrics/fastcache v1.5.3/go.mod h1:+jv9Ckb+za/P1ZRg/sulP5Ni1v49daAVERr0H3CuscE= +github.com/VictoriaMetrics/fastcache v1.6.0 h1:C/3Oi3EiBCqufydp1neRZkqcwmEiuRT9c3fqvvgKm5o= +github.com/VictoriaMetrics/fastcache v1.6.0/go.mod h1:0qHz5QP0GMX4pfmMA/zt5RgfNuXJrTP0zS7DqpHGGTw= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= @@ -108,6 +111,7 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= @@ -185,6 +189,8 @@ github.com/btcsuite/btcd v0.20.1-beta/go.mod h1:wVuoA8VJLEcwgqHBwHmzLRazpKxTv13P github.com/btcsuite/btcd v0.21.0-beta/go.mod h1:ZSWyehm27aAuS9bvkATT+Xte3hjHZ+MRgMY/8NJ7K94= github.com/btcsuite/btcd/btcec/v2 v2.2.1 h1:xP60mv8fvp+0khmrN0zTdPC3cNm24rfeE6lh2R/Yv3E= github.com/btcsuite/btcd/btcec/v2 v2.2.1/go.mod h1:9/CSmJxmuvqzX9Wh2fXMWToLOHhPd11lSPuIupwTkI8= +github.com/btcsuite/btcd/chaincfg/chainhash v1.0.2 h1:KdUfX2zKommPRa+PD0sWZUyXe9w277ABlgELO7H04IM= +github.com/btcsuite/btcd/chaincfg/chainhash v1.0.2/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc= github.com/btcsuite/btclog v0.0.0-20170628155309-84c8d2346e9f/go.mod h1:TdznJufoqS23FtqVCzL0ZqgP5MqXbb4fg/WgDys70nA= github.com/btcsuite/btcutil v0.0.0-20190207003914-4c204d697803/go.mod h1:+5NJ2+qvTyV9exUAL/rxXi3DcLg2Ts+ymUAY5y4NvMg= github.com/btcsuite/btcutil v0.0.0-20190425235716-9e5f4b9a998d/go.mod h1:+5NJ2+qvTyV9exUAL/rxXi3DcLg2Ts+ymUAY5y4NvMg= @@ -264,7 +270,7 @@ github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfc github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cskr/pubsub v1.0.2 h1:vlOzMhl6PFn60gRlTQQsIfVwaPB/B/8MziK8FhEPt/0= @@ -277,6 +283,8 @@ github.com/davidlazar/go-crypto v0.0.0-20170701192655-dcfb0a7ac018/go.mod h1:rQY github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c h1:pFUpOrbxDR6AkioZ1ySsx5yxlDQZ8stG2b88gTPxgJU= github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c/go.mod h1:6UhI8N9EjYm1c2odKpFpAYeR8dsBeM7PtzQhRgxRr9U= github.com/deckarep/golang-set v0.0.0-20180603214616-504e848d77ea/go.mod h1:93vsz/8Wt4joVM7c2AVqh+YRMiUSc14yDtF28KmMOgQ= +github.com/deckarep/golang-set/v2 v2.1.0 h1:g47V4Or+DUdzbs8FxCCmgb6VYd+ptPAngjM6dtGktsI= +github.com/deckarep/golang-set/v2 v2.1.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5ilcvdfma9wOH6Y= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 h1:8UrgZ3GkP4i/CLijOJx79Yu+etlyjdBU4sfcs2WYQMs= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= @@ -421,6 +429,8 @@ github.com/go-playground/validator/v10 v10.14.1 h1:9c50NUPC30zyuKprjL3vNZ0m5oG+j github.com/go-playground/validator/v10 v10.14.1/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw= +github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-test/deep v1.0.5/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8= @@ -436,6 +446,8 @@ github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= +github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4= @@ -451,8 +463,8 @@ github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzq github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= -github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= +github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= +github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -496,6 +508,7 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= @@ -516,7 +529,6 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -553,8 +565,8 @@ github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= +github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= @@ -571,8 +583,8 @@ github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRid github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= -github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= +github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.1-0.20190629185528-ae1634f6a989/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= @@ -587,8 +599,9 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de github.com/grpc-ecosystem/go-grpc-middleware v1.2.2/go.mod h1:EaizFBKfUKtMIF5iaDEhniwNedqGo9FuLFzppDr3uwI= github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2 v2.0.0-rc.2 h1:uxUHSMwWDJ/9jVPHNumRC8WZOi3hrBL22ObVOoLg4ww= github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2 v2.0.0-rc.2/go.mod h1:BL7w7qd2l/j9jgY6WMhYutfOFQc0I8RTVwtjpnAMoTM= -github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-20200501113911-9a95f0fdbfea h1:1Tk1IbruXbunEnaIZEFb+Hpv9BIZti3OxKwKn5wWyKk= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-20200501113911-9a95f0fdbfea/go.mod h1:GugMBs30ZSAkckqXEAIEGyYdDH6EgqowG8ppA3Zt+AY= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 h1:1aeRCnE2CkKYqyzBu0+B2lgTcZPc3ea2lGpijeHbI1c= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2/go.mod h1:GhphxcdlaRyAuBSvo6rV71BvQcvB/vuX8ugCyybuS2k= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= @@ -631,6 +644,8 @@ github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/holiman/bloomfilter/v2 v2.0.3 h1:73e0e/V0tCydx14a0SCYS/EWCxgwLZ18CZcZKVu0fao= +github.com/holiman/bloomfilter/v2 v2.0.3/go.mod h1:zpoh+gs7qcpqrHr3dB55AMiJwo0iURXE7ZOP9L9hSkA= github.com/holiman/uint256 v1.2.2-0.20230321075855-87b91420868c h1:DZfsyhDK1hnSS5lH8l+JggqzEleHteTYfutAiVlSUM8= github.com/holiman/uint256 v1.2.2-0.20230321075855-87b91420868c/go.mod h1:SC8Ryt4n+UBbPbIBKaG9zbbDlp4jOru9xFZmPzLUTxw= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= @@ -864,6 +879,7 @@ github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= @@ -1272,12 +1288,16 @@ github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OS github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo= github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.2-0.20190409134802-7e037d187b0c/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onflow/atree v0.1.0-beta1.0.20211027184039-559ee654ece9/go.mod h1:+6x071HgCF/0v5hQcaE5qqjc2UqN5gCU8h5Mk6uqpOg= github.com/onflow/atree v0.6.0 h1:j7nQ2r8npznx4NX39zPpBYHmdy45f4xwoi+dm37Jk7c= github.com/onflow/atree v0.6.0/go.mod h1:gBHU0M05qCbv9NN0kijLWMgC47gHVNBIp4KmsVFi0tc= @@ -1294,13 +1314,13 @@ github.com/onflow/flow-go-sdk v0.24.0/go.mod h1:IoptMLPyFXWvyd9yYA6/4EmSeeozl6nJ github.com/onflow/flow-go-sdk v0.41.16 h1:HsmHwEVmj+iK+GszHbFseHh7Ii5W3PWOIRNAH/En08Q= github.com/onflow/flow-go-sdk v0.41.16/go.mod h1:bVrVNoJKiwB6vW5Qbm5tFAfJBQ5we4uSQWnn9gNAFhQ= github.com/onflow/flow-go/crypto v0.21.3/go.mod h1:vI6V4CY3R6c4JKBxdcRiR/AnjBfL8OSD97bJc60cLuQ= -github.com/onflow/flow-go/crypto v0.24.9 h1:0EQp+kSZYJepMIiSypfJVe7tzsPcb6UXOdOtsTCDhBs= -github.com/onflow/flow-go/crypto v0.24.9/go.mod h1:fqCzkIBBMRRkciVrvW21rECKq1oD7Q6u+bCI78lfNX0= +github.com/onflow/flow-go/crypto v0.25.0 h1:6lmoiAQ3APCF+nV7f4f2AXL3PuDKqQiWqRJXmjrMEq4= +github.com/onflow/flow-go/crypto v0.25.0/go.mod h1:OOb2vYcS8AOCajBClhHTJ0NKftFl1RQgTQ0+Vh4nbqk= github.com/onflow/flow-nft/lib/go/contracts v1.1.0 h1:rhUDeD27jhLwOqQKI/23008CYfnqXErrJvc4EFRP2a0= github.com/onflow/flow-nft/lib/go/contracts v1.1.0/go.mod h1:YsvzYng4htDgRB9sa9jxdwoTuuhjK8WYWXTyLkIigZY= github.com/onflow/flow/protobuf/go/flow v0.2.2/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= -github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8 h1:AsIyEDiwxpRAifgBK/0lsjEdNfqFtHqNHedpMeHoA4w= -github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8/go.mod h1:NA2pX2nw8zuaxfKphhKsk00kWLwfd+tv8mS23YXO4Sk= +github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9 h1:EjTkQnXvoH/yQLoGxVRZObuE8oxcanEu+IxwBwb/gSQ= +github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9/go.mod h1:NA2pX2nw8zuaxfKphhKsk00kWLwfd+tv8mS23YXO4Sk= github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d h1:QcOAeEyF3iAUHv21LQ12sdcsr0yFrJGoGLyCAzYYtvI= github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d/go.mod h1:GCPpiyRoHncdqPj++zPr9ZOYBX4hpJ0pYZRYqSE8VKk= github.com/onflow/sdks v0.5.0 h1:2HCRibwqDaQ1c9oUApnkZtEAhWiNY2GTpRD5+ftdkN8= @@ -1314,6 +1334,7 @@ github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+ github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= github.com/onsi/gomega v1.4.1/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= @@ -1378,8 +1399,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.10.0/go.mod h1:WJM3cc3yu7XKBKa/I8WeZm+V3eltZnBwfENSU7mdogU= -github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw= -github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -1408,8 +1429,8 @@ github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+Gx github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.3.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= -github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= +github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= +github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= github.com/prometheus/tsdb v0.6.2-0.20190402121629-4f204dcbc150/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/psiemens/sconfig v0.1.0 h1:xfWqW+TRpih7mXZIqKYTmpRhlZLQ1kbxV8EjllPv76s= @@ -1463,6 +1484,8 @@ github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sethvargo/go-retry v0.2.3 h1:oYlgvIvsju3jNbottWABtbnoLC+GDtLdBHxKWxQm/iU= github.com/sethvargo/go-retry v0.2.3/go.mod h1:1afjQuvh7s4gflMObvjLPaWgluLLyhA1wmVZ6KLpICw= +github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible h1:Bn1aCHHRnjv4Bl16T8rcaFjYSrGrIZvpiGO6P3Q4GpU= +github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shirou/gopsutil/v3 v3.22.2 h1:wCrArWFkHYIdDxx/FSfF5RB4dpJYW6t7rcp3+zL8uks= github.com/shirou/gopsutil/v3 v3.22.2/go.mod h1:WapW1AOOPlHyXr+yOyw3uYx36enocrtSoSBy0L5vUHY= github.com/shurcooL/component v0.0.0-20170202220835-f88ec8f54cc4/go.mod h1:XhFIlyj5a1fBNx5aJTbKoIq0mNaPvOagO+HjB3EtxrY= @@ -1522,8 +1545,8 @@ github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= @@ -1562,9 +1585,10 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8= github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= github.com/supranational/blst v0.3.4/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw= -github.com/supranational/blst v0.3.11-0.20230406105308-e9dfc5ee724b h1:u49mjRnygnB34h8OKbnNJFVUtWSKIKb1KukdV8bILUM= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= +github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY= +github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c h1:HelZ2kAFadG0La9d+4htN4HzQ68Bm2iM9qKMSMES6xg= github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c/go.mod h1:JlzghshsemAMDGZLytTFY8C1JQxQPhnatWqNwUXjggo= @@ -1841,6 +1865,7 @@ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= @@ -1857,8 +1882,8 @@ golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= -golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= +golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -1877,8 +1902,8 @@ golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8= -golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI= +golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= +golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1958,6 +1983,7 @@ golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200918174421-af09f7315aff/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1977,6 +2003,7 @@ golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -2237,13 +2264,13 @@ google.golang.org/genproto v0.0.0-20210921142501-181ce0d877f6/go.mod h1:5CzLGKJ6 google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211007155348-82e027067bd4/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98 h1:Z0hjGZePRE0ZBWotvtrwxFNrNE9CUAGtplaDK5NNI/g= -google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98/go.mod h1:S7mY02OqCJTD0E1OiQy1F72PWFB4bZJ87cAtLPYgDR0= -google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98 h1:FmF5cCW94Ij59cfpoLiwTgodWmm60eEV0CjlsVg2fuw= -google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98/go.mod h1:rsr7RhLuwsDKL7RmgDDCUc6yaGr1iqceVb5Wv6f6YvQ= +google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= +google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= +google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= +google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= google.golang.org/genproto/googleapis/bytestream v0.0.0-20230530153820-e85fd2cbaebc h1:g3hIDl0jRNd9PPTs2uBzYuaD5mQuwOkZY0vSc0LR32o= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 h1:bVf09lpb+OJbByTj913DRJioFFAjf/ZGxEz7MajTp2U= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= @@ -2281,8 +2308,8 @@ google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnD google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= -google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= +google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 h1:TLkBREm4nIsEcexnCjgQd5GQWaHcqMzwQV0TX9pq8S0= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0/go.mod h1:DNq5QpG7LJqD2AamLZ7zvKE0DEpVl2BSEVjFycAAjRY= @@ -2319,12 +2346,14 @@ gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce h1:+JknDZhAj8YMt7GC73Ei8pv4MzjDUNPHgQWJdtMAaDU= gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce/go.mod h1:5AcXVHNjg+BDxry382+8OKon8SEWiKktQR07RKPsv1c= gopkg.in/olebedev/go-duktape.v3 v3.0.0-20190213234257-ec84240a7772/go.mod h1:uAJfkITjFhyEEuUfm7bsmCZRbW5WRq8s9EY8HZ6hCns= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/sourcemap.v1 v1.0.5/go.mod h1:2RlvNNSMglmRrcvhfuzp4hQHwOtjxlbjX7UPY/GXb78= gopkg.in/src-d/go-cli.v0 v0.0.0-20181105080154-d492247bbc0d/go.mod h1:z+K8VcOYVYcSwSjGebuDL6176A1XskgbtNl64NSg+n8= gopkg.in/src-d/go-log.v1 v1.0.1/go.mod h1:GN34hKP0g305ysm2/hctJ0Y8nWP3zxXXJ8GFabTyABE= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/urfave/cli.v1 v1.20.0/go.mod h1:vuBzUtMdQeixQj8LVd+/98pzhxNGQoyuPBlsXHOQNO0= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= diff --git a/insecure/integration/functional/test/gossipsub/rpc_inspector/metrics_inspector_test.go b/insecure/integration/functional/test/gossipsub/rpc_inspector/metrics_inspector_test.go index 2914f6fa895..baafcc70f87 100644 --- a/insecure/integration/functional/test/gossipsub/rpc_inspector/metrics_inspector_test.go +++ b/insecure/integration/functional/test/gossipsub/rpc_inspector/metrics_inspector_test.go @@ -25,7 +25,6 @@ import ( // TestMetricsInspector_ObserveRPC ensures that the gossipsub rpc metrics inspector observes metrics for control messages as expected. func TestMetricsInspector_ObserveRPC(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() idProvider := unittest.NewUpdatableIDProvider(flow.IdentityList{}) @@ -74,9 +73,9 @@ func TestMetricsInspector_ObserveRPC(t *testing.T) { defer stopComponents(t, cancel, nodes, metricsInspector) // prepare to spam - generate control messages ctlMsgs := spammer.GenerateCtlMessages(controlMessageCount, - corruptlibp2p.WithGraft(messageCount, channels.PushBlocks.String()), - corruptlibp2p.WithPrune(messageCount, channels.PushBlocks.String()), - corruptlibp2p.WithIHave(messageCount, 1000, channels.PushBlocks.String())) + p2ptest.WithGraft(messageCount, channels.PushBlocks.String()), + p2ptest.WithPrune(messageCount, channels.PushBlocks.String()), + p2ptest.WithIHave(messageCount, 1000, channels.PushBlocks.String())) // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, ctlMsgs) diff --git a/insecure/integration/functional/test/gossipsub/rpc_inspector/utils.go b/insecure/integration/functional/test/gossipsub/rpc_inspector/utils.go index 863fb36a898..555a06a6bba 100644 --- a/insecure/integration/functional/test/gossipsub/rpc_inspector/utils.go +++ b/insecure/integration/functional/test/gossipsub/rpc_inspector/utils.go @@ -58,22 +58,6 @@ func withExpectedNotificationDissemination(expectedNumOfTotalNotif int, f onNoti } } -// mockDistributorReadyDoneAware mocks the Ready and Done methods of the distributor to return a channel that is already closed, -// so that the distributor is considered ready and done when the test needs. -func mockDistributorReadyDoneAware(d *mockp2p.GossipSubInspectorNotificationDistributor) { - d.On("Start", mockery.Anything).Return().Maybe() - d.On("Ready").Return(func() <-chan struct{} { - ch := make(chan struct{}) - close(ch) - return ch - }()).Maybe() - d.On("Done").Return(func() <-chan struct{} { - ch := make(chan struct{}) - close(ch) - return ch - }()).Maybe() -} - func meshTracerFixture(flowConfig *config.FlowConfig, idProvider module.IdentityProvider) *tracer.GossipSubMeshTracer { meshTracerCfg := &tracer.GossipSubMeshTracerConfig{ Logger: unittest.Logger(), diff --git a/insecure/integration/functional/test/gossipsub/rpc_inspector/validation_inspector_test.go b/insecure/integration/functional/test/gossipsub/rpc_inspector/validation_inspector_test.go index 50434ad6de8..6ca319fae5f 100644 --- a/insecure/integration/functional/test/gossipsub/rpc_inspector/validation_inspector_test.go +++ b/insecure/integration/functional/test/gossipsub/rpc_inspector/validation_inspector_test.go @@ -22,11 +22,13 @@ import ( "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/module/mock" + "github.com/onflow/flow-go/network" "github.com/onflow/flow-go/network/channels" "github.com/onflow/flow-go/network/p2p" "github.com/onflow/flow-go/network/p2p/inspector/validation" p2pmsg "github.com/onflow/flow-go/network/p2p/message" mockp2p "github.com/onflow/flow-go/network/p2p/mock" + "github.com/onflow/flow-go/network/p2p/scoring" p2ptest "github.com/onflow/flow-go/network/p2p/test" "github.com/onflow/flow-go/utils/unittest" ) @@ -38,7 +40,6 @@ import ( // - malformed topic: topic is malformed in some way // - invalid spork ID: spork ID prepended to topic and current spork ID do not match func TestValidationInspector_InvalidTopicId_Detection(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() flowConfig, err := config.DefaultConfig() @@ -61,6 +62,7 @@ func TestValidationInspector_InvalidTopicId_Detection(t *testing.T) { count.Inc() notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) + require.Equal(t, notification.TopicType, p2p.CtrlMsgNonClusterTopicType, "IsClusterPrefixed is expected to be false, no RPC with cluster prefixed topic sent in this test") require.Equal(t, spammer.SpammerNode.ID(), notification.PeerID) require.True(t, channels.IsInvalidTopicErr(notification.Error)) switch notification.MsgType { @@ -86,37 +88,45 @@ func TestValidationInspector_InvalidTopicId_Detection(t *testing.T) { signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) withExpectedNotificationDissemination(expectedNumOfTotalNotif, inspectDisseminatedNotifyFunc)(distributor, spammer) meshTracer := meshTracerFixture(flowConfig, idProvider) - - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() // create unknown topic - unknownTopic := channels.Topic(fmt.Sprintf("%s/%s", corruptlibp2p.GossipSubTopicIdFixture(), sporkID)) + unknownTopic := channels.Topic(fmt.Sprintf("%s/%s", p2ptest.GossipSubTopicIdFixture(), sporkID)) // create malformed topic malformedTopic := channels.Topic(unittest.RandomStringFixture(t, 100)) // a topics spork ID is considered invalid if it does not match the current spork ID invalidSporkIDTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, unittest.IdentifierFixture())) // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{unknownTopic.String(), malformedTopic.String(), invalidSporkIDTopic.String()} - })) + topicProvider.UpdateTopics([]string{unknownTopic.String(), malformedTopic.String(), invalidSporkIDTopic.String()}) validationInspector.Start(signalerCtx) nodes := []p2p.LibP2PNode{victimNode, spammer.SpammerNode} @@ -125,17 +135,17 @@ func TestValidationInspector_InvalidTopicId_Detection(t *testing.T) { defer stopComponents(t, cancel, nodes, validationInspector) // prepare to spam - generate control messages - graftCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithGraft(messageCount, unknownTopic.String())) - graftCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithGraft(messageCount, malformedTopic.String())) - graftCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithGraft(messageCount, invalidSporkIDTopic.String())) + graftCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithGraft(messageCount, unknownTopic.String())) + graftCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithGraft(messageCount, malformedTopic.String())) + graftCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithGraft(messageCount, invalidSporkIDTopic.String())) - pruneCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithPrune(messageCount, unknownTopic.String())) - pruneCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithPrune(messageCount, malformedTopic.String())) - pruneCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithPrune(messageCount, invalidSporkIDTopic.String())) + pruneCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithPrune(messageCount, unknownTopic.String())) + pruneCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithPrune(messageCount, malformedTopic.String())) + pruneCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithPrune(messageCount, invalidSporkIDTopic.String())) - iHaveCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithIHave(messageCount, 1000, unknownTopic.String())) - iHaveCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithIHave(messageCount, 1000, malformedTopic.String())) - iHaveCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithIHave(messageCount, 1000, invalidSporkIDTopic.String())) + iHaveCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithIHave(messageCount, 1000, unknownTopic.String())) + iHaveCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithIHave(messageCount, 1000, malformedTopic.String())) + iHaveCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithIHave(messageCount, 1000, invalidSporkIDTopic.String())) // spam the victim peer with invalid graft messages spammer.SpamControlMessage(t, victimNode, graftCtlMsgsWithUnknownTopic) @@ -164,7 +174,6 @@ func TestValidationInspector_InvalidTopicId_Detection(t *testing.T) { // TestValidationInspector_DuplicateTopicId_Detection ensures that when an RPC control message contains a duplicate topic ID an invalid control message // notification is disseminated with the expected error. func TestValidationInspector_DuplicateTopicId_Detection(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() flowConfig, err := config.DefaultConfig() @@ -187,6 +196,7 @@ func TestValidationInspector_DuplicateTopicId_Detection(t *testing.T) { count.Inc() notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) + require.Equal(t, notification.TopicType, p2p.CtrlMsgNonClusterTopicType, "IsClusterPrefixed is expected to be false, no RPC with cluster prefixed topic sent in this test") require.True(t, validation.IsDuplicateTopicErr(notification.Error)) require.Equal(t, spammer.SpammerNode.ID(), notification.PeerID) switch notification.MsgType { @@ -213,32 +223,41 @@ func TestValidationInspector_DuplicateTopicId_Detection(t *testing.T) { signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) withExpectedNotificationDissemination(expectedNumOfTotalNotif, inspectDisseminatedNotifyFunc)(distributor, spammer) meshTracer := meshTracerFixture(flowConfig, idProvider) - - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) + corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() // a topics spork ID is considered invalid if it does not match the current spork ID duplicateTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, sporkID)) // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{duplicateTopic.String()} - })) + topicProvider.UpdateTopics([]string{duplicateTopic.String()}) validationInspector.Start(signalerCtx) nodes := []p2p.LibP2PNode{victimNode, spammer.SpammerNode} @@ -247,9 +266,9 @@ func TestValidationInspector_DuplicateTopicId_Detection(t *testing.T) { defer stopComponents(t, cancel, nodes, validationInspector) // prepare to spam - generate control messages - graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithGraft(messageCount, duplicateTopic.String())) - ihaveCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithIHave(messageCount, 10, duplicateTopic.String())) - pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithPrune(messageCount, duplicateTopic.String())) + graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithGraft(messageCount, duplicateTopic.String())) + ihaveCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithIHave(messageCount, 10, duplicateTopic.String())) + pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithPrune(messageCount, duplicateTopic.String())) // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, graftCtlMsgsDuplicateTopic) @@ -266,7 +285,6 @@ func TestValidationInspector_DuplicateTopicId_Detection(t *testing.T) { // TestValidationInspector_IHaveDuplicateMessageId_Detection ensures that when an RPC iHave control message contains a duplicate message ID for a single topic // notification is disseminated with the expected error. func TestValidationInspector_IHaveDuplicateMessageId_Detection(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() flowConfig, err := config.DefaultConfig() @@ -284,9 +302,12 @@ func TestValidationInspector_IHaveDuplicateMessageId_Detection(t *testing.T) { count.Inc() notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) + require.Equal(t, notification.TopicType, p2p.CtrlMsgNonClusterTopicType, "IsClusterPrefixed is expected to be false, no RPC with cluster prefixed topic sent in this test") require.True(t, validation.IsDuplicateTopicErr(notification.Error)) require.Equal(t, spammer.SpammerNode.ID(), notification.PeerID) - require.True(t, notification.MsgType == p2pmsg.CtrlMsgIHave, fmt.Sprintf("unexpected control message type %s error: %s", notification.MsgType, notification.Error)) + require.True(t, + notification.MsgType == p2pmsg.CtrlMsgIHave, + fmt.Sprintf("unexpected control message type %s error: %s", notification.MsgType, notification.Error)) invIHaveNotifCount.Inc() if count.Load() == int64(expectedNumOfTotalNotif) { @@ -302,23 +323,35 @@ func TestValidationInspector_IHaveDuplicateMessageId_Detection(t *testing.T) { signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) withExpectedNotificationDissemination(expectedNumOfTotalNotif, inspectDisseminatedNotifyFunc)(distributor, spammer) meshTracer := meshTracerFixture(flowConfig, idProvider) - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) + corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() @@ -326,9 +359,7 @@ func TestValidationInspector_IHaveDuplicateMessageId_Detection(t *testing.T) { pushBlocks := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, sporkID)) reqChunks := channels.Topic(fmt.Sprintf("%s/%s", channels.RequestChunks, sporkID)) // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{pushBlocks.String(), reqChunks.String()} - })) + topicProvider.UpdateTopics([]string{pushBlocks.String(), reqChunks.String()}) validationInspector.Start(signalerCtx) nodes := []p2p.LibP2PNode{victimNode, spammer.SpammerNode} @@ -337,8 +368,8 @@ func TestValidationInspector_IHaveDuplicateMessageId_Detection(t *testing.T) { defer stopComponents(t, cancel, nodes, validationInspector) // generate 2 control messages with iHaves for different topics - ihaveCtlMsgs1 := spammer.GenerateCtlMessages(1, corruptlibp2p.WithIHave(1, 1, pushBlocks.String())) - ihaveCtlMsgs2 := spammer.GenerateCtlMessages(1, corruptlibp2p.WithIHave(1, 1, reqChunks.String())) + ihaveCtlMsgs1 := spammer.GenerateCtlMessages(1, p2ptest.WithIHave(1, 1, pushBlocks.String())) + ihaveCtlMsgs2 := spammer.GenerateCtlMessages(1, p2ptest.WithIHave(1, 1, reqChunks.String())) // duplicate message ids for a single topic is invalid and will cause an error ihaveCtlMsgs1[0].Ihave[0].MessageIDs = append(ihaveCtlMsgs1[0].Ihave[0].MessageIDs, ihaveCtlMsgs1[0].Ihave[0].MessageIDs[0]) @@ -357,7 +388,6 @@ func TestValidationInspector_IHaveDuplicateMessageId_Detection(t *testing.T) { // TestValidationInspector_UnknownClusterId_Detection ensures that when an RPC control message contains a topic with an unknown cluster ID an invalid control message // notification is disseminated with the expected error. func TestValidationInspector_UnknownClusterId_Detection(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() flowConfig, err := config.DefaultConfig() @@ -383,6 +413,7 @@ func TestValidationInspector_UnknownClusterId_Detection(t *testing.T) { count.Inc() notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) + require.Equal(t, notification.TopicType, p2p.CtrlMsgTopicTypeClusterPrefixed) require.Equal(t, spammer.SpammerNode.ID(), notification.PeerID) require.True(t, channels.IsUnknownClusterIDErr(notification.Error)) switch notification.MsgType { @@ -406,32 +437,42 @@ func TestValidationInspector_UnknownClusterId_Detection(t *testing.T) { signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) withExpectedNotificationDissemination(expectedNumOfTotalNotif, inspectDisseminatedNotifyFunc)(distributor, spammer) meshTracer := meshTracerFixture(flowConfig, idProvider) - - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) + corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() - idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Times(3) + idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Times(4) // setup cluster prefixed topic with an invalid cluster ID unknownClusterID := channels.Topic(channels.SyncCluster("unknown-cluster-ID")) // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{unknownClusterID.String()} - })) + topicProvider.UpdateTopics([]string{unknownClusterID.String()}) + // consume cluster ID update so that active cluster IDs set validationInspector.ActiveClustersChanged(flow.ChainIDList{"known-cluster-id"}) @@ -442,8 +483,8 @@ func TestValidationInspector_UnknownClusterId_Detection(t *testing.T) { defer stopComponents(t, cancel, nodes, validationInspector) // prepare to spam - generate control messages - graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithGraft(messageCount, unknownClusterID.String())) - pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithPrune(messageCount, unknownClusterID.String())) + graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithGraft(messageCount, unknownClusterID.String())) + pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithPrune(messageCount, unknownClusterID.String())) // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, graftCtlMsgsDuplicateTopic) @@ -459,7 +500,6 @@ func TestValidationInspector_UnknownClusterId_Detection(t *testing.T) { // cluster prefix hard threshold when the active cluster IDs not set and an invalid control message notification is disseminated with the expected error. // This test involves Graft control messages. func TestValidationInspector_ActiveClusterIdsNotSet_Graft_Detection(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() flowConfig, err := config.DefaultConfig() @@ -485,36 +525,49 @@ func TestValidationInspector_ActiveClusterIdsNotSet_Graft_Detection(t *testing.T }) logger := zerolog.New(os.Stdout).Level(zerolog.WarnLevel).Hook(hook) + inspectorIdProvider := mock.NewIdentityProvider(t) idProvider := mock.NewIdentityProvider(t) spammer := corruptlibp2p.NewGossipSubRouterSpammer(t, sporkID, role, idProvider) ctx, cancel := context.WithCancel(context.Background()) signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) meshTracer := meshTracerFixture(flowConfig, idProvider) - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, logger, sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: logger, + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: inspectorIdProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) + corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() - idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Times(int(controlMessageCount + 1)) - + idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() + // we expect controlMessageCount plus 1 extra call, this is due to messages that are exchanged when the nodes startup + inspectorIdProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Times(int(controlMessageCount + 1)) clusterPrefixedTopic := randomClusterPrefixedTopic() + // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{clusterPrefixedTopic.String()} - })) + topicProvider.UpdateTopics([]string{clusterPrefixedTopic.String()}) // we deliberately avoid setting the cluster IDs so that we eventually receive errors after we have exceeded the allowed cluster // prefixed hard threshold @@ -525,9 +578,7 @@ func TestValidationInspector_ActiveClusterIdsNotSet_Graft_Detection(t *testing.T defer stopComponents(t, cancel, nodes, validationInspector) // generate multiple control messages with GRAFT's for randomly generated // cluster prefixed channels, this ensures we do not encounter duplicate topic ID errors - ctlMsgs := spammer.GenerateCtlMessages(int(controlMessageCount), - corruptlibp2p.WithGraft(1, clusterPrefixedTopic.String()), - ) + ctlMsgs := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithGraft(1, clusterPrefixedTopic.String())) // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, ctlMsgs) @@ -538,7 +589,6 @@ func TestValidationInspector_ActiveClusterIdsNotSet_Graft_Detection(t *testing.T // cluster prefix hard threshold when the active cluster IDs not set and an invalid control message notification is disseminated with the expected error. // This test involves Prune control messages. func TestValidationInspector_ActiveClusterIdsNotSet_Prune_Detection(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() flowConfig, err := config.DefaultConfig() @@ -569,30 +619,41 @@ func TestValidationInspector_ActiveClusterIdsNotSet_Prune_Detection(t *testing.T signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) meshTracer := meshTracerFixture(flowConfig, idProvider) - - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, logger, sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + inspectorIdProvider := mock.NewIdentityProvider(t) + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: logger, + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: inspectorIdProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() - idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Times(int(controlMessageCount + 1)) + idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() + // we expect controlMessageCount plus 1 extra call, this is due to messages that are exchanged when the nodes startup + inspectorIdProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Times(int(controlMessageCount + 1)) clusterPrefixedTopic := randomClusterPrefixedTopic() // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{clusterPrefixedTopic.String()} - })) + topicProvider.UpdateTopics([]string{clusterPrefixedTopic.String()}) // we deliberately avoid setting the cluster IDs so that we eventually receive errors after we have exceeded the allowed cluster // prefixed hard threshold @@ -603,9 +664,7 @@ func TestValidationInspector_ActiveClusterIdsNotSet_Prune_Detection(t *testing.T defer stopComponents(t, cancel, nodes, validationInspector) // generate multiple control messages with GRAFT's for randomly generated // cluster prefixed channels, this ensures we do not encounter duplicate topic ID errors - ctlMsgs := spammer.GenerateCtlMessages(int(controlMessageCount), - corruptlibp2p.WithPrune(1, clusterPrefixedTopic.String()), - ) + ctlMsgs := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithPrune(1, clusterPrefixedTopic.String())) // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, ctlMsgs) @@ -615,7 +674,6 @@ func TestValidationInspector_ActiveClusterIdsNotSet_Prune_Detection(t *testing.T // TestValidationInspector_UnstakedNode_Detection ensures that RPC control message inspector disseminates an invalid control message notification when an unstaked peer // sends a control message for a cluster prefixed topic. func TestValidationInspector_UnstakedNode_Detection(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() flowConfig, err := config.DefaultConfig() @@ -652,24 +710,38 @@ func TestValidationInspector_UnstakedNode_Detection(t *testing.T) { signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) meshTracer := meshTracerFixture(flowConfig, idProvider) - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, logger, sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + inspectorIdProvider := mock.NewIdentityProvider(t) + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: logger, + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: inspectorIdProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() - idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(nil, false).Times(3) + idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() + // we expect 2 calls from notification inspection plus 1 extra call, this is due to messages that are exchanged when the nodes startup + inspectorIdProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(nil, false).Times(3) // setup cluster prefixed topic with an invalid cluster ID clusterID := flow.ChainID("known-cluster-id") @@ -678,9 +750,7 @@ func TestValidationInspector_UnstakedNode_Detection(t *testing.T) { validationInspector.ActiveClustersChanged(flow.ChainIDList{clusterID}) // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{clusterIDTopic.String()} - })) + topicProvider.UpdateTopics([]string{clusterIDTopic.String()}) validationInspector.Start(signalerCtx) nodes := []p2p.LibP2PNode{victimNode, spammer.SpammerNode} @@ -689,8 +759,8 @@ func TestValidationInspector_UnstakedNode_Detection(t *testing.T) { defer stopComponents(t, cancel, nodes, validationInspector) // prepare to spam - generate control messages - graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithGraft(messageCount, clusterIDTopic.String())) - pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithPrune(messageCount, clusterIDTopic.String())) + graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithGraft(messageCount, clusterIDTopic.String())) + pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithPrune(messageCount, clusterIDTopic.String())) // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, graftCtlMsgsDuplicateTopic) @@ -702,7 +772,6 @@ func TestValidationInspector_UnstakedNode_Detection(t *testing.T) { // TestValidationInspector_InspectIWants_CacheMissThreshold ensures that expected invalid control message notification is disseminated when the number of iWant message Ids // without a corresponding iHave message sent with the same message ID exceeds the configured cache miss threshold. func TestValidationInspector_InspectIWants_CacheMissThreshold(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() // create our RPC validation inspector @@ -710,7 +779,7 @@ func TestValidationInspector_InspectIWants_CacheMissThreshold(t *testing.T) { require.NoError(t, err) inspectorConfig := flowConfig.NetworkConfig.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs // force all cache miss checks - inspectorConfig.IWantRPCInspectionConfig.CacheMissCheckSize = 0 + inspectorConfig.IWantRPCInspectionConfig.CacheMissCheckSize = 1 inspectorConfig.NumberOfWorkers = 1 inspectorConfig.IWantRPCInspectionConfig.CacheMissThreshold = .5 // set cache miss threshold to 50% messageCount := 1 @@ -722,8 +791,11 @@ func TestValidationInspector_InspectIWants_CacheMissThreshold(t *testing.T) { return func(args mockery.Arguments) { notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) + require.Equal(t, notification.TopicType, p2p.CtrlMsgNonClusterTopicType, "IsClusterPrefixed is expected to be false, no RPC with cluster prefixed topic sent in this test") require.Equal(t, spammer.SpammerNode.ID(), notification.PeerID) - require.True(t, notification.MsgType == p2pmsg.CtrlMsgIWant, fmt.Sprintf("unexpected control message type %s error: %s", notification.MsgType, notification.Error)) + require.True(t, + notification.MsgType == p2pmsg.CtrlMsgIWant, + fmt.Sprintf("unexpected control message type %s error: %s", notification.MsgType, notification.Error)) require.True(t, validation.IsIWantCacheMissThresholdErr(notification.Error)) cacheMissThresholdNotifCount.Inc() @@ -740,40 +812,48 @@ func TestValidationInspector_InspectIWants_CacheMissThreshold(t *testing.T) { signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) withExpectedNotificationDissemination(1, inspectDisseminatedNotifyFunc)(distributor, spammer) meshTracer := meshTracerFixture(flowConfig, idProvider) - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) + topicProvider := newMockUpdatableTopicProvider() + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) require.NoError(t, err) corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() - messageIDs := corruptlibp2p.GossipSubMessageIdsFixture(10) + messageIDs := p2ptest.GossipSubMessageIdsFixture(10) // create control message with iWant that contains 5 message IDs that were not tracked - ctlWithIWants := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithIWant(messageCount, messageCount)) + ctlWithIWants := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithIWant(messageCount, messageCount)) ctlWithIWants[0].Iwant[0].MessageIDs = messageIDs // the first 5 message ids will not have a corresponding iHave - topic := channels.PushBlocks // create control message with iHave that contains only the last 4 message IDs, this will force cache misses for the other 6 message IDs - ctlWithIhaves := spammer.GenerateCtlMessages(int(controlMessageCount), corruptlibp2p.WithIHave(messageCount, messageCount, topic.String())) + ctlWithIhaves := spammer.GenerateCtlMessages(int(controlMessageCount), p2ptest.WithIHave(messageCount, messageCount, topic.String())) ctlWithIhaves[0].Ihave[0].MessageIDs = messageIDs[6:] // set topic oracle - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{topic.String()} - })) + topicProvider.UpdateTopics([]string{topic.String()}) validationInspector.Start(signalerCtx) nodes := []p2p.LibP2PNode{victimNode, spammer.SpammerNode} startNodesAndEnsureConnected(t, signalerCtx, nodes, sporkID) @@ -797,7 +877,6 @@ func TestValidationInspector_InspectIWants_CacheMissThreshold(t *testing.T) { // TestValidationInspector_InspectRpcPublishMessages ensures that expected invalid control message notification is disseminated when the number of errors encountered during // RPC publish message validation exceeds the configured error threshold. func TestValidationInspector_InspectRpcPublishMessages(t *testing.T) { - t.Parallel() role := flow.RoleConsensus sporkID := unittest.IdentifierFixture() // create our RPC validation inspector @@ -805,20 +884,60 @@ func TestValidationInspector_InspectRpcPublishMessages(t *testing.T) { require.NoError(t, err) inspectorConfig := flowConfig.NetworkConfig.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs inspectorConfig.NumberOfWorkers = 1 - // after 5 errors encountered disseminate a notification - inspectorConfig.RpcMessageErrorThreshold = 4 + + idProvider := mock.NewIdentityProvider(t) + spammer := corruptlibp2p.NewGossipSubRouterSpammer(t, sporkID, role, idProvider) controlMessageCount := int64(1) notificationCount := atomic.NewUint64(0) done := make(chan struct{}) + validTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.TestNetworkChannel.String(), sporkID)).String() + // create unknown topic + unknownTopic := channels.Topic(fmt.Sprintf("%s/%s", p2ptest.GossipSubTopicIdFixture(), sporkID)).String() + // create malformed topic + malformedTopic := channels.Topic(unittest.RandomStringFixture(t, 100)).String() + // a topics spork ID is considered invalid if it does not match the current spork ID + invalidSporkIDTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, unittest.IdentifierFixture())).String() + + // unknown peer ID + unknownPeerID := unittest.PeerIdFixture(t) + + // ejected identity + ejectedIdentityPeerID := unittest.PeerIdFixture(t) + ejectedIdentity := unittest.IdentityFixture() + ejectedIdentity.EpochParticipationStatus = flow.EpochParticipationStatusEjected + + // invalid messages this should force a notification to disseminate + invalidPublishMsgs := []*pb.Message{ + {Topic: &unknownTopic, From: []byte(spammer.SpammerNode.ID())}, + {Topic: &malformedTopic, From: []byte(spammer.SpammerNode.ID())}, + {Topic: &malformedTopic, From: []byte(spammer.SpammerNode.ID())}, + {Topic: &malformedTopic, From: []byte(spammer.SpammerNode.ID())}, + {Topic: &invalidSporkIDTopic, From: []byte(spammer.SpammerNode.ID())}, + {Topic: &validTopic, From: []byte(unknownPeerID)}, + {Topic: &validTopic, From: []byte(ejectedIdentityPeerID)}, + } + topic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, sporkID)) + // first create 4 valid messages + publishMsgs := unittest.GossipSubMessageFixtures(4, topic.String(), unittest.WithFrom(spammer.SpammerNode.ID())) + publishMsgs = append(publishMsgs, invalidPublishMsgs...) // ensure expected notifications are disseminated with expected error inspectDisseminatedNotifyFunc := func(spammer *corruptlibp2p.GossipSubRouterSpammer) func(args mockery.Arguments) { return func(args mockery.Arguments) { notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) + require.Equal(t, notification.TopicType, p2p.CtrlMsgNonClusterTopicType, "IsClusterPrefixed is expected to be false, no RPC with cluster prefixed topic sent in this test") require.Equal(t, spammer.SpammerNode.ID(), notification.PeerID) - require.True(t, notification.MsgType == p2pmsg.RpcPublishMessage, fmt.Sprintf("unexpected control message type %s error: %s", notification.MsgType, notification.Error)) + require.True(t, + notification.MsgType == p2pmsg.RpcPublishMessage, + fmt.Sprintf("unexpected control message type %s error: %s", notification.MsgType, notification.Error)) require.True(t, validation.IsInvalidRpcPublishMessagesErr(notification.Error)) + require.Contains(t, + notification.Error.Error(), + fmt.Sprintf("%d error(s) encountered", len(invalidPublishMsgs)), + fmt.Sprintf("expected %d errors, an error for each invalid pubsub message", len(invalidPublishMsgs))) + require.Contains(t, notification.Error.Error(), fmt.Sprintf("received rpc publish message from unstaked peer: %s", unknownPeerID)) + require.Contains(t, notification.Error.Error(), fmt.Sprintf("received rpc publish message from ejected peer: %s", ejectedIdentityPeerID)) notificationCount.Inc() if notificationCount.Load() == 1 { close(done) @@ -826,55 +945,58 @@ func TestValidationInspector_InspectRpcPublishMessages(t *testing.T) { } } - idProvider := mock.NewIdentityProvider(t) - spammer := corruptlibp2p.NewGossipSubRouterSpammer(t, sporkID, role, idProvider) - ctx, cancel := context.WithCancel(context.Background()) signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) - mockDistributorReadyDoneAware(distributor) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) withExpectedNotificationDissemination(1, inspectDisseminatedNotifyFunc)(distributor, spammer) meshTracer := meshTracerFixture(flowConfig, idProvider) + topicProvider := newMockUpdatableTopicProvider() + validationInspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &inspectorConfig, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: meshTracer, + NetworkingType: network.PrivateNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) + require.NoError(t, err) + // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation + topics := make([]string, len(publishMsgs)) + for i := 0; i < len(publishMsgs); i++ { + topics[i] = publishMsgs[i].GetTopic() + } + topicProvider.UpdateTopics(topics) + + // after 7 errors encountered disseminate a notification + inspectorConfig.RpcMessageErrorThreshold = 6 - validationInspector, err := validation.NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &inspectorConfig, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), meshTracer) require.NoError(t, err) corruptInspectorFunc := corruptlibp2p.CorruptInspectorFunc(validationInspector) - victimNode, victimIdentity := p2ptest.NodeFixture( - t, + victimNode, victimIdentity := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, p2ptest.WithRole(role), p2ptest.WithGossipSubTracer(meshTracer), - internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), - corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc)), - ) + internal.WithCorruptGossipSub(corruptlibp2p.CorruptGossipSubFactory(), corruptlibp2p.CorruptGossipSubConfigFactoryWithInspector(corruptInspectorFunc))) idProvider.On("ByPeerID", victimNode.ID()).Return(&victimIdentity, true).Maybe() idProvider.On("ByPeerID", spammer.SpammerNode.ID()).Return(&spammer.SpammerId, true).Maybe() - topic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, sporkID)) - // first create 4 valid messages - publishMsgs := unittest.GossipSubMessageFixtures(t, 4, topic.String()) - // create unknown topic - unknownTopic := channels.Topic(fmt.Sprintf("%s/%s", corruptlibp2p.GossipSubTopicIdFixture(), sporkID)).String() - // create malformed topic - malformedTopic := channels.Topic(unittest.RandomStringFixture(t, 100)).String() - // a topics spork ID is considered invalid if it does not match the current spork ID - invalidSporkIDTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, unittest.IdentifierFixture())).String() - // append 5 messages with invalid topics, this should force a notification to disseminate - publishMsgs = append(publishMsgs, []*pb.Message{ - {Topic: &unknownTopic}, - {Topic: &malformedTopic}, - {Topic: &malformedTopic}, - {Topic: &malformedTopic}, - {Topic: &invalidSporkIDTopic}, - }...) + // return nil for unknown peer ID indicating unstaked peer + idProvider.On("ByPeerID", unknownPeerID).Return(nil, false).Once() + // return ejected identity for peer ID will force message validation failure + idProvider.On("ByPeerID", ejectedIdentityPeerID).Return(ejectedIdentity, true).Once() // set topic oracle to return list with all topics to avoid hasSubscription failures and force topic validation - require.NoError(t, validationInspector.SetTopicOracle(func() []string { - return []string{topic.String(), unknownTopic, malformedTopic, invalidSporkIDTopic} - })) + topicProvider.UpdateTopics([]string{topic.String(), unknownTopic, malformedTopic, invalidSporkIDTopic}) validationInspector.Start(signalerCtx) nodes := []p2p.LibP2PNode{victimNode, spammer.SpammerNode} @@ -885,7 +1007,6 @@ func TestValidationInspector_InspectRpcPublishMessages(t *testing.T) { // prepare to spam - generate control messages ctlMsg := spammer.GenerateCtlMessages(int(controlMessageCount)) - // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, ctlMsg, publishMsgs...) @@ -901,53 +1022,50 @@ func TestValidationInspector_InspectRpcPublishMessages(t *testing.T) { // The victim node is configured to use the GossipSubInspector to detect spam and the scoring system to mitigate spam. // The test ensures that the victim node is disconnected from the spammer node on the GossipSub mesh after the spam detection is triggered. func TestGossipSubSpamMitigationIntegration(t *testing.T) { - t.Parallel() idProvider := mock.NewIdentityProvider(t) sporkID := unittest.IdentifierFixture() spammer := corruptlibp2p.NewGossipSubRouterSpammer(t, sporkID, flow.RoleConsensus, idProvider) ctx, cancel := context.WithCancel(context.Background()) signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) - victimNode, victimId := p2ptest.NodeFixture( - t, + victimNode, victimId := p2ptest.NodeFixture(t, sporkID, t.Name(), idProvider, + p2ptest.WithPeerScoreTracerInterval(100*time.Millisecond), p2ptest.WithRole(flow.RoleConsensus), - p2ptest.EnablePeerScoringWithOverride(p2p.PeerScoringConfigNoOverride), - ) + p2ptest.EnablePeerScoringWithOverride(p2p.PeerScoringConfigNoOverride)) ids := flow.IdentityList{&victimId, &spammer.SpammerId} - idProvider.On("ByPeerID", mockery.Anything).Return( - func(peerId peer.ID) *flow.Identity { - switch peerId { - case victimNode.ID(): - return &victimId - case spammer.SpammerNode.ID(): - return &spammer.SpammerId - default: - return nil - } + idProvider.On("ByPeerID", mockery.Anything).Return(func(peerId peer.ID) *flow.Identity { + switch peerId { + case victimNode.ID(): + return &victimId + case spammer.SpammerNode.ID(): + return &spammer.SpammerId + default: + return nil + } - }, func(peerId peer.ID) bool { - switch peerId { - case victimNode.ID(): - fallthrough - case spammer.SpammerNode.ID(): - return true - default: - return false - } - }) + }, func(peerId peer.ID) bool { + switch peerId { + case victimNode.ID(): + fallthrough + case spammer.SpammerNode.ID(): + return true + default: + return false + } + }) - spamRpcCount := 10 // total number of individual rpc messages to send - spamCtrlMsgCount := int64(10) // total number of control messages to send on each RPC + spamRpcCount := 100 // total number of individual rpc messages to send + spamCtrlMsgCount := int64(100) // total number of control messages to send on each RPC // unknownTopic is an unknown topic to the victim node but shaped like a valid topic (i.e., it has the correct prefix and spork ID). - unknownTopic := channels.Topic(fmt.Sprintf("%s/%s", corruptlibp2p.GossipSubTopicIdFixture(), sporkID)) + unknownTopic := channels.Topic(fmt.Sprintf("%s/%s", p2ptest.GossipSubTopicIdFixture(), sporkID)) // malformedTopic is a topic that is not shaped like a valid topic (i.e., it does not have the correct prefix and spork ID). - malformedTopic := channels.Topic(unittest.RandomStringFixture(t, 100)) + malformedTopic := channels.Topic("!@#$%^&**((") // invalidSporkIDTopic is a topic that has a valid prefix but an invalid spork ID (i.e., not the current spork ID). invalidSporkIDTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, unittest.IdentifierFixture())) @@ -971,15 +1089,15 @@ func TestGossipSubSpamMitigationIntegration(t *testing.T) { }) // prepares spam graft and prune messages with different strategies. - graftCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithGraft(spamRpcCount, unknownTopic.String())) - graftCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithGraft(spamRpcCount, malformedTopic.String())) - graftCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithGraft(spamRpcCount, invalidSporkIDTopic.String())) - graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithGraft(3, duplicateTopic.String())) + graftCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithGraft(spamRpcCount, unknownTopic.String())) + graftCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithGraft(spamRpcCount, malformedTopic.String())) + graftCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithGraft(spamRpcCount, invalidSporkIDTopic.String())) + graftCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithGraft(3, duplicateTopic.String())) - pruneCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithPrune(spamRpcCount, unknownTopic.String())) - pruneCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithPrune(spamRpcCount, malformedTopic.String())) - pruneCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithGraft(spamRpcCount, invalidSporkIDTopic.String())) - pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), corruptlibp2p.WithPrune(3, duplicateTopic.String())) + pruneCtlMsgsWithUnknownTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithPrune(spamRpcCount, unknownTopic.String())) + pruneCtlMsgsWithMalformedTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithPrune(spamRpcCount, malformedTopic.String())) + pruneCtlMsgsInvalidSporkIDTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithGraft(spamRpcCount, invalidSporkIDTopic.String())) + pruneCtlMsgsDuplicateTopic := spammer.GenerateCtlMessages(int(spamCtrlMsgCount), p2ptest.WithPrune(3, duplicateTopic.String())) // start spamming the victim peer spammer.SpamControlMessage(t, victimNode, graftCtlMsgsWithUnknownTopic) @@ -993,12 +1111,14 @@ func TestGossipSubSpamMitigationIntegration(t *testing.T) { spammer.SpamControlMessage(t, victimNode, pruneCtlMsgsDuplicateTopic) // wait for three GossipSub heartbeat intervals to ensure that the victim node has penalized the spammer node. - time.Sleep(3 * time.Second) + require.Eventually(t, func() bool { + score, ok := victimNode.PeerScoreExposer().GetScore(spammer.SpammerNode.ID()) + return ok && score < 2*scoring.DefaultGraylistThreshold + }, 5*time.Second, 100*time.Millisecond, "expected victim node to penalize spammer node") // now we expect the detection and mitigation to kick in and the victim node to disconnect from the spammer node. // so the spammer and victim nodes should not be able to exchange messages on the topic. - p2ptest.EnsureNoPubsubExchangeBetweenGroups( - t, + p2ptest.EnsureNoPubsubExchangeBetweenGroups(t, ctx, []p2p.LibP2PNode{victimNode}, flow.IdentifierList{victimId.NodeID}, @@ -1010,3 +1130,36 @@ func TestGossipSubSpamMitigationIntegration(t *testing.T) { return unittest.ProposalFixture() }) } + +// mockUpdatableTopicProvider is a mock implementation of the TopicProvider interface. +// TODO: there is a duplicate implementation of this in the test package, we should consolidate them. +// The duplicate exists in network/p2p/inspector/internal/mockTopicProvider.go. The reason for duplication is that +// the inspector/validation package does not have a separate test package. Hence, sharing the mock implementation +// will cause a cyclic dependency. +type mockUpdatableTopicProvider struct { + topics []string + subscriptions map[string][]peer.ID +} + +func newMockUpdatableTopicProvider() *mockUpdatableTopicProvider { + return &mockUpdatableTopicProvider{ + topics: []string{}, + subscriptions: map[string][]peer.ID{}, + } +} + +func (m *mockUpdatableTopicProvider) GetTopics() []string { + return m.topics +} + +func (m *mockUpdatableTopicProvider) ListPeers(topic string) []peer.ID { + return m.subscriptions[topic] +} + +func (m *mockUpdatableTopicProvider) UpdateTopics(topics []string) { + m.topics = topics +} + +func (m *mockUpdatableTopicProvider) UpdateSubscriptions(topic string, peers []peer.ID) { + m.subscriptions[topic] = peers +} diff --git a/insecure/integration/functional/test/gossipsub/scoring/ihave_spam_test.go b/insecure/integration/functional/test/gossipsub/scoring/ihave_spam_test.go index 23a33b287ca..37524922907 100644 --- a/insecure/integration/functional/test/gossipsub/scoring/ihave_spam_test.go +++ b/insecure/integration/functional/test/gossipsub/scoring/ihave_spam_test.go @@ -34,6 +34,9 @@ import ( // Also, per hearbeat (i.e., decay interval), the spammer is allowed to send at most 5000 ihave messages (gossip sub parameter) on aggregate, and // excess messages are dropped (without being counted as broken promises). func TestGossipSubIHaveBrokenPromises_Below_Threshold(t *testing.T) { + + unittest.SkipUnless(t, unittest.TEST_FLAKY, "flaky test") + role := flow.RoleConsensus sporkId := unittest.IdentifierFixture() blockTopic := channels.TopicFromChannel(channels.PushBlocks, sporkId) @@ -118,9 +121,24 @@ func TestGossipSubIHaveBrokenPromises_Below_Threshold(t *testing.T) { spammerScore, ok := victimNode.PeerScoreExposer().GetScore(spammer.SpammerNode.ID()) require.True(t, ok, "sanity check failed, we should have a score for the spammer node") // since spammer is not yet considered to be penalized, its score must be greater than the gossipsub health thresholds. - require.Greaterf(t, spammerScore, scoring.DefaultGossipThreshold, "sanity check failed, the score of the spammer node must be greater than gossip threshold: %f, actual: %f", scoring.DefaultGossipThreshold, spammerScore) - require.Greaterf(t, spammerScore, scoring.DefaultPublishThreshold, "sanity check failed, the score of the spammer node must be greater than publish threshold: %f, actual: %f", scoring.DefaultPublishThreshold, spammerScore) - require.Greaterf(t, spammerScore, scoring.DefaultGraylistThreshold, "sanity check failed, the score of the spammer node must be greater than graylist threshold: %f, actual: %f", scoring.DefaultGraylistThreshold, spammerScore) + require.Greaterf(t, + spammerScore, + scoring.DefaultGossipThreshold, + "sanity check failed, the score of the spammer node must be greater than gossip threshold: %f, actual: %f", + scoring.DefaultGossipThreshold, + spammerScore) + require.Greaterf(t, + spammerScore, + scoring.DefaultPublishThreshold, + "sanity check failed, the score of the spammer node must be greater than publish threshold: %f, actual: %f", + scoring.DefaultPublishThreshold, + spammerScore) + require.Greaterf(t, + spammerScore, + scoring.DefaultGraylistThreshold, + "sanity check failed, the score of the spammer node must be greater than graylist threshold: %f, actual: %f", + scoring.DefaultGraylistThreshold, + spammerScore) // eventually, after a heartbeat the spammer behavioral counter must be decayed require.Eventually(t, func() bool { @@ -150,6 +168,7 @@ func TestGossipSubIHaveBrokenPromises_Below_Threshold(t *testing.T) { // Second round of attack makes spammers broken promises above the threshold of 10 RPCs, hence a degradation of the spammers score. // Third round of attack makes spammers broken promises to around 20 RPCs above the threshold, which causes the graylisting of the spammer node. func TestGossipSubIHaveBrokenPromises_Above_Threshold(t *testing.T) { + unittest.SkipUnless(t, unittest.TEST_FLAKY, "flaky in CI") role := flow.RoleConsensus sporkId := unittest.IdentifierFixture() blockTopic := channels.TopicFromChannel(channels.PushBlocks, sporkId) @@ -273,10 +292,30 @@ func TestGossipSubIHaveBrokenPromises_Above_Threshold(t *testing.T) { require.True(t, ok, "sanity check failed, we should have a score for the spammer node") // with the second round of the attack, the spammer is about 10 broken promises above the threshold (total ~20 broken promises, but the first 10 are not counted). // we expect the score to be dropped to initScore - 10 * 10 * 0.01 * scoring.MaxAppSpecificReward, however, instead of 10, we consider 8 about the threshold, to account for decays. - require.LessOrEqual(t, spammerScore, initScore-8*8*0.01*scoring.MaxAppSpecificReward, "sanity check failed, the score of the spammer node must be less than the initial score minus 8 * 8 * 0.01 * scoring.MaxAppSpecificReward: %f, actual: %f", initScore-10*10*10-2*scoring.MaxAppSpecificReward, spammerScore) - require.Greaterf(t, spammerScore, scoring.DefaultGossipThreshold, "sanity check failed, the score of the spammer node must be greater than gossip threshold: %f, actual: %f", scoring.DefaultGossipThreshold, spammerScore) - require.Greaterf(t, spammerScore, scoring.DefaultPublishThreshold, "sanity check failed, the score of the spammer node must be greater than publish threshold: %f, actual: %f", scoring.DefaultPublishThreshold, spammerScore) - require.Greaterf(t, spammerScore, scoring.DefaultGraylistThreshold, "sanity check failed, the score of the spammer node must be greater than graylist threshold: %f, actual: %f", scoring.DefaultGraylistThreshold, spammerScore) + require.LessOrEqual(t, + spammerScore, + initScore-8*8*0.01*scoring.MaxAppSpecificReward, + "sanity check failed, the score of the spammer node must be less than the initial score minus 8 * 8 * 0.01 * scoring.MaxAppSpecificReward: %f, actual: %f", + initScore-10*10*10-2*scoring.MaxAppSpecificReward, + spammerScore) + require.Greaterf(t, + spammerScore, + scoring.DefaultGossipThreshold, + "sanity check failed, the score of the spammer node must be greater than gossip threshold: %f, actual: %f", + scoring.DefaultGossipThreshold, + spammerScore) + require.Greaterf(t, + spammerScore, + scoring.DefaultPublishThreshold, + "sanity check failed, the score of the spammer node must be greater than publish threshold: %f, actual: %f", + scoring.DefaultPublishThreshold, + spammerScore) + require.Greaterf(t, + spammerScore, + scoring.DefaultGraylistThreshold, + "sanity check failed, the score of the spammer node must be greater than graylist threshold: %f, actual: %f", + scoring.DefaultGraylistThreshold, + spammerScore) // since the spammer score is above the gossip, graylist and publish thresholds, it should be still able to exchange messages with victim. p2ptest.EnsurePubsubMessageExchange(t, ctx, nodes, blockTopic, 1, func() interface{} { @@ -307,9 +346,24 @@ func TestGossipSubIHaveBrokenPromises_Above_Threshold(t *testing.T) { require.True(t, ok, "sanity check failed, we should have a score for the spammer node") // with the third round of the attack, the spammer is about 20 broken promises above the threshold (total ~30 broken promises), hence its overall score must be below the gossip, publish, and graylist thresholds, meaning that // victim will not exchange messages with it anymore, and also that it will be graylisted meaning all incoming and outgoing RPCs to and from the spammer will be dropped by the victim. - require.Lessf(t, spammerScore, scoring.DefaultGossipThreshold, "sanity check failed, the score of the spammer node must be less than gossip threshold: %f, actual: %f", scoring.DefaultGossipThreshold, spammerScore) - require.Lessf(t, spammerScore, scoring.DefaultPublishThreshold, "sanity check failed, the score of the spammer node must be less than publish threshold: %f, actual: %f", scoring.DefaultPublishThreshold, spammerScore) - require.Lessf(t, spammerScore, scoring.DefaultGraylistThreshold, "sanity check failed, the score of the spammer node must be less than graylist threshold: %f, actual: %f", scoring.DefaultGraylistThreshold, spammerScore) + require.Lessf(t, + spammerScore, + scoring.DefaultGossipThreshold, + "sanity check failed, the score of the spammer node must be less than gossip threshold: %f, actual: %f", + scoring.DefaultGossipThreshold, + spammerScore) + require.Lessf(t, + spammerScore, + scoring.DefaultPublishThreshold, + "sanity check failed, the score of the spammer node must be less than publish threshold: %f, actual: %f", + scoring.DefaultPublishThreshold, + spammerScore) + require.Lessf(t, + spammerScore, + scoring.DefaultGraylistThreshold, + "sanity check failed, the score of the spammer node must be less than graylist threshold: %f, actual: %f", + scoring.DefaultGraylistThreshold, + spammerScore) // since the spammer score is below the gossip, graylist and publish thresholds, it should not be able to exchange messages with victim anymore. p2ptest.EnsureNoPubsubExchangeBetweenGroups( @@ -335,8 +389,12 @@ func TestGossipSubIHaveBrokenPromises_Above_Threshold(t *testing.T) { // - topic: the topic to spam. // - receivedIWants: a map to keep track of the iWants received by the victim node (exclusive to TestGossipSubIHaveBrokenPromises). // - victimNode: the victim node. -func spamIHaveBrokenPromise(t *testing.T, spammer *corruptlibp2p.GossipSubRouterSpammer, topic string, receivedIWants *unittest.ProtectedMap[string, struct{}], victimNode p2p.LibP2PNode) { - spamMsgs := spammer.GenerateCtlMessages(1, corruptlibp2p.WithIHave(1, 500, topic)) +func spamIHaveBrokenPromise(t *testing.T, + spammer *corruptlibp2p.GossipSubRouterSpammer, + topic string, + receivedIWants *unittest.ProtectedMap[string, struct{}], + victimNode p2p.LibP2PNode) { + spamMsgs := spammer.GenerateCtlMessages(1, p2ptest.WithIHave(1, 500, topic)) var sentIHaves []string for _, msg := range spamMsgs { for _, iHave := range msg.Ihave { @@ -361,13 +419,17 @@ func spamIHaveBrokenPromise(t *testing.T, spammer *corruptlibp2p.GossipSubRouter unittest.AssertReturnsBefore(t, wg.Wait, 3*time.Second, "could not send RPCs on time") // wait till all the spam iHaves are responded with iWants. - require.Eventually(t, func() bool { - for _, msgId := range sentIHaves { - if _, ok := receivedIWants.Get(msgId); !ok { - return false + require.Eventually(t, + func() bool { + for _, msgId := range sentIHaves { + if _, ok := receivedIWants.Get(msgId); !ok { + return false + } } - } - return true - }, 5*time.Second, 100*time.Millisecond, fmt.Sprintf("sanity check failed, we should have received all the iWants for the spam iHaves, expected: %d, actual: %d", len(sentIHaves), receivedIWants.Size())) + return true + }, + 5*time.Second, + 100*time.Millisecond, + fmt.Sprintf("sanity check failed, we should have received all the iWants for the spam iHaves, expected: %d, actual: %d", len(sentIHaves), receivedIWants.Size())) } diff --git a/integration/Makefile b/integration/Makefile index b538289ae82..361013a41d1 100644 --- a/integration/Makefile +++ b/integration/Makefile @@ -8,6 +8,10 @@ else RACE_FLAG := endif +include ../crypto_adx_flag.mk + +CGO_FLAG := CGO_CFLAGS=$(CRYPTO_FLAG) + # Run the integration test suite .PHONY: integration-test integration-test: access-tests ghost-tests mvp-tests execution-tests verification-tests upgrades-tests collection-tests epochs-cohort1-tests epochs-cohort2-tests network-tests consensus-tests @@ -15,82 +19,78 @@ integration-test: access-tests ghost-tests mvp-tests execution-tests verificatio .PHONY: ci-integration-test ci-integration-test: access-tests ghost-tests mvp-tests epochs-cohort1-tests epochs-cohort2-tests consensus-tests execution-tests verification-tests upgrades-tests network-tests collection-tests -############################################################################################ -# CAUTION: DO NOT MODIFY THE TARGETS BELOW! DOING SO WILL BREAK THE FLAKY TEST MONITOR -# In particular, do not skip tests by commenting them out here. - # Run unit tests for test utilities in this module .PHONY: test test: - go test $(if $(VERBOSE),-v,) -tags relic -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) `go list ./... | grep -v -e integration/tests` + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) -coverprofile=$(COVER_PROFILE) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) `go list ./... | grep -v -e integration/tests` .PHONY: access-cohort1-tests access-cohort1-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/access/cohort1/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/access/cohort1/... .PHONY: access-cohort2-tests access-cohort2-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/access/cohort2/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/access/cohort2/... .PHONY: access-cohort3-tests access-cohort3-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/access/cohort3/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/access/cohort3/... .PHONY: collection-tests collection-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/collection/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/collection/... .PHONY: consensus-tests consensus-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/consensus/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/consensus/... .PHONY: epochs-cohort1-tests epochs-cohort1-tests: # Use a higher timeout of 20m for the suite of tests which span full epochs - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic -timeout 20m ./tests/epochs/cohort1/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -timeout 20m ./tests/epochs/cohort1/... .PHONY: epochs-cohort2-tests epochs-cohort2-tests: # Use a higher timeout of 20m for the suite of tests which span full epochs - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic -timeout 20m ./tests/epochs/cohort2/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -timeout 20m ./tests/epochs/cohort2/... .PHONY: ghost-tests ghost-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/ghost/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/ghost/... .PHONY: mvp-tests mvp-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/mvp/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/mvp/... .PHONY: execution-tests execution-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/execution/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/execution/... .PHONY: verification-tests verification-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/verification/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/verification/... # upgrades-tests tests need to be run sequentially (-p 1) due to interference between different Docker networks when tests are run in parallel .PHONY: upgrades-tests upgrades-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/upgrades/... -p 1 + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/upgrades/... -p 1 .PHONY: network-tests network-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/network/... + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/network/... # BFT tests need to be run sequentially (-p 1) due to interference between different Docker networks when tests are run in parallel .PHONY: bft-framework-tests bft-framework-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/bft/framework/... -p 1 + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/bft/framework/... -p 1 .PHONY: bft-protocol-tests bft-protocol-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/bft/protocol/... -p 1 + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/bft/protocol/... -p 1 .PHONY: bft-gossipsub-tests bft-gossipsub-tests: - go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) -tags relic ./tests/bft/gossipsub/... -p 1 + $(CGO_FLAG) go test $(if $(VERBOSE),-v,) $(RACE_FLAG) $(if $(JSON_OUTPUT),-json,) $(if $(NUM_RUNS),-count $(NUM_RUNS),) ./tests/bft/gossipsub/... -p 1 .PHONY: bft-tests bft-tests: bft-framework-tests bft-protocol-tests bft-gossipsub-tests -############################################################################################ + diff --git a/integration/benchmark/cmd/manual/Dockerfile b/integration/benchmark/cmd/manual/Dockerfile index 58f2b71d42b..788c2e6edb0 100644 --- a/integration/benchmark/cmd/manual/Dockerfile +++ b/integration/benchmark/cmd/manual/Dockerfile @@ -4,20 +4,11 @@ FROM golang:1.20-buster AS build-setup RUN apt-get update -RUN apt-get -y install cmake zip - -## (1) Build Relic first to maximize caching -FROM build-setup AS build-relic +RUN apt-get -y install zip RUN mkdir /build WORKDIR /build -# Copy over the crypto package -COPY crypto ./crypto - -# Build Relic (this places build artifacts in /build/relic/build) -RUN cd ./crypto/ && go generate - ## (2) Build the app binary FROM build-setup AS build-env @@ -35,12 +26,12 @@ ARG TARGET COPY . . -# Copy over Relic build artifacts -COPY --from=build-relic /build/crypto/relic/build ./crypto/relic/build - FROM build-env as build-production WORKDIR /app +# CGO_FLAG can be overwritten +ARG CGO_FLAG + # Keep Go's build cache between builds. # https://github.com/golang/go/issues/27719#issuecomment-514747274 # Also, allow ssh access @@ -48,7 +39,7 @@ RUN --mount=type=cache,sharing=locked,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ --mount=type=ssh \ cd integration && \ - CGO_ENABLED=1 go build --tags relic -ldflags "-extldflags -static" -o ./app ./${TARGET} + CGO_ENABLED=1 CGO_FLAGS="${CGO_FLAG}" go build -ldflags "-extldflags -static" -o ./app ./${TARGET} RUN mv /app/integration/app /app/app diff --git a/integration/benchmark/server/bench.sh b/integration/benchmark/server/bench.sh index 6ada16119a1..8c87214a3b1 100755 --- a/integration/benchmark/server/bench.sh +++ b/integration/benchmark/server/bench.sh @@ -22,8 +22,6 @@ while read -r branch_hash; do git log --oneline | head -1 git describe - make -C ../.. crypto_setup_gopath - # instead of running "make stop" which uses docker-compose for a lot of older versions, # we explicitly run the command here with "docker compose" DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 docker compose -f docker-compose.nodes.yml down -v --remove-orphans @@ -36,7 +34,7 @@ while read -r branch_hash; do # sleep is workaround for slow initialization of some node types, so that benchmark does not quit immediately with "connection refused" sleep 30; - go run -tags relic ../benchmark/cmd/ci -log-level debug -git-repo-path ../../ -tps-initial 800 -tps-min 1 -tps-max 1200 -duration 30m + go run ../benchmark/cmd/ci -log-level debug -git-repo-path ../../ -tps-initial 800 -tps-min 1 -tps-max 1200 -duration 30m # instead of running "make stop" which uses docker-compose for a lot of older versions, # we explicitly run the command here with "docker compose" diff --git a/integration/benchnet2/Makefile b/integration/benchnet2/Makefile index facb25dc152..b7911fdc0f9 100644 --- a/integration/benchnet2/Makefile +++ b/integration/benchnet2/Makefile @@ -29,13 +29,12 @@ endif # assumes there is a checked out version of flow-go in a "flow-go" sub-folder at this level so that the bootstrap executable # for the checked out version will be run in the sub folder but the bootstrap folder will be created here (outside of the checked out flow-go in the sub folder) gen-bootstrap: clone-flow - cd flow-go && make crypto_setup_gopath - cd flow-go/cmd/bootstrap && go run -tags relic . genconfig --address-format "%s%d-${NETWORK_ID}.${NAMESPACE}:3569" --access $(ACCESS) --collection $(COLLECTION) --consensus $(CONSENSUS) --execution $(EXECUTION) --verification $(VERIFICATION) --weight 100 -o ./ --config ../../../bootstrap/conf/node-config.json - cd flow-go/cmd/bootstrap && go run -tags relic . keygen --machine-account --config ../../../bootstrap/conf/node-config.json -o ../../../bootstrap/keys + cd flow-go/cmd/bootstrap && go run . genconfig --address-format "%s%d-${NETWORK_ID}.${NAMESPACE}:3569" --access $(ACCESS) --collection $(COLLECTION) --consensus $(CONSENSUS) --execution $(EXECUTION) --verification $(VERIFICATION) --weight 100 -o ./ --config ../../../bootstrap/conf/node-config.json + cd flow-go/cmd/bootstrap && go run . keygen --machine-account --config ../../../bootstrap/conf/node-config.json -o ../../../bootstrap/keys echo {} > ./bootstrap/conf/partner-stakes.json mkdir ./bootstrap/partner-nodes - cd flow-go/cmd/bootstrap && go run -tags relic . rootblock --root-chain bench --root-height 0 --root-parent 0000000000000000000000000000000000000000000000000000000000000000 --config ../../../bootstrap/conf/node-config.json -o ../../../bootstrap/ --partner-dir ../../../bootstrap/partner-nodes --partner-weights ../../../bootstrap/conf/partner-stakes.json --internal-priv-dir ../../../bootstrap/keys/private-root-information - cd flow-go/cmd/bootstrap && go run -tags relic . finalize --root-commit 0000000000000000000000000000000000000000000000000000000000000000 --service-account-public-key-json "{\"PublicKey\":\"R7MTEDdLclRLrj2MI1hcp4ucgRTpR15PCHAWLM5nks6Y3H7+PGkfZTP2di2jbITooWO4DD1yqaBSAVK8iQ6i0A==\",\"SignAlgo\":2,\"HashAlgo\":1,\"SeqNumber\":0,\"Weight\":1000}" --config ../../../bootstrap/conf/node-config.json -o ../../../bootstrap/ --partner-dir ../../../bootstrap/partner-nodes --partner-weights ../../../bootstrap/conf/partner-stakes.json --collection-clusters 1 --epoch-counter 0 --epoch-length 30000 --epoch-staking-phase-length 20000 --epoch-dkg-phase-length 2000 --genesis-token-supply="1000000000.0" --protocol-version=0 --internal-priv-dir ../../../bootstrap/keys/private-root-information --dkg-data ../../../bootstrap/private-root-information/root-dkg-data.priv.json --root-block ../../../bootstrap/public-root-information/root-block.json --root-block-votes-dir ../../../bootstrap/public-root-information/root-block-votes/ --epoch-commit-safety-threshold=1000 + cd flow-go/cmd/bootstrap && go run . rootblock --root-chain bench --root-height 0 --root-parent 0000000000000000000000000000000000000000000000000000000000000000 --config ../../../bootstrap/conf/node-config.json -o ../../../bootstrap/ --partner-dir ../../../bootstrap/partner-nodes --partner-weights ../../../bootstrap/conf/partner-stakes.json --internal-priv-dir ../../../bootstrap/keys/private-root-information + cd flow-go/cmd/bootstrap && go run . finalize --root-commit 0000000000000000000000000000000000000000000000000000000000000000 --service-account-public-key-json "{\"PublicKey\":\"R7MTEDdLclRLrj2MI1hcp4ucgRTpR15PCHAWLM5nks6Y3H7+PGkfZTP2di2jbITooWO4DD1yqaBSAVK8iQ6i0A==\",\"SignAlgo\":2,\"HashAlgo\":1,\"SeqNumber\":0,\"Weight\":1000}" --config ../../../bootstrap/conf/node-config.json -o ../../../bootstrap/ --partner-dir ../../../bootstrap/partner-nodes --partner-weights ../../../bootstrap/conf/partner-stakes.json --collection-clusters 1 --epoch-counter 0 --epoch-length 30000 --epoch-staking-phase-length 20000 --epoch-dkg-phase-length 2000 --genesis-token-supply="1000000000.0" --protocol-version=0 --internal-priv-dir ../../../bootstrap/keys/private-root-information --dkg-data ../../../bootstrap/private-root-information/root-dkg-data.priv.json --root-block ../../../bootstrap/public-root-information/root-block.json --root-block-votes-dir ../../../bootstrap/public-root-information/root-block-votes/ --epoch-commit-safety-threshold=1000 gen-helm-l1: go run automate/cmd/level1/bootstrap.go --data bootstrap/public-root-information/root-protocol-state-snapshot.json --dockerTag $(NETWORK_ID) --dockerRegistry $(DOCKER_REGISTRY) diff --git a/integration/dkg/dkg_emulator_suite.go b/integration/dkg/dkg_emulator_suite.go index 69738ecd90f..c21636d796a 100644 --- a/integration/dkg/dkg_emulator_suite.go +++ b/integration/dkg/dkg_emulator_suite.go @@ -24,7 +24,7 @@ import ( dkgeng "github.com/onflow/flow-go/engine/consensus/dkg" "github.com/onflow/flow-go/engine/testutil" - "github.com/onflow/flow-go/fvm" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/integration/tests/lib" "github.com/onflow/flow-go/integration/utils" "github.com/onflow/flow-go/model/bootstrap" @@ -188,6 +188,8 @@ func (s *EmulatorSuite) createAndFundAccount(netID bootstrap.NodeInfo) *nodeAcco accountSigner, err := sdkcrypto.NewInMemorySigner(accountPrivateKey, accountKey.HashAlgo) require.NoError(s.T(), err) + sc := systemcontracts.SystemContractsForChain(s.chainID) + /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ create Flow account ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ @@ -224,8 +226,8 @@ func (s *EmulatorSuite) createAndFundAccount(netID bootstrap.NodeInfo) *nodeAcco receiverRef.deposit(from: <-self.sentVault) } }`, - fvm.FungibleTokenAddress(s.chainID.Chain()).Hex(), - fvm.FlowTokenAddress(s.chainID.Chain()).Hex(), + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), ))). AddAuthorizer(s.blockchain.ServiceKey().Address). SetProposalKey( diff --git a/integration/epochs/epoch_qc_test.go b/integration/epochs/epoch_qc_test.go index 352d4b21d5b..ccb46a59a3e 100644 --- a/integration/epochs/epoch_qc_test.go +++ b/integration/epochs/epoch_qc_test.go @@ -73,7 +73,16 @@ func (s *Suite) TestEpochQuorumCertificate() { address, err := s.emulatorClient.CreateAccount([]*sdk.AccountKey{key}, []sdktemplates.Contract{}) s.Require().NoError(err) - client := epochs.NewQCContractClient(zerolog.Nop(), s.emulatorClient, flow.ZeroID, nodeID, address.String(), 0, s.qcAddress.String(), signer) + client := epochs.NewQCContractClient( + zerolog.Nop(), + s.emulatorClient, + flow.ZeroID, + nodeID, + address.String(), + 0, + s.qcAddress.String(), + signer, + ) s.Require().NoError(err) local := &modulemock.Local{} diff --git a/integration/go.mod b/integration/go.mod index 070bdc5d621..24ada535828 100644 --- a/integration/go.mod +++ b/integration/go.mod @@ -3,8 +3,9 @@ module github.com/onflow/flow-go/integration go 1.20 require ( - cloud.google.com/go/bigquery v1.52.0 + cloud.google.com/go/bigquery v1.53.0 github.com/VividCortex/ewma v1.2.0 + github.com/btcsuite/btcd/chaincfg/chainhash v1.0.2 github.com/coreos/go-semver v0.3.0 github.com/dapperlabs/testingdock v0.4.5-0.20231020233342-a2853fe18724 github.com/dgraph-io/badger/v2 v2.2007.4 @@ -21,12 +22,12 @@ require ( github.com/onflow/cadence v0.42.5 github.com/onflow/flow-core-contracts/lib/go/contracts v1.2.4-0.20231016154253-a00dbf7c061f github.com/onflow/flow-core-contracts/lib/go/templates v1.2.4-0.20231016154253-a00dbf7c061f - github.com/onflow/flow-emulator v0.54.1-0.20231024204057-0273f8fe3807 - github.com/onflow/flow-go v0.32.3 + github.com/onflow/flow-emulator v0.58.1-0.20231130142844-f22e54339f85 + github.com/onflow/flow-go v0.32.7 github.com/onflow/flow-go-sdk v0.41.16 - github.com/onflow/flow-go/crypto v0.24.9 + github.com/onflow/flow-go/crypto v0.25.0 github.com/onflow/flow-go/insecure v0.0.0-00010101000000-000000000000 - github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8 + github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9 github.com/plus3it/gorecurcopy v0.0.1 github.com/prometheus/client_golang v1.16.0 github.com/prometheus/client_model v0.5.0 @@ -37,13 +38,13 @@ require ( go.uber.org/atomic v1.11.0 golang.org/x/exp v0.0.0-20230321023759-10a507213a29 golang.org/x/sync v0.3.0 - google.golang.org/grpc v1.58.3 + google.golang.org/grpc v1.59.0 google.golang.org/protobuf v1.31.0 ) require ( - cloud.google.com/go v0.110.4 // indirect - cloud.google.com/go/compute v1.21.0 // indirect + cloud.google.com/go v0.110.7 // indirect + cloud.google.com/go/compute v1.23.0 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v1.1.1 // indirect cloud.google.com/go/storage v1.30.1 // indirect @@ -51,6 +52,8 @@ require ( github.com/DataDog/zstd v1.5.2 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect github.com/ProtonMail/go-crypto v0.0.0-20221026131551-cf6655e29de4 // indirect + github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 // indirect + github.com/VictoriaMetrics/fastcache v1.6.0 // indirect github.com/acomagu/bufpipe v1.0.3 // indirect github.com/andybalholm/brotli v1.0.4 // indirect github.com/apache/arrow/go/v12 v12.0.0 // indirect @@ -90,6 +93,7 @@ require ( github.com/cskr/pubsub v1.0.2 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect + github.com/deckarep/golang-set/v2 v2.1.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/dgraph-io/ristretto v0.1.0 // indirect github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 // indirect @@ -127,24 +131,25 @@ require ( github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.14.1 // indirect github.com/go-redis/redis/v8 v8.11.5 // indirect + github.com/go-stack/stack v1.8.1 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/goccy/go-json v0.9.11 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect + github.com/gofrs/flock v0.8.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/glog v1.1.0 // indirect + github.com/golang/glog v1.1.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/mock v1.6.0 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect github.com/google/flatbuffers v2.0.8+incompatible // indirect - github.com/google/go-cmp v0.5.9 // indirect github.com/google/gopacket v1.1.19 // indirect github.com/google/pprof v0.0.0-20230602150820-91b7bce49751 // indirect github.com/google/s2a-go v0.1.4 // indirect - github.com/google/uuid v1.3.0 // indirect + github.com/google/uuid v1.3.1 // indirect github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect github.com/googleapis/gax-go/v2 v2.11.0 // indirect - github.com/gorilla/mux v1.8.0 // indirect + github.com/gorilla/mux v1.8.1 // indirect github.com/grpc-ecosystem/go-grpc-middleware/providers/zerolog/v2 v2.0.0-rc.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect @@ -154,6 +159,7 @@ require ( github.com/hashicorp/golang-lru v0.5.5-0.20210104140557-80c98217689d // indirect github.com/hashicorp/golang-lru/v2 v2.0.2 // indirect github.com/hashicorp/hcl v1.0.0 // indirect + github.com/holiman/bloomfilter/v2 v2.0.3 // indirect github.com/holiman/uint256 v1.2.2-0.20230321075855-87b91420868c // indirect github.com/huin/goupnp v1.2.0 // indirect github.com/imdario/mergo v0.3.13 // indirect @@ -237,6 +243,7 @@ require ( github.com/multiformats/go-multihash v0.2.3 // indirect github.com/multiformats/go-multistream v0.4.1 // indirect github.com/multiformats/go-varint v0.0.7 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/onflow/atree v0.6.0 // indirect github.com/onflow/flow-ft/lib/go/contracts v0.7.1-0.20230711213910-baad011d2b13 // indirect github.com/onflow/flow-nft/lib/go/contracts v1.1.0 // indirect @@ -274,6 +281,7 @@ require ( github.com/schollz/progressbar/v3 v3.13.1 // indirect github.com/sergi/go-diff v1.1.0 // indirect github.com/sethvargo/go-retry v0.2.3 // indirect + github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible // indirect github.com/shirou/gopsutil/v3 v3.22.2 // indirect github.com/sirupsen/logrus v1.9.2 // indirect github.com/skeema/knownhosts v1.1.0 // indirect @@ -282,12 +290,13 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/spf13/afero v1.9.3 // indirect github.com/spf13/cast v1.5.0 // indirect - github.com/spf13/cobra v1.7.0 // indirect + github.com/spf13/cobra v1.8.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/viper v1.15.0 // indirect github.com/stretchr/objx v0.5.0 // indirect github.com/subosito/gotenv v1.4.2 // indirect + github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 // indirect github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c // indirect github.com/tklauser/go-sysconf v0.3.9 // indirect github.com/tklauser/numcpus v0.3.0 // indirect @@ -317,8 +326,8 @@ require ( go.uber.org/zap v1.24.0 // indirect golang.org/x/crypto v0.12.0 // indirect golang.org/x/mod v0.10.0 // indirect - golang.org/x/net v0.12.0 // indirect - golang.org/x/oauth2 v0.10.0 // indirect + golang.org/x/net v0.14.0 // indirect + golang.org/x/oauth2 v0.11.0 // indirect golang.org/x/sys v0.12.0 // indirect golang.org/x/term v0.11.0 // indirect golang.org/x/text v0.12.0 // indirect @@ -328,11 +337,12 @@ require ( gonum.org/v1/gonum v0.13.0 // indirect google.golang.org/api v0.126.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect + google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.2.1 // indirect diff --git a/integration/go.sum b/integration/go.sum index 2ada1506358..6d8b0a92297 100644 --- a/integration/go.sum +++ b/integration/go.sum @@ -29,21 +29,21 @@ cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aD cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= -cloud.google.com/go v0.110.4 h1:1JYyxKMN9hd5dR2MYTPWkGUgcoxVVhg0LKNKEo0qvmk= -cloud.google.com/go v0.110.4/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= +cloud.google.com/go v0.110.7 h1:rJyC7nWRg2jWGZ4wSJ5nY65GTdYJkg0cd/uXb+ACI6o= +cloud.google.com/go v0.110.7/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/bigquery v1.52.0 h1:JKLNdxI0N+TIUWD6t9KN646X27N5dQWq9dZbbTWZ8hc= -cloud.google.com/go/bigquery v1.52.0/go.mod h1:3b/iXjRQGU4nKa87cXeg6/gogLjO8C6PmuM8i5Bi/u4= -cloud.google.com/go/compute v1.21.0 h1:JNBsyXVoOoNJtTQcnEY5uYpZIbeCTYIeDe0Xh1bySMk= -cloud.google.com/go/compute v1.21.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= +cloud.google.com/go/bigquery v1.53.0 h1:K3wLbjbnSlxhuG5q4pntHv5AEbQM1QqHKGYgwFIqOTg= +cloud.google.com/go/bigquery v1.53.0/go.mod h1:3b/iXjRQGU4nKa87cXeg6/gogLjO8C6PmuM8i5Bi/u4= +cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= +cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= -cloud.google.com/go/datacatalog v1.14.1 h1:cFPBt8V5V2T3mu/96tc4nhcMB+5cYcpwjBfn79bZDI8= +cloud.google.com/go/datacatalog v1.16.0 h1:qVeQcw1Cz93/cGu2E7TYUPh8Lz5dn5Ws2siIuQ17Vng= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/iam v1.1.1 h1:lW7fzj15aVIXYHREOqjRBV9PsH0Z6u8Y46a1YGvQP4Y= @@ -109,8 +109,11 @@ github.com/ProtonMail/go-crypto v0.0.0-20221026131551-cf6655e29de4/go.mod h1:UBY github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 h1:fLjPD/aNc3UIOA6tDi6QXUemppXK3P9BI7mr2hd6gx8= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VictoriaMetrics/fastcache v1.5.3/go.mod h1:+jv9Ckb+za/P1ZRg/sulP5Ni1v49daAVERr0H3CuscE= +github.com/VictoriaMetrics/fastcache v1.6.0 h1:C/3Oi3EiBCqufydp1neRZkqcwmEiuRT9c3fqvvgKm5o= +github.com/VictoriaMetrics/fastcache v1.6.0/go.mod h1:0qHz5QP0GMX4pfmMA/zt5RgfNuXJrTP0zS7DqpHGGTw= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= @@ -125,6 +128,7 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= @@ -212,6 +216,8 @@ github.com/btcsuite/btcd v0.20.1-beta/go.mod h1:wVuoA8VJLEcwgqHBwHmzLRazpKxTv13P github.com/btcsuite/btcd v0.21.0-beta/go.mod h1:ZSWyehm27aAuS9bvkATT+Xte3hjHZ+MRgMY/8NJ7K94= github.com/btcsuite/btcd/btcec/v2 v2.2.1 h1:xP60mv8fvp+0khmrN0zTdPC3cNm24rfeE6lh2R/Yv3E= github.com/btcsuite/btcd/btcec/v2 v2.2.1/go.mod h1:9/CSmJxmuvqzX9Wh2fXMWToLOHhPd11lSPuIupwTkI8= +github.com/btcsuite/btcd/chaincfg/chainhash v1.0.2 h1:KdUfX2zKommPRa+PD0sWZUyXe9w277ABlgELO7H04IM= +github.com/btcsuite/btcd/chaincfg/chainhash v1.0.2/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc= github.com/btcsuite/btclog v0.0.0-20170628155309-84c8d2346e9f/go.mod h1:TdznJufoqS23FtqVCzL0ZqgP5MqXbb4fg/WgDys70nA= github.com/btcsuite/btcutil v0.0.0-20190207003914-4c204d697803/go.mod h1:+5NJ2+qvTyV9exUAL/rxXi3DcLg2Ts+ymUAY5y4NvMg= github.com/btcsuite/btcutil v0.0.0-20190425235716-9e5f4b9a998d/go.mod h1:+5NJ2+qvTyV9exUAL/rxXi3DcLg2Ts+ymUAY5y4NvMg= @@ -296,6 +302,7 @@ github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwc github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= @@ -313,6 +320,8 @@ github.com/davidlazar/go-crypto v0.0.0-20170701192655-dcfb0a7ac018/go.mod h1:rQY github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c h1:pFUpOrbxDR6AkioZ1ySsx5yxlDQZ8stG2b88gTPxgJU= github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c/go.mod h1:6UhI8N9EjYm1c2odKpFpAYeR8dsBeM7PtzQhRgxRr9U= github.com/deckarep/golang-set v0.0.0-20180603214616-504e848d77ea/go.mod h1:93vsz/8Wt4joVM7c2AVqh+YRMiUSc14yDtF28KmMOgQ= +github.com/deckarep/golang-set/v2 v2.1.0 h1:g47V4Or+DUdzbs8FxCCmgb6VYd+ptPAngjM6dtGktsI= +github.com/deckarep/golang-set/v2 v2.1.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5ilcvdfma9wOH6Y= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 h1:8UrgZ3GkP4i/CLijOJx79Yu+etlyjdBU4sfcs2WYQMs= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= @@ -479,6 +488,8 @@ github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw= +github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-test/deep v1.0.5/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8= @@ -493,6 +504,7 @@ github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= @@ -509,8 +521,8 @@ github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzq github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= -github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= -github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= +github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= +github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -554,6 +566,7 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= @@ -613,8 +626,9 @@ github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= +github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= @@ -631,8 +645,9 @@ github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRid github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= +github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.1-0.20190629185528-ae1634f6a989/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= @@ -692,6 +707,8 @@ github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/holiman/bloomfilter/v2 v2.0.3 h1:73e0e/V0tCydx14a0SCYS/EWCxgwLZ18CZcZKVu0fao= +github.com/holiman/bloomfilter/v2 v2.0.3/go.mod h1:zpoh+gs7qcpqrHr3dB55AMiJwo0iURXE7ZOP9L9hSkA= github.com/holiman/uint256 v1.2.2-0.20230321075855-87b91420868c h1:DZfsyhDK1hnSS5lH8l+JggqzEleHteTYfutAiVlSUM8= github.com/holiman/uint256 v1.2.2-0.20230321075855-87b91420868c/go.mod h1:SC8Ryt4n+UBbPbIBKaG9zbbDlp4jOru9xFZmPzLUTxw= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= @@ -938,6 +955,7 @@ github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= @@ -1370,6 +1388,8 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.2-0.20190409134802-7e037d187b0c/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onflow/atree v0.1.0-beta1.0.20211027184039-559ee654ece9/go.mod h1:+6x071HgCF/0v5hQcaE5qqjc2UqN5gCU8h5Mk6uqpOg= github.com/onflow/atree v0.6.0 h1:j7nQ2r8npznx4NX39zPpBYHmdy45f4xwoi+dm37Jk7c= github.com/onflow/atree v0.6.0/go.mod h1:gBHU0M05qCbv9NN0kijLWMgC47gHVNBIp4KmsVFi0tc= @@ -1380,21 +1400,21 @@ github.com/onflow/flow-core-contracts/lib/go/contracts v1.2.4-0.20231016154253-a github.com/onflow/flow-core-contracts/lib/go/contracts v1.2.4-0.20231016154253-a00dbf7c061f/go.mod h1:jM6GMAL+m0hjusUgiYDNrixPQ6b9s8xjoJQoEu5bHQI= github.com/onflow/flow-core-contracts/lib/go/templates v1.2.4-0.20231016154253-a00dbf7c061f h1:Ep+Mpo2miWMe4pjPGIaEvEzshRep30dvNgxqk+//FrQ= github.com/onflow/flow-core-contracts/lib/go/templates v1.2.4-0.20231016154253-a00dbf7c061f/go.mod h1:ZeLxwaBkzuSInESGjL8/IPZWezF+YOYsYbMrZlhN+q4= -github.com/onflow/flow-emulator v0.54.1-0.20231024204057-0273f8fe3807 h1:/4jZ2oELdhKubgL97NGqhiuO80oMH/M+fIQoNPfGg+g= -github.com/onflow/flow-emulator v0.54.1-0.20231024204057-0273f8fe3807/go.mod h1:Qq1YmTDYlfpzfuzrFH8gwMgzzv80LCKFiS1Kqm8vFcY= +github.com/onflow/flow-emulator v0.58.1-0.20231130142844-f22e54339f85 h1:GWAZqWQmckvmvGtoFxpM1q+LMTNUT3DKxHnl266Ke9A= +github.com/onflow/flow-emulator v0.58.1-0.20231130142844-f22e54339f85/go.mod h1:Iv+lFLKbN4aGZeFOlrF7v7LIjpclfrdyGLsOTNXyLUQ= github.com/onflow/flow-ft/lib/go/contracts v0.7.1-0.20230711213910-baad011d2b13 h1:B4ll7e3j+MqTJv2122Enq3RtDNzmIGRu9xjV7fo7un0= github.com/onflow/flow-ft/lib/go/contracts v0.7.1-0.20230711213910-baad011d2b13/go.mod h1:kTMFIySzEJJeupk+7EmXs0EJ6CBWY/MV9fv9iYQk+RU= github.com/onflow/flow-go-sdk v0.24.0/go.mod h1:IoptMLPyFXWvyd9yYA6/4EmSeeozl6nJoIv4FaEMg74= github.com/onflow/flow-go-sdk v0.41.16 h1:HsmHwEVmj+iK+GszHbFseHh7Ii5W3PWOIRNAH/En08Q= github.com/onflow/flow-go-sdk v0.41.16/go.mod h1:bVrVNoJKiwB6vW5Qbm5tFAfJBQ5we4uSQWnn9gNAFhQ= github.com/onflow/flow-go/crypto v0.21.3/go.mod h1:vI6V4CY3R6c4JKBxdcRiR/AnjBfL8OSD97bJc60cLuQ= -github.com/onflow/flow-go/crypto v0.24.9 h1:0EQp+kSZYJepMIiSypfJVe7tzsPcb6UXOdOtsTCDhBs= -github.com/onflow/flow-go/crypto v0.24.9/go.mod h1:fqCzkIBBMRRkciVrvW21rECKq1oD7Q6u+bCI78lfNX0= +github.com/onflow/flow-go/crypto v0.25.0 h1:6lmoiAQ3APCF+nV7f4f2AXL3PuDKqQiWqRJXmjrMEq4= +github.com/onflow/flow-go/crypto v0.25.0/go.mod h1:OOb2vYcS8AOCajBClhHTJ0NKftFl1RQgTQ0+Vh4nbqk= github.com/onflow/flow-nft/lib/go/contracts v1.1.0 h1:rhUDeD27jhLwOqQKI/23008CYfnqXErrJvc4EFRP2a0= github.com/onflow/flow-nft/lib/go/contracts v1.1.0/go.mod h1:YsvzYng4htDgRB9sa9jxdwoTuuhjK8WYWXTyLkIigZY= github.com/onflow/flow/protobuf/go/flow v0.2.2/go.mod h1:gQxYqCfkI8lpnKsmIjwtN2mV/N2PIwc1I+RUK4HPIc8= -github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8 h1:AsIyEDiwxpRAifgBK/0lsjEdNfqFtHqNHedpMeHoA4w= -github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231110212518-071176bb06b8/go.mod h1:NA2pX2nw8zuaxfKphhKsk00kWLwfd+tv8mS23YXO4Sk= +github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9 h1:EjTkQnXvoH/yQLoGxVRZObuE8oxcanEu+IxwBwb/gSQ= +github.com/onflow/flow/protobuf/go/flow v0.3.2-0.20231207123230-136eab6aadf9/go.mod h1:NA2pX2nw8zuaxfKphhKsk00kWLwfd+tv8mS23YXO4Sk= github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d h1:QcOAeEyF3iAUHv21LQ12sdcsr0yFrJGoGLyCAzYYtvI= github.com/onflow/go-bitswap v0.0.0-20230703214630-6d3db958c73d/go.mod h1:GCPpiyRoHncdqPj++zPr9ZOYBX4hpJ0pYZRYqSE8VKk= github.com/onflow/nft-storefront/lib/go/contracts v0.0.0-20221222181731-14b90207cead h1:2j1Unqs76Z1b95Gu4C3Y28hzNUHBix7wL490e61SMSw= @@ -1578,6 +1598,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sethvargo/go-retry v0.2.3 h1:oYlgvIvsju3jNbottWABtbnoLC+GDtLdBHxKWxQm/iU= github.com/sethvargo/go-retry v0.2.3/go.mod h1:1afjQuvh7s4gflMObvjLPaWgluLLyhA1wmVZ6KLpICw= +github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible h1:Bn1aCHHRnjv4Bl16T8rcaFjYSrGrIZvpiGO6P3Q4GpU= +github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shirou/gopsutil/v3 v3.22.2 h1:wCrArWFkHYIdDxx/FSfF5RB4dpJYW6t7rcp3+zL8uks= github.com/shirou/gopsutil/v3 v3.22.2/go.mod h1:WapW1AOOPlHyXr+yOyw3uYx36enocrtSoSBy0L5vUHY= github.com/shurcooL/component v0.0.0-20170202220835-f88ec8f54cc4/go.mod h1:XhFIlyj5a1fBNx5aJTbKoIq0mNaPvOagO+HjB3EtxrY= @@ -1642,8 +1664,8 @@ github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= @@ -1682,9 +1704,10 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8= github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= github.com/supranational/blst v0.3.4/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw= -github.com/supranational/blst v0.3.11-0.20230406105308-e9dfc5ee724b h1:u49mjRnygnB34h8OKbnNJFVUtWSKIKb1KukdV8bILUM= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/syndtr/goleveldb v1.0.1-0.20190923125748-758128399b1d/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= +github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY= +github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c h1:HelZ2kAFadG0La9d+4htN4HzQ68Bm2iM9qKMSMES6xg= github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c/go.mod h1:JlzghshsemAMDGZLytTFY8C1JQxQPhnatWqNwUXjggo= @@ -1976,6 +1999,7 @@ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= @@ -1998,8 +2022,8 @@ golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= -golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= +golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -2018,8 +2042,8 @@ golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8= -golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI= +golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= +golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -2101,6 +2125,7 @@ golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200918174421-af09f7315aff/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -2122,6 +2147,7 @@ golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -2398,13 +2424,13 @@ google.golang.org/genproto v0.0.0-20210921142501-181ce0d877f6/go.mod h1:5CzLGKJ6 google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211007155348-82e027067bd4/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98 h1:Z0hjGZePRE0ZBWotvtrwxFNrNE9CUAGtplaDK5NNI/g= -google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98/go.mod h1:S7mY02OqCJTD0E1OiQy1F72PWFB4bZJ87cAtLPYgDR0= -google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98 h1:FmF5cCW94Ij59cfpoLiwTgodWmm60eEV0CjlsVg2fuw= -google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98/go.mod h1:rsr7RhLuwsDKL7RmgDDCUc6yaGr1iqceVb5Wv6f6YvQ= +google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= +google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= +google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= +google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= google.golang.org/genproto/googleapis/bytestream v0.0.0-20230530153820-e85fd2cbaebc h1:g3hIDl0jRNd9PPTs2uBzYuaD5mQuwOkZY0vSc0LR32o= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 h1:bVf09lpb+OJbByTj913DRJioFFAjf/ZGxEz7MajTp2U= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= @@ -2441,8 +2467,8 @@ google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnD google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= -google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= +google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 h1:TLkBREm4nIsEcexnCjgQd5GQWaHcqMzwQV0TX9pq8S0= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0/go.mod h1:DNq5QpG7LJqD2AamLZ7zvKE0DEpVl2BSEVjFycAAjRY= @@ -2479,6 +2505,7 @@ gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce h1:+JknDZhAj8YMt7GC73Ei8pv4MzjDUNPHgQWJdtMAaDU= gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce/go.mod h1:5AcXVHNjg+BDxry382+8OKon8SEWiKktQR07RKPsv1c= gopkg.in/olebedev/go-duktape.v3 v3.0.0-20190213234257-ec84240a7772/go.mod h1:uAJfkITjFhyEEuUfm7bsmCZRbW5WRq8s9EY8HZ6hCns= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= diff --git a/integration/localnet/Makefile b/integration/localnet/Makefile index a2a035f4a93..d889d22e150 100644 --- a/integration/localnet/Makefile +++ b/integration/localnet/Makefile @@ -47,7 +47,7 @@ ifeq ($(strip $(VALID_EXECUTION)), 1) else ifeq ($(strip $(VALID_CONSENSUS)), 1) $(error Number of Consensus nodes should be no less than 2) else - go run -tags relic \ + go run \ -ldflags="-X 'github.com/onflow/flow-go/cmd/build.commit=${COMMIT}' \ -X 'github.com/onflow/flow-go/cmd/build.semver=${VERSION}'" \ builder/*.go \ @@ -123,15 +123,15 @@ stop: .PHONY: load load: - go run --tags relic ../benchmark/cmd/manual -log-level info -tps 1,10,100 -tps-durations 30s,30s + go run ../benchmark/cmd/manual -log-level info -tps 1,10,100 -tps-durations 30s,30s .PHONY: tps-ci-smoke tps-ci-smoke: - go run --tags relic ../benchmark/cmd/ci -log-level info -tps-initial 1 -tps-min 1 -tps-max 10 -duration 20s -tps-adjust-interval 1s -stat-interval 1s -bigquery-upload=false + go run ../benchmark/cmd/ci -log-level info -tps-initial 1 -tps-min 1 -tps-max 10 -duration 20s -tps-adjust-interval 1s -stat-interval 1s -bigquery-upload=false .PHONY: tps-ci tps-ci: bootstrap-ci build-flow start-flow - go run --tags relic ../benchmark/cmd/ci -log-level info -tps-initial $(TPS_INIT) -tps-min $(TPS_MIN) -tps-max $(TPS_MAX) -duration $(DURATION) + go run ../benchmark/cmd/ci -log-level info -tps-initial $(TPS_INIT) -tps-min $(TPS_MIN) -tps-max $(TPS_MAX) -duration $(DURATION) .PHONY: clean-data clean-data: diff --git a/integration/testnet/client.go b/integration/testnet/client.go index fe4138b666e..5e760cf1256 100644 --- a/integration/testnet/client.go +++ b/integration/testnet/client.go @@ -47,7 +47,7 @@ func NewClientWithKey(accessAddr string, accountAddr sdk.Address, key sdkcrypto. acc, err := flowClient.GetAccount(context.Background(), accountAddr) if err != nil { - return nil, fmt.Errorf("could not get the account %x: %w", accountAddr, err) + return nil, fmt.Errorf("could not get the account %v: %w", accountAddr, err) } accountKey := acc.Keys[0] diff --git a/integration/testnet/container.go b/integration/testnet/container.go index ab51f4a44e4..4e181df97e1 100644 --- a/integration/testnet/container.go +++ b/integration/testnet/container.go @@ -395,7 +395,8 @@ func (c *Container) OpenState() (*state.State, error) { qcs := storage.NewQuorumCertificates(metrics, db, storage.DefaultCacheSize) setups := storage.NewEpochSetups(metrics, db) commits := storage.NewEpochCommits(metrics, db) - protocolState := storage.NewProtocolState(metrics, setups, commits, db, storage.DefaultCacheSize) + protocolState := storage.NewProtocolState(metrics, setups, commits, db, + storage.DefaultProtocolStateCacheSize, storage.DefaultProtocolStateByBlockIDCacheSize) versionBeacons := storage.NewVersionBeacons(db) return state.OpenState( diff --git a/integration/testnet/network.go b/integration/testnet/network.go index e5af9220bd9..7340c5c57bd 100644 --- a/integration/testnet/network.go +++ b/integration/testnet/network.go @@ -74,6 +74,8 @@ const ( DefaultFlowSecretsDBDir = "/data/secrets" // DefaultExecutionRootDir is the default directory for the execution node state database. DefaultExecutionRootDir = "/data/exedb" + // DefaultRegisterDir is the default directory for the register store database. + DefaultRegisterDir = "/data/register" // DefaultExecutionDataServiceDir for the execution data service blobstore. DefaultExecutionDataServiceDir = "/data/execution_data" // DefaultExecutionStateDir for the execution data service blobstore. @@ -868,6 +870,7 @@ func (net *FlowNetwork) AddNode(t *testing.T, bootstrapDir string, nodeConf Cont nodeContainer.AddFlag("triedir", DefaultExecutionRootDir) nodeContainer.AddFlag("execution-data-dir", DefaultExecutionDataServiceDir) nodeContainer.AddFlag("chunk-data-pack-dir", DefaultChunkDataPackDir) + nodeContainer.AddFlag("register-dir", DefaultRegisterDir) case flow.RoleAccess: nodeContainer.exposePort(GRPCPort, testingdock.RandomPort(t)) diff --git a/integration/testnet/util.go b/integration/testnet/util.go index 2dfd450d38f..d5d9559ccb6 100644 --- a/integration/testnet/util.go +++ b/integration/testnet/util.go @@ -69,7 +69,7 @@ func toNodeInfos(confs []ContainerConfig) []bootstrap.NodeInfo { } func getSeed() ([]byte, error) { - seedLen := int(math.Max(crypto.SeedMinLenDKG, crypto.KeyGenSeedMinLen)) + seedLen := int(math.Max(crypto.KeyGenSeedMinLen, crypto.KeyGenSeedMinLen)) seed := make([]byte, seedLen) n, err := rand.Read(seed) if err != nil || n != seedLen { diff --git a/integration/tests/access/cohort1/access_api_test.go b/integration/tests/access/cohort1/access_api_test.go index cb5a175130d..24409f84ad2 100644 --- a/integration/tests/access/cohort1/access_api_test.go +++ b/integration/tests/access/cohort1/access_api_test.go @@ -87,7 +87,12 @@ func (s *AccessAPISuite) SetupTest() { ) consensusConfigs := []func(config *testnet.NodeConfig){ - testnet.WithAdditionalFlag("--cruise-ctl-fallback-proposal-duration=100ms"), + // `cruise-ctl-fallback-proposal-duration` is set to 250ms instead to of 100ms + // to purposely slow down the block rate. This is needed since the crypto module + // update providing faster BLS operations. + // TODO: fix the access integration test logic to function without slowing down + // the block rate + testnet.WithAdditionalFlag("--cruise-ctl-fallback-proposal-duration=250ms"), testnet.WithAdditionalFlagf("--required-verification-seal-approvals=%d", 1), testnet.WithAdditionalFlagf("--required-construction-seal-approvals=%d", 1), testnet.WithLogLevel(zerolog.FatalLevel), diff --git a/integration/tests/access/cohort2/observer_test.go b/integration/tests/access/cohort2/observer_test.go index 38f0c8bca05..755ab087c63 100644 --- a/integration/tests/access/cohort2/observer_test.go +++ b/integration/tests/access/cohort2/observer_test.go @@ -74,7 +74,9 @@ func (s *ObserverSuite) SetupTest() { nodeConfigs := []testnet.NodeConfig{ // access node with unstaked nodes supported - testnet.NewNodeConfig(flow.RoleAccess, testnet.WithLogLevel(zerolog.InfoLevel), testnet.WithAdditionalFlag("--supports-observer=true")), + testnet.NewNodeConfig(flow.RoleAccess, testnet.WithLogLevel(zerolog.InfoLevel), + testnet.WithAdditionalFlag("--supports-observer=true"), + ), // need one dummy execution node testnet.NewNodeConfig(flow.RoleExecution, testnet.WithLogLevel(zerolog.FatalLevel)), diff --git a/integration/tests/bft/base_suite.go b/integration/tests/bft/base_suite.go index b50085a9e50..2e6e74de881 100644 --- a/integration/tests/bft/base_suite.go +++ b/integration/tests/bft/base_suite.go @@ -77,7 +77,12 @@ func (b *BaseSuite) SetupSuite() { testnet.WithLogLevel(zerolog.FatalLevel), testnet.WithAdditionalFlag("--required-verification-seal-approvals=1"), testnet.WithAdditionalFlag("--required-construction-seal-approvals=1"), - testnet.WithAdditionalFlag("--cruise-ctl-fallback-proposal-duration=1ms"), + // `cruise-ctl-fallback-proposal-duration` is set to 250ms instead to of 1ms + // to purposely slow down the block rate. This is needed since the crypto module + // update providing faster BLS operations. + // TODO: fix the access integration test logic to function without slowing down + // the block rate + testnet.WithAdditionalFlag("--cruise-ctl-fallback-proposal-duration=250ms"), ) b.NodeConfigs = append(b.NodeConfigs, nodeConfig) } diff --git a/integration/tests/epochs/cohort2/epoch_join_and_leave_sn_test.go b/integration/tests/epochs/cohort2/epoch_join_and_leave_sn_test.go index fb825e447a6..d101af6371d 100644 --- a/integration/tests/epochs/cohort2/epoch_join_and_leave_sn_test.go +++ b/integration/tests/epochs/cohort2/epoch_join_and_leave_sn_test.go @@ -2,6 +2,7 @@ package cohort2 import ( "testing" + "time" "github.com/stretchr/testify/suite" @@ -17,6 +18,15 @@ type EpochJoinAndLeaveSNSuite struct { epochs.DynamicEpochTransitionSuite } +func (s *EpochJoinAndLeaveSNSuite) SetupTest() { + // slow down the block rate. This is needed since the crypto module + // update provides faster BLS operations. + // TODO: fix the access integration test logic to function without slowing down + // the block rate + s.ConsensusProposalDuration = time.Millisecond * 250 + s.DynamicEpochTransitionSuite.SetupTest() +} + // TestEpochJoinAndLeaveSN should update consensus nodes and assert healthy network conditions // after the epoch transition completes. See health check function for details. func (s *EpochJoinAndLeaveSNSuite) TestEpochJoinAndLeaveSN() { diff --git a/integration/tests/epochs/cohort2/epoch_join_and_leave_vn_test.go b/integration/tests/epochs/cohort2/epoch_join_and_leave_vn_test.go index d4b46693bb9..ed8f7ef1ae1 100644 --- a/integration/tests/epochs/cohort2/epoch_join_and_leave_vn_test.go +++ b/integration/tests/epochs/cohort2/epoch_join_and_leave_vn_test.go @@ -32,7 +32,7 @@ func (s *EpochJoinAndLeaveVNSuite) SetupTest() { s.DKGPhaseLen = 100 s.EpochLen = 450 s.EpochCommitSafetyThreshold = 20 - s.DynamicEpochTransitionSuite.Suite.SetupTest() + s.Suite.SetupTest() } // TestEpochJoinAndLeaveVN should update verification nodes and assert healthy network conditions diff --git a/integration/tests/lib/testnet_state_tracker.go b/integration/tests/lib/testnet_state_tracker.go index 990f94259e9..f00553d27a7 100644 --- a/integration/tests/lib/testnet_state_tracker.go +++ b/integration/tests/lib/testnet_state_tracker.go @@ -116,7 +116,7 @@ func (tst *TestnetStateTracker) Track(t *testing.T, ctx context.Context, ghost * sender) default: - t.Logf("%v other msg received from %s: %#v\n", time.Now().UTC(), sender, msg) + t.Logf("%v other msg received from %s: %T\n", time.Now().UTC(), sender, msg) continue } } diff --git a/integration/tests/mvp/common.go b/integration/tests/mvp/common.go index 2bb8ee9530a..dc50242f6e5 100644 --- a/integration/tests/mvp/common.go +++ b/integration/tests/mvp/common.go @@ -13,7 +13,7 @@ import ( sdkcrypto "github.com/onflow/flow-go-sdk/crypto" "github.com/onflow/flow-go-sdk/templates" - "github.com/onflow/flow-go/fvm" + "github.com/onflow/flow-go/fvm/systemcontracts" "github.com/onflow/flow-go/integration/testnet" "github.com/onflow/flow-go/integration/tests/lib" ) @@ -77,6 +77,8 @@ func RunMVPTest(t *testing.T, ctx context.Context, net *testnet.FlowNetwork, acc } require.NotEqual(t, sdk.EmptyAddress, newAccountAddress) + sc := systemcontracts.SystemContractsForChain(chain.ChainID()) + t.Log(">> new account address: ", newAccountAddress) // Generate the fund account transaction (so account can be used as a payer) @@ -100,8 +102,9 @@ func RunMVPTest(t *testing.T, ctx context.Context, net *testnet.FlowNetwork, acc receiverRef.deposit(from: <-self.sentVault) } }`, - fvm.FungibleTokenAddress(chain).Hex(), - fvm.FlowTokenAddress(chain).Hex()))). + sc.FungibleToken.Address.Hex(), + sc.FlowToken.Address.Hex(), + ))). AddAuthorizer(serviceAddress). SetReferenceBlockID(sdk.Identifier(latestBlockID)). SetProposalKey(serviceAddress, 0, serviceAccountClient.GetSeqNumber()). diff --git a/integration/tests/upgrades/suite.go b/integration/tests/upgrades/suite.go index dbc40e810aa..93094b8c13b 100644 --- a/integration/tests/upgrades/suite.go +++ b/integration/tests/upgrades/suite.go @@ -83,10 +83,12 @@ func (s *Suite) SetupTest() { testnet.WithLogLevel(zerolog.WarnLevel), testnet.WithID(s.exe1ID), testnet.WithAdditionalFlag("--extensive-logging=true"), + testnet.WithAdditionalFlag("--max-graceful-stop-duration=1s"), ), testnet.NewNodeConfig( flow.RoleExecution, testnet.WithLogLevel(zerolog.WarnLevel), + testnet.WithAdditionalFlag("--max-graceful-stop-duration=1s"), ), testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), testnet.NewNodeConfig(flow.RoleConsensus, consensusConfigs...), diff --git a/integration/utils/dependency_test.go b/integration/utils/dependency_test.go new file mode 100644 index 00000000000..8ac0fac8cc2 --- /dev/null +++ b/integration/utils/dependency_test.go @@ -0,0 +1,8 @@ +package utils + +import "github.com/btcsuite/btcd/chaincfg/chainhash" + +// this is added to resolve the issue with chainhash ambiguous import, +// the code is not used, but it's needed to force go.mod specify and retain chainhash version +// workaround for issue: https://github.com/golang/go/issues/27899 +var _ = chainhash.Hash{} diff --git a/ledger/common/convert/convert.go b/ledger/common/convert/convert.go index 6c028b1b5b2..d1c9d732570 100644 --- a/ledger/common/convert/convert.go +++ b/ledger/common/convert/convert.go @@ -61,5 +61,4 @@ func PayloadToRegister(payload *ledger.Payload) (flow.RegisterID, flow.RegisterV } return regID, payload.Value(), nil - } diff --git a/model/bootstrap/node_info.go b/model/bootstrap/node_info.go index 270f574533a..5a871c762a2 100644 --- a/model/bootstrap/node_info.go +++ b/model/bootstrap/node_info.go @@ -174,6 +174,18 @@ type decodableNodeInfoPub struct { Stake uint64 } +func (info *NodeInfoPub) Equals(other *NodeInfoPub) bool { + if other == nil { + return false + } + return info.Address == other.Address && + info.NodeID == other.NodeID && + info.Role == other.Role && + info.Weight == other.Weight && + info.NetworkPubKey.PublicKey.Equals(other.NetworkPubKey.PublicKey) && + info.StakingPubKey.PublicKey.Equals(other.StakingPubKey.PublicKey) +} + func (info *NodeInfoPub) UnmarshalJSON(b []byte) error { var decodable decodableNodeInfoPub err := json.Unmarshal(b, &decodable) diff --git a/model/bootstrap/node_info_test.go b/model/bootstrap/node_info_test.go index fbe02d86569..2e0c8c248f4 100644 --- a/model/bootstrap/node_info_test.go +++ b/model/bootstrap/node_info_test.go @@ -51,7 +51,7 @@ func TestNodeInfoPubEncodingJSON(t *testing.T) { var dec bootstrap.NodeInfoPub err = json.Unmarshal(enc, &dec) require.NoError(t, err) - assert.Equal(t, conf, dec) + assert.True(t, dec.Equals(&conf)) }) t.Run("compat: should accept old files using Stake field", func(t *testing.T) { conf := unittest.NodeInfoFixture().Public() @@ -62,6 +62,6 @@ func TestNodeInfoPubEncodingJSON(t *testing.T) { var dec bootstrap.NodeInfoPub err = json.Unmarshal(enc, &dec) require.NoError(t, err) - assert.Equal(t, conf, dec) + assert.True(t, dec.Equals(&conf)) }) } diff --git a/model/convert/fixtures_test.go b/model/convert/fixtures_test.go index 74b2890202b..a9c869fb295 100644 --- a/model/convert/fixtures_test.go +++ b/model/convert/fixtures_test.go @@ -13,10 +13,7 @@ import ( // EpochSetupFixture returns an EpochSetup service event as a Cadence event // representation and as a protocol model representation. func EpochSetupFixture(chain flow.ChainID) (flow.Event, *flow.EpochSetup) { - events, err := systemcontracts.ServiceEventsForChain(chain) - if err != nil { - panic(err) - } + events := systemcontracts.ServiceEventsForChain(chain) event := unittest.EventFixture(events.EpochSetup.EventType(), 1, 1, unittest.IdentifierFixture(), 0) event.Payload = []byte(epochSetupFixtureJSON) @@ -112,10 +109,7 @@ func EpochSetupFixture(chain flow.ChainID) (flow.Event, *flow.EpochSetup) { // representation and as a protocol model representation. func EpochCommitFixture(chain flow.ChainID) (flow.Event, *flow.EpochCommit) { - events, err := systemcontracts.ServiceEventsForChain(chain) - if err != nil { - panic(err) - } + events := systemcontracts.ServiceEventsForChain(chain) event := unittest.EventFixture(events.EpochCommit.EventType(), 1, 1, unittest.IdentifierFixture(), 0) event.Payload = []byte(epochCommitFixtureJSON) diff --git a/model/convert/service_event.go b/model/convert/service_event.go index ee46eb28b26..ebc17abdb85 100644 --- a/model/convert/service_event.go +++ b/model/convert/service_event.go @@ -19,10 +19,7 @@ import ( // state. This acts as the conversion from the Cadence type to the flow-go type. func ServiceEvent(chainID flow.ChainID, event flow.Event) (*flow.ServiceEvent, error) { - events, err := systemcontracts.ServiceEventsForChain(chainID) - if err != nil { - return nil, fmt.Errorf("could not get service event info: %w", err) - } + events := systemcontracts.ServiceEventsForChain(chainID) // depending on type of service event construct Go type switch event.Type { diff --git a/model/convert/service_event_test.go b/model/convert/service_event_test.go index 88ba8c4d3ca..9c50a98d1c3 100644 --- a/model/convert/service_event_test.go +++ b/model/convert/service_event_test.go @@ -182,11 +182,9 @@ func TestVersionBeaconEventConversion(t *testing.T) { runVersionBeaconTestCase := func(t *testing.T, test vbTestCase) { chainID := flow.Emulator t.Run(test.name, func(t *testing.T) { - events, err := systemcontracts.ServiceEventsForChain(chainID) - if err != nil { - panic(err) - } + events := systemcontracts.ServiceEventsForChain(chainID) + var err error event := unittest.EventFixture(events.VersionBeacon.EventType(), 1, 1, unittest.IdentifierFixture(), 0) event.Payload, err = ccf.Encode(test.event) require.NoError(t, err) diff --git a/model/encodable/keys_test.go b/model/encodable/keys_test.go index ccdf63cd044..338c1708366 100644 --- a/model/encodable/keys_test.go +++ b/model/encodable/keys_test.go @@ -247,7 +247,7 @@ func TestEncodableRandomBeaconPrivKeyMsgPack(t *testing.T) { err = key.UnmarshalMsgpack(b) require.NoError(t, err) - require.Equal(t, oldPubKey, key.PublicKey) + require.True(t, oldPubKey.Equals(key.PublicKey)) } func generateRandomSeed(t *testing.T) []byte { diff --git a/model/flow/chain.go b/model/flow/chain.go index adb4080b44b..63f39ece58f 100644 --- a/model/flow/chain.go +++ b/model/flow/chain.go @@ -40,6 +40,20 @@ const ( MonotonicEmulator ChainID = "flow-emulator-monotonic" ) +// AllChainIDs returns a list of all supported chain IDs. +func AllChainIDs() ChainIDList { + return ChainIDList{ + Mainnet, + Testnet, + Sandboxnet, + Benchnet, + Localnet, + Emulator, + BftTestnet, + MonotonicEmulator, + } +} + // Transient returns whether the chain ID is for a transient network. func (c ChainID) Transient() bool { return c == Emulator || c == Localnet || c == Benchnet || c == BftTestnet diff --git a/model/flow/constants.go b/model/flow/constants.go index 4f172c36528..6b03c36a6db 100644 --- a/model/flow/constants.go +++ b/model/flow/constants.go @@ -28,6 +28,10 @@ const DefaultTransactionExpiryBuffer = 30 // DefaultMaxTransactionGasLimit is the default maximum value for the transaction gas limit. const DefaultMaxTransactionGasLimit = 9999 +// EstimatedComputationPerMillisecond is the approximate number of computation units that can be performed in a millisecond. +// this was calibrated during the Variable Transaction Fees: Execution Effort FLIP https://github.com/onflow/flow/pull/753 +const EstimatedComputationPerMillisecond = 9999.0 / 200.0 + // DefaultMaxTransactionByteSize is the default maximum transaction byte size. (~1.5MB) const DefaultMaxTransactionByteSize = 1_500_000 diff --git a/model/flow/identity_test.go b/model/flow/identity_test.go index f4dd5f80729..06dde6fe831 100644 --- a/model/flow/identity_test.go +++ b/model/flow/identity_test.go @@ -58,7 +58,7 @@ func TestIdentityEncodingJSON(t *testing.T) { var dec flow.Identity err = json.Unmarshal(enc, &dec) require.NoError(t, err) - require.Equal(t, identity, &dec) + require.True(t, identity.EqualTo(&dec)) }) t.Run("empty address should be omitted", func(t *testing.T) { @@ -71,7 +71,7 @@ func TestIdentityEncodingJSON(t *testing.T) { var dec flow.Identity err = json.Unmarshal(enc, &dec) require.NoError(t, err) - require.Equal(t, identity, &dec) + require.True(t, identity.EqualTo(&dec)) }) } @@ -82,7 +82,7 @@ func TestIdentityEncodingMsgpack(t *testing.T) { var dec flow.Identity err = msgpack.Unmarshal(enc, &dec) require.NoError(t, err) - require.Equal(t, identity, &dec) + require.True(t, identity.EqualTo(&dec)) } func TestIdentityList_Exists(t *testing.T) { diff --git a/model/flow/protocol_state.go b/model/flow/protocol_state.go index c94a2c231ac..7addfc130d9 100644 --- a/model/flow/protocol_state.go +++ b/model/flow/protocol_state.go @@ -22,7 +22,6 @@ type DynamicIdentityEntryList []*DynamicIdentityEntry // Note that the current implementation does not store the identity table directly. Instead, we store // the original events that constituted the _initial_ identity table at the beginning of the epoch // plus some modifiers. We intend to restructure this code soon. -// TODO: https://github.com/onflow/flow-go/issues/4649 type ProtocolStateEntry struct { PreviousEpoch *EpochStateContainer // minimal dynamic properties for previous epoch [optional, nil for first epoch after spork, genesis] CurrentEpoch EpochStateContainer // minimal dynamic properties for current epoch diff --git a/model/flow/protocol_state_test.go b/model/flow/protocol_state_test.go index 58dc1782fbc..57be2207960 100644 --- a/model/flow/protocol_state_test.go +++ b/model/flow/protocol_state_test.go @@ -126,7 +126,52 @@ func TestNewRichProtocolStateEntry(t *testing.T) { assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants") }) - // TODO: include test for epoch setup phase where no prior epoch exist (i.e. first epoch setup phase after spork) + t.Run("setup-after-spork", func(t *testing.T) { + stateEntry := unittest.ProtocolStateFixture(unittest.WithNextEpochProtocolState(), func(entry *flow.RichProtocolStateEntry) { + // no previous epoch since we are in the first epoch + entry.PreviousEpochSetup = nil + entry.PreviousEpochCommit = nil + entry.PreviousEpoch = nil + + // next epoch is setup but not committed + entry.NextEpochCommit = nil + entry.NextEpoch.CommitID = flow.ZeroID + }) + // sanity check that previous epoch is not populated in `stateEntry` + assert.Nil(t, stateEntry.PreviousEpoch) + assert.Nil(t, stateEntry.PreviousEpochSetup) + assert.Nil(t, stateEntry.PreviousEpochCommit) + + richEntry, err := flow.NewRichProtocolStateEntry( + stateEntry.ProtocolStateEntry, + stateEntry.PreviousEpochSetup, + stateEntry.PreviousEpochCommit, + stateEntry.CurrentEpochSetup, + stateEntry.CurrentEpochCommit, + stateEntry.NextEpochSetup, + nil, + ) + assert.NoError(t, err) + expectedIdentities, err := flow.BuildIdentityTable( + stateEntry.CurrentEpochSetup.Participants, + stateEntry.CurrentEpoch.ActiveIdentities, + stateEntry.NextEpochSetup.Participants, + stateEntry.NextEpoch.ActiveIdentities, + flow.EpochParticipationStatusJoining, + ) + assert.NoError(t, err) + assert.Equal(t, expectedIdentities, richEntry.CurrentEpochIdentityTable, "should be equal to current epoch setup participants + next epoch setup participants") + assert.Nil(t, richEntry.NextEpochCommit) + expectedIdentities, err = flow.BuildIdentityTable( + stateEntry.NextEpochSetup.Participants, + stateEntry.NextEpoch.ActiveIdentities, + stateEntry.CurrentEpochSetup.Participants, + stateEntry.CurrentEpoch.ActiveIdentities, + flow.EpochParticipationStatusLeaving, + ) + assert.NoError(t, err) + assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants") + }) // Common situation during the epoch commit phase for epoch N+1 // * we are currently in Epoch N @@ -165,8 +210,47 @@ func TestNewRichProtocolStateEntry(t *testing.T) { assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants") }) - // TODO: include test for epoch commit phase where no prior epoch exist (i.e. first epoch commit phase after spork) + t.Run("commit-after-spork", func(t *testing.T) { + stateEntry := unittest.ProtocolStateFixture(unittest.WithNextEpochProtocolState(), func(entry *flow.RichProtocolStateEntry) { + // no previous epoch since we are in the first epoch + entry.PreviousEpochSetup = nil + entry.PreviousEpochCommit = nil + entry.PreviousEpoch = nil + }) + // sanity check that previous epoch is not populated in `stateEntry` + assert.Nil(t, stateEntry.PreviousEpoch) + assert.Nil(t, stateEntry.PreviousEpochSetup) + assert.Nil(t, stateEntry.PreviousEpochCommit) + richEntry, err := flow.NewRichProtocolStateEntry( + stateEntry.ProtocolStateEntry, + stateEntry.PreviousEpochSetup, + stateEntry.PreviousEpochCommit, + stateEntry.CurrentEpochSetup, + stateEntry.CurrentEpochCommit, + stateEntry.NextEpochSetup, + stateEntry.NextEpochCommit, + ) + assert.NoError(t, err) + expectedIdentities, err := flow.BuildIdentityTable( + stateEntry.CurrentEpochSetup.Participants, + stateEntry.CurrentEpoch.ActiveIdentities, + stateEntry.NextEpochSetup.Participants, + stateEntry.NextEpoch.ActiveIdentities, + flow.EpochParticipationStatusJoining, + ) + assert.NoError(t, err) + assert.Equal(t, expectedIdentities, richEntry.CurrentEpochIdentityTable, "should be equal to current epoch setup participants + next epoch setup participants") + expectedIdentities, err = flow.BuildIdentityTable( + stateEntry.NextEpochSetup.Participants, + stateEntry.NextEpoch.ActiveIdentities, + stateEntry.CurrentEpochSetup.Participants, + stateEntry.CurrentEpoch.ActiveIdentities, + flow.EpochParticipationStatusLeaving, + ) + assert.NoError(t, err) + assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants") + }) } // TestProtocolStateEntry_Copy tests if the copy method returns a deep copy of the entry. diff --git a/module/dkg/client.go b/module/dkg/client.go index e8401f23736..fbe7247ed46 100644 --- a/module/dkg/client.go +++ b/module/dkg/client.go @@ -44,7 +44,7 @@ func NewClient( Str("component", "dkg_contract_client"). Str("flow_client_an_id", flowClientANID.String()). Logger() - base := epochs.NewBaseClient(log, flowClient, accountAddress, accountKeyIndex, signer, dkgContractAddress) + base := epochs.NewBaseClient(log, flowClient, accountAddress, accountKeyIndex, signer) env := templates.Environment{DkgAddress: dkgContractAddress} diff --git a/module/dkg/controller_test.go b/module/dkg/controller_test.go index e8f8d253537..3d9d1676a6a 100644 --- a/module/dkg/controller_test.go +++ b/module/dkg/controller_test.go @@ -248,7 +248,7 @@ func initNodes(t *testing.T, n int, phase1Duration, phase2Duration, phase3Durati logger: logger, } - seed := unittest.SeedFixture(20) + seed := unittest.SeedFixture(crypto.KeyGenSeedMinLen) dkg, err := crypto.NewJointFeldman(n, signature.RandomBeaconThreshold(n), i, broker) require.NoError(t, err) diff --git a/module/dkg_broker.go b/module/dkg_broker.go index 49ebb0ad051..7e64353816e 100644 --- a/module/dkg_broker.go +++ b/module/dkg_broker.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package module import ( diff --git a/module/epochs/base_client.go b/module/epochs/base_client.go index eea76558639..108b2d1ed3b 100644 --- a/module/epochs/base_client.go +++ b/module/epochs/base_client.go @@ -34,8 +34,7 @@ var ( type BaseClient struct { Log zerolog.Logger // default logger - ContractAddress string // contract address - FlowClient module.SDKClientWrapper // flow access node client + FlowClient module.SDKClientWrapper // flow access node client AccountAddress sdk.Address // account belonging to node interacting with the contract AccountKeyIndex uint // account key index @@ -49,12 +48,10 @@ func NewBaseClient( accountAddress string, accountKeyIndex uint, signer sdkcrypto.Signer, - contractAddress string, ) *BaseClient { return &BaseClient{ Log: log, - ContractAddress: contractAddress, FlowClient: flowClient, AccountKeyIndex: accountKeyIndex, Signer: signer, diff --git a/module/epochs/qc_client.go b/module/epochs/qc_client.go index a1a2b5ec461..8bf675f4048 100644 --- a/module/epochs/qc_client.go +++ b/module/epochs/qc_client.go @@ -56,7 +56,7 @@ func NewQCContractClient( Str("component", "qc_contract_client"). Str("flow_client_an_id", flowClientANID.String()). Logger() - base := NewBaseClient(log, flowClient, accountAddress, accountKeyIndex, signer, qcContractAddress) + base := NewBaseClient(log, flowClient, accountAddress, accountKeyIndex, signer) // set QCContractAddress to the contract address given env := templates.Environment{QuorumCertificateAddress: qcContractAddress} diff --git a/module/execution/registers_async.go b/module/execution/registers_async.go new file mode 100644 index 00000000000..37043704c52 --- /dev/null +++ b/module/execution/registers_async.go @@ -0,0 +1,59 @@ +package execution + +import ( + "fmt" + + "go.uber.org/atomic" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage" +) + +// RegistersAsyncStore has the same basic structure as access/backend.ScriptExecutor +// TODO: use this implementation in the `scripts.ScriptExecutor` passed into the AccessAPI +type RegistersAsyncStore struct { + registerIndex atomic.Pointer[storage.RegisterIndex] +} + +func NewRegistersAsyncStore() *RegistersAsyncStore { + return &RegistersAsyncStore{atomic.Pointer[storage.RegisterIndex]{}} +} + +// InitDataAvailable initializes the underlying storage.RegisterIndex +// This method can be called at any time after the RegistersAsyncStore object is created and before RegisterValues is called +// since we can't disambiguate between the underlying store before bootstrapping or just simply being behind sync +func (r *RegistersAsyncStore) InitDataAvailable(registers storage.RegisterIndex) error { + if r.registerIndex.CompareAndSwap(nil, ®isters) { + return nil + } + return fmt.Errorf("registers already initialized") +} + +// RegisterValues gets the register values from the underlying storage.RegisterIndex +// Expected errors: +// - storage.ErrHeightNotIndexed if the store is still bootstrapping or if the values at the height is not indexed yet +// - storage.ErrNotFound if the register does not exist at the height +func (r *RegistersAsyncStore) RegisterValues(ids flow.RegisterIDs, height uint64) ([]flow.RegisterValue, error) { + registerStore, isAvailable := r.isDataAvailable(height) + if !isAvailable { + return nil, storage.ErrHeightNotIndexed + } + result := make([]flow.RegisterValue, len(ids)) + for i, regId := range ids { + val, err := registerStore.Get(regId, height) + if err != nil { + return nil, fmt.Errorf("failed to get register value for id %d: %w", i, err) + } + result[i] = val + } + return result, nil +} + +func (r *RegistersAsyncStore) isDataAvailable(height uint64) (storage.RegisterIndex, bool) { + str := r.registerIndex.Load() + if str != nil { + registerStore := *str + return registerStore, height <= registerStore.LatestHeight() && height >= registerStore.FirstHeight() + } + return nil, false +} diff --git a/module/execution/registers_async_test.go b/module/execution/registers_async_test.go new file mode 100644 index 00000000000..0db7b38233a --- /dev/null +++ b/module/execution/registers_async_test.go @@ -0,0 +1,86 @@ +package execution + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/storage" + storagemock "github.com/onflow/flow-go/storage/mock" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestInitDataAvailable(t *testing.T) { + rootBlockHeight := uint64(1) + // test data available on init + registerID := unittest.RegisterIDFixture() + invalidRegisterID := flow.RegisterID{ + Owner: "ha", + Key: "ha", + } + registerValue1 := []byte("response1") + registerValue2 := []byte("response2") + firstHeight := rootBlockHeight + latestHeight := rootBlockHeight + 1 + + t.Parallel() + + t.Run("registersDB bootstrapped correct values returned", func(t *testing.T) { + registersAsync := NewRegistersAsyncStore() + registers := storagemock.NewRegisterIndex(t) + registers.On("Get", registerID, firstHeight).Return(registerValue1, nil) + registers.On("Get", registerID, latestHeight).Return(registerValue2, nil) + registers.On("FirstHeight").Return(firstHeight) + registers.On("LatestHeight").Return(latestHeight) + + require.NoError(t, registersAsync.InitDataAvailable(registers)) + val1, err := registersAsync.RegisterValues([]flow.RegisterID{registerID}, firstHeight) + require.NoError(t, err) + require.Equal(t, val1[0], registerValue1) + + val2, err := registersAsync.RegisterValues([]flow.RegisterID{registerID}, latestHeight) + require.NoError(t, err) + require.Equal(t, val2[0], registerValue2) + }) + + t.Run("out of bounds height correct error returned", func(t *testing.T) { + registersAsync := NewRegistersAsyncStore() + registers := storagemock.NewRegisterIndex(t) + registers.On("LatestHeight").Return(latestHeight) + + require.NoError(t, registersAsync.InitDataAvailable(registers)) + _, err := registersAsync.RegisterValues([]flow.RegisterID{registerID}, latestHeight+1) + require.ErrorIs(t, err, storage.ErrHeightNotIndexed) + }) + + t.Run("no register value available correct error returned", func(t *testing.T) { + registersAsync := NewRegistersAsyncStore() + registers := storagemock.NewRegisterIndex(t) + registers.On("Get", invalidRegisterID, latestHeight).Return(nil, storage.ErrNotFound) + registers.On("FirstHeight").Return(firstHeight) + registers.On("LatestHeight").Return(latestHeight) + + require.NoError(t, registersAsync.InitDataAvailable(registers)) + _, err := registersAsync.RegisterValues([]flow.RegisterID{invalidRegisterID}, latestHeight) + require.ErrorIs(t, err, storage.ErrNotFound) + }) +} + +func TestRegisterValuesDataUnAvailable(t *testing.T) { + rootBlockHeight := uint64(1) + registersAsync := NewRegistersAsyncStore() + // registerDB not bootstrapped, correct error returned + registerID := unittest.RegisterIDFixture() + _, err := registersAsync.RegisterValues([]flow.RegisterID{registerID}, rootBlockHeight) + require.ErrorIs(t, err, storage.ErrHeightNotIndexed) +} + +func TestInitDataRepeatedCalls(t *testing.T) { + registersAsync := NewRegistersAsyncStore() + registers1 := storagemock.NewRegisterIndex(t) + registers2 := storagemock.NewRegisterIndex(t) + + require.NoError(t, registersAsync.InitDataAvailable(registers1)) + require.Error(t, registersAsync.InitDataAvailable(registers2)) +} diff --git a/module/execution/scripts.go b/module/execution/scripts.go index 35680b1ca84..471fee0c8a4 100644 --- a/module/execution/scripts.go +++ b/module/execution/scripts.go @@ -66,6 +66,7 @@ func NewScripts( entropy query.EntropyProviderPerBlock, header storage.Headers, registerAtHeight RegisterAtHeight, + queryConf query.QueryConfig, ) (*Scripts, error) { vm := fvm.NewVirtualMachine() @@ -80,7 +81,7 @@ func NewScripts( } queryExecutor := query.NewQueryExecutor( - query.NewDefaultConfig(), + queryConf, log, metrics, vm, diff --git a/module/execution/scripts_test.go b/module/execution/scripts_test.go index 97a63a20d1b..b73b905eb8f 100644 --- a/module/execution/scripts_test.go +++ b/module/execution/scripts_test.go @@ -15,6 +15,7 @@ import ( mocks "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" + "github.com/onflow/flow-go/engine/execution/computation/query" "github.com/onflow/flow-go/engine/execution/computation/query/mock" "github.com/onflow/flow-go/engine/execution/testutil" "github.com/onflow/flow-go/fvm" @@ -165,6 +166,7 @@ func (s *scriptTestSuite) SetupTest() { entropyBlock, headers, index.RegisterValue, + query.NewDefaultConfig(), ) s.Require().NoError(err) s.scripts = scripts diff --git a/module/finalizedreader/finalizedreader.go b/module/finalizedreader/finalizedreader.go new file mode 100644 index 00000000000..01b6e4ec5ce --- /dev/null +++ b/module/finalizedreader/finalizedreader.go @@ -0,0 +1,67 @@ +package finalizedreader + +import ( + "fmt" + + "go.uber.org/atomic" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/state/protocol" + "github.com/onflow/flow-go/storage" +) + +type FinalizedReader struct { + lastHeight *atomic.Uint64 + headers storage.Headers +} + +var _ protocol.Consumer = (*FinalizedReader)(nil) + +func NewFinalizedReader(headers storage.Headers, lastHeight uint64) *FinalizedReader { + return &FinalizedReader{ + lastHeight: atomic.NewUint64(lastHeight), + headers: headers, + } +} + +// FinalizedBlockIDAtHeight returns the block ID of the finalized block at the given height. +// It return storage.NotFound if the given height has not been finalized yet +// any other error returned are exceptions +func (r *FinalizedReader) FinalizedBlockIDAtHeight(height uint64) (flow.Identifier, error) { + if height > r.lastHeight.Load() { + return flow.ZeroID, fmt.Errorf("height not finalized (%v): %w", height, storage.ErrNotFound) + } + + header, err := r.headers.ByHeight(height) + if err != nil { + return flow.ZeroID, err + } + + return header.ID(), nil +} + +// BlockFinalized implements the protocol.Consumer interface, which allows FinalizedReader +// to consume finalized blocks from the protocol +func (r *FinalizedReader) BlockFinalized(h *flow.Header) { + r.lastHeight.Store(h.Height) +} + +func (r *FinalizedReader) BlockProcessable(h *flow.Header, qc *flow.QuorumCertificate) { + // noop +} + +func (r *FinalizedReader) EpochTransition(newEpochCounter uint64, first *flow.Header) { + // noop +} + +func (r *FinalizedReader) EpochSetupPhaseStarted(currentEpochCounter uint64, first *flow.Header) { + // noop +} + +func (r *FinalizedReader) EpochCommittedPhaseStarted(currentEpochCounter uint64, first *flow.Header) { + // noop +} + +func (r *FinalizedReader) EpochEmergencyFallbackTriggered() { + // noop +} diff --git a/module/finalizedreader/finalizedreader_test.go b/module/finalizedreader/finalizedreader_test.go new file mode 100644 index 00000000000..e9a97133dc5 --- /dev/null +++ b/module/finalizedreader/finalizedreader_test.go @@ -0,0 +1,59 @@ +package finalizedreader + +import ( + "errors" + "testing" + + "github.com/dgraph-io/badger/v2" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/storage" + "github.com/onflow/flow-go/storage/badger/operation" + "github.com/onflow/flow-go/utils/unittest" + + badgerstorage "github.com/onflow/flow-go/storage/badger" +) + +func TestFinalizedReader(t *testing.T) { + unittest.RunWithBadgerDB(t, func(db *badger.DB) { + // prepare the storage.Headers instance + metrics := metrics.NewNoopCollector() + headers := badgerstorage.NewHeaders(metrics, db) + block := unittest.BlockFixture() + + // store header + err := headers.Store(block.Header) + require.NoError(t, err) + + // index the header + err = db.Update(operation.IndexBlockHeight(block.Header.Height, block.ID())) + require.NoError(t, err) + + // verify is able to reader the finalized block ID + reader := NewFinalizedReader(headers, block.Header.Height) + finalized, err := reader.FinalizedBlockIDAtHeight(block.Header.Height) + require.NoError(t, err) + require.Equal(t, block.ID(), finalized) + + // verify is able to return storage.NotFound when the height is not finalized + _, err = reader.FinalizedBlockIDAtHeight(block.Header.Height + 1) + require.Error(t, err) + require.True(t, errors.Is(err, storage.ErrNotFound), err) + + // finalize one more block + block2 := unittest.BlockWithParentFixture(block.Header) + require.NoError(t, headers.Store(block2.Header)) + err = db.Update(operation.IndexBlockHeight(block2.Header.Height, block2.ID())) + require.NoError(t, err) + reader.BlockFinalized(block2.Header) + + // should be able to retrieve the block + finalized, err = reader.FinalizedBlockIDAtHeight(block2.Header.Height) + require.NoError(t, err) + require.Equal(t, block2.ID(), finalized) + + // should noop and no panic + reader.BlockProcessable(block.Header, block2.Header.QuorumCertificate()) + }) +} diff --git a/module/grpcserver/server.go b/module/grpcserver/server.go index 7ca1c478688..231fa1dd6dc 100644 --- a/module/grpcserver/server.go +++ b/module/grpcserver/server.go @@ -4,6 +4,8 @@ import ( "net" "sync" + "go.uber.org/atomic" + "github.com/rs/zerolog" "google.golang.org/grpc" @@ -20,8 +22,9 @@ import ( // into different engines making it possible to use single grpc server for multiple services which live in different modules. type GrpcServer struct { component.Component - log zerolog.Logger - Server *grpc.Server + log zerolog.Logger + Server *grpc.Server + grpcSignalerCtx *atomic.Pointer[irrecoverable.SignalerContext] grpcListenAddr string // the GRPC server address as ip:port @@ -35,11 +38,13 @@ var _ component.Component = (*GrpcServer)(nil) func NewGrpcServer(log zerolog.Logger, grpcListenAddr string, grpcServer *grpc.Server, + grpcSignalerCtx *atomic.Pointer[irrecoverable.SignalerContext], ) *GrpcServer { server := &GrpcServer{ - log: log, - Server: grpcServer, - grpcListenAddr: grpcListenAddr, + log: log, + Server: grpcServer, + grpcListenAddr: grpcListenAddr, + grpcSignalerCtx: grpcSignalerCtx, } server.Component = component.NewComponentManagerBuilder(). AddWorker(server.serveGRPCWorker). @@ -54,6 +59,8 @@ func (g *GrpcServer) serveGRPCWorker(ctx irrecoverable.SignalerContext, ready co g.log = g.log.With().Str("grpc_address", g.grpcListenAddr).Logger() g.log.Info().Msg("starting grpc server on address") + g.grpcSignalerCtx.Store(&ctx) + l, err := net.Listen("tcp", g.grpcListenAddr) if err != nil { g.log.Err(err).Msg("failed to start the grpc server") diff --git a/module/grpcserver/server_builder.go b/module/grpcserver/server_builder.go index d42196cdf12..a42cdc0e269 100644 --- a/module/grpcserver/server_builder.go +++ b/module/grpcserver/server_builder.go @@ -1,14 +1,16 @@ package grpcserver import ( - "github.com/rs/zerolog" + "context" + grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" + "github.com/rs/zerolog" + "go.uber.org/atomic" "google.golang.org/grpc" "google.golang.org/grpc/credentials" - grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" - "github.com/onflow/flow-go/engine/common/rpc" + "github.com/onflow/flow-go/module/irrecoverable" ) type Option func(*GrpcServerBuilder) @@ -33,12 +35,26 @@ type GrpcServerBuilder struct { log zerolog.Logger gRPCListenAddr string server *grpc.Server + signalerCtx *atomic.Pointer[irrecoverable.SignalerContext] transportCredentials credentials.TransportCredentials // the GRPC credentials stateStreamInterceptorEnable bool } -// NewGrpcServerBuilder helps to build a new grpc server. +// NewGrpcServerBuilder creates a new builder for configuring and initializing a gRPC server. +// +// The builder is configured with the provided parameters such as logger, gRPC server address, maximum message size, +// API rate limits, and additional options. The builder also sets up the necessary interceptors, including handling +// irrecoverable errors using the irrecoverable.SignalerContext. The gRPC server can be configured with options such +// as maximum message sizes and interceptors for handling RPC calls. +// +// If RPC metrics are enabled, the builder adds the gRPC Prometheus interceptor for collecting metrics. Additionally, +// it can enable a state stream interceptor based on the configuration. Rate limiting interceptors can be added based +// on specified API rate limits. Logging and custom interceptors are applied, and the final gRPC server is returned. +// +// If transport credentials are provided, a secure gRPC server is created; otherwise, an unsecured server is initialized. +// +// Note: The gRPC server is created with the specified options and is ready for further configuration or starting. func NewGrpcServerBuilder(log zerolog.Logger, gRPCListenAddr string, maxMsgSize uint, @@ -57,12 +73,36 @@ func NewGrpcServerBuilder(log zerolog.Logger, applyOption(grpcServerBuilder) } + // we use an atomic pointer to setup an interceptor for handling irrecoverable errors, the necessity of this approach + // is dictated by complex startup order of grpc server and other services. At the point where we need to register + // an interceptor we don't have an `irrecoverable.SignalerContext`, it becomes available only when we start + // the server but at that point we can't register interceptors anymore, so we inject it using this approach. + signalerCtx := atomic.NewPointer[irrecoverable.SignalerContext](nil) + // create a GRPC server to serve GRPC clients grpcOpts := []grpc.ServerOption{ grpc.MaxRecvMsgSize(int(maxMsgSize)), grpc.MaxSendMsgSize(int(maxMsgSize)), } var interceptors []grpc.UnaryServerInterceptor // ordered list of interceptors + // This interceptor is responsible for ensuring that irrecoverable errors are properly propagated using + // the irrecoverable.SignalerContext. It replaces the original gRPC context with a new one that includes + // the irrecoverable.SignalerContextKey if available, allowing the server to handle error conditions indicating + // an inconsistent or corrupted node state. If no irrecoverable.SignalerContext is present, the original context + // is used to process the gRPC request. + // + // The interceptor follows the grpc.UnaryServerInterceptor signature, where it takes the incoming gRPC context, + // request, unary server information, and handler function. It returns the response and error after handling + // the request. This mechanism ensures consistent error handling for gRPC requests across the server. + interceptors = append(interceptors, func(ctx context.Context, req any, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp any, err error) { + if sigCtx := signalerCtx.Load(); sigCtx != nil { + resp, err = handler(irrecoverable.WithSignalerContext(ctx, *sigCtx), req) + } else { + resp, err = handler(ctx, req) + } + return + }) + // if rpc metrics is enabled, first create the grpc metrics interceptor if rpcMetricsEnabled { interceptors = append(interceptors, grpc_prometheus.UnaryServerInterceptor) @@ -98,10 +138,11 @@ func NewGrpcServerBuilder(log zerolog.Logger, } grpcServerBuilder.log = log grpcServerBuilder.server = grpc.NewServer(grpcOpts...) + grpcServerBuilder.signalerCtx = signalerCtx return grpcServerBuilder } func (b *GrpcServerBuilder) Build() *GrpcServer { - return NewGrpcServer(b.log, b.gRPCListenAddr, b.server) + return NewGrpcServer(b.log, b.gRPCListenAddr, b.server, b.signalerCtx) } diff --git a/module/irrecoverable/irrecoverable.go b/module/irrecoverable/irrecoverable.go index 1ef79f5f4ab..0877732bcd8 100644 --- a/module/irrecoverable/irrecoverable.go +++ b/module/irrecoverable/irrecoverable.go @@ -48,6 +48,9 @@ type SignalerContext interface { sealed() // private, to constrain builder to using WithSignaler } +// SignalerContextKey represents the key type for retrieving a SignalerContext from a value `context.Context`. +type SignalerContextKey struct{} + // private, to force context derivation / WithSignaler type signalerCtx struct { context.Context @@ -62,6 +65,11 @@ func WithSignaler(parent context.Context) (SignalerContext, <-chan error) { return &signalerCtx{parent, sig}, errChan } +// WithSignalerContext wraps `SignalerContext` using `context.WithValue` so it can later be used with `Throw`. +func WithSignalerContext(parent context.Context, ctx SignalerContext) context.Context { + return context.WithValue(parent, SignalerContextKey{}, ctx) +} + // Throw enables throwing an irrecoverable error using any context.Context. // // If we have an SignalerContext, we can directly ctx.Throw. @@ -72,12 +80,13 @@ func WithSignaler(parent context.Context) (SignalerContext, <-chan error) { // Throw can be a drop-in replacement anywhere we have a context.Context likely // to support Irrecoverables. Note: this is not a method func Throw(ctx context.Context, err error) { - signalerAbleContext, ok := ctx.(SignalerContext) + signalerAbleContext, ok := ctx.Value(SignalerContextKey{}).(SignalerContext) if ok { signalerAbleContext.Throw(err) + } else { + // Be spectacular on how this does not -but should- handle irrecoverables: + log.Fatalf("irrecoverable error signaler not found for context, please implement! Unhandled irrecoverable error: %v", err) } - // Be spectacular on how this does not -but should- handle irrecoverables: - log.Fatalf("irrecoverable error signaler not found for context, please implement! Unhandled irrecoverable error %v", err) } // WithSignallerAndCancel returns an irrecoverable context, the cancel diff --git a/module/irrecoverable/unittest.go b/module/irrecoverable/unittest.go index 814eaba53a4..8f59cc27960 100644 --- a/module/irrecoverable/unittest.go +++ b/module/irrecoverable/unittest.go @@ -4,14 +4,16 @@ import ( "context" "testing" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" ) -// MockSignalerContext is a SignalerContext which will immediately fail a test if an error is thrown. +// MockSignalerContext is a SignalerContext that can be used in tests to assert that an error is thrown. +// It embeds a mock.Mock, so it can be used it to assert that Throw is called with a specific error. +// Use NewMockSignalerContextExpectError to create a new MockSignalerContext that expects a specific error, otherwise NewMockSignalerContext. type MockSignalerContext struct { context.Context - t *testing.T - expectError error + *mock.Mock } var _ SignalerContext = &MockSignalerContext{} @@ -19,29 +21,32 @@ var _ SignalerContext = &MockSignalerContext{} func (m MockSignalerContext) sealed() {} func (m MockSignalerContext) Throw(err error) { - if m.expectError != nil { - assert.EqualError(m.t, err, m.expectError.Error()) - return - } - m.t.Fatalf("mock signaler context received error: %v", err) + m.Called(err) } func NewMockSignalerContext(t *testing.T, ctx context.Context) *MockSignalerContext { - return &MockSignalerContext{ + m := &MockSignalerContext{ Context: ctx, - t: t, + Mock: &mock.Mock{}, } + m.Mock.Test(t) + t.Cleanup(func() { m.AssertExpectations(t) }) + return m } +// NewMockSignalerContextWithCancel creates a new MockSignalerContext with a cancel function. func NewMockSignalerContextWithCancel(t *testing.T, parent context.Context) (*MockSignalerContext, context.CancelFunc) { ctx, cancel := context.WithCancel(parent) return NewMockSignalerContext(t, ctx), cancel } +// NewMockSignalerContextExpectError creates a new MockSignalerContext which expects a specific error to be thrown. func NewMockSignalerContextExpectError(t *testing.T, ctx context.Context, err error) *MockSignalerContext { - return &MockSignalerContext{ - Context: ctx, - t: t, - expectError: err, - } + require.NotNil(t, err) + m := NewMockSignalerContext(t, ctx) + + // since we expect an error, we should expect a call to Throw + m.On("Throw", err).Once().Return() + + return m } diff --git a/module/mempool/queue/internal/messageEntity.go b/module/mempool/queue/internal/messageEntity.go index 6174f1e0a12..295da05da49 100644 --- a/module/mempool/queue/internal/messageEntity.go +++ b/module/mempool/queue/internal/messageEntity.go @@ -14,9 +14,10 @@ type MessageEntity struct { var _ flow.Entity = (*MessageEntity)(nil) func NewMessageEntity(msg *engine.Message) MessageEntity { + id := identifierOfMessage(msg) return MessageEntity{ Msg: *msg, - id: identifierOfMessage(msg), + id: id, } } diff --git a/module/mempool/queue/internal/rpcInspectionRequest_test.go b/module/mempool/queue/internal/rpcInspectionRequest_test.go new file mode 100644 index 00000000000..39060a5a73f --- /dev/null +++ b/module/mempool/queue/internal/rpcInspectionRequest_test.go @@ -0,0 +1,57 @@ +package internal_test + +import ( + "testing" + + pubsub "github.com/libp2p/go-libp2p-pubsub" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/engine" + "github.com/onflow/flow-go/module/mempool/queue/internal" + "github.com/onflow/flow-go/network/p2p/inspector/validation" + p2ptest "github.com/onflow/flow-go/network/p2p/test" + "github.com/onflow/flow-go/utils/unittest" +) + +// TestMessageEntity_InspectRPCRequest_ID tests that the ID of a MessageEntity created from an InspectRPCRequest is +// only dependent of the Nonce and PeerID fields of the InspectRPCRequest; and is independent of the RPC field. +// Unique identifier for the HeroCache is imperative to prevent false-positive de-duplication. +// However, the RPC field contains a bulk of the data in the InspectRPCRequest, and including it in the ID would +// cause the InspectRPCRequest store and retrieval to be resource intensive. +func TestMessageEntity_InspectRPCRequest_ID(t *testing.T) { + rpcs := p2ptest.GossipSubRpcFixtures(t, 2) + rpc1 := rpcs[0] + rpc2 := rpcs[1] + peerId1 := unittest.PeerIdFixture(t) + + // creates two InspectRPCRequest structs with the same Nonce and PeerID fields + req1, err := validation.NewInspectRPCRequest(peerId1, &pubsub.RPC{ + RPC: *rpc1, + }) + require.NoError(t, err) + + req2, err := validation.NewInspectRPCRequest(peerId1, &pubsub.RPC{ + RPC: *rpc1, + }) + require.NoError(t, err) + // Set the Nonce field of the second InspectRPCRequest struct to the Nonce field of the first + req2.Nonce = req1.Nonce + + // creates a third InspectRPCRequest struct with the same Nonce and PeerID fields as the first two + // but with a different RPC field + req3, err := validation.NewInspectRPCRequest(peerId1, &pubsub.RPC{ + RPC: *rpc2, + }) + require.NoError(t, err) + req3.Nonce = req1.Nonce + + // now convert to MessageEntity + entity1 := internal.NewMessageEntity(&engine.Message{Payload: req1}) + entity2 := internal.NewMessageEntity(&engine.Message{Payload: req2}) + entity3 := internal.NewMessageEntity(&engine.Message{Payload: req3}) + + // as the Nonce and PeerID fields are the same, the ID of the MessageEntity should be the same accross all three + // in other words, the RPC field should not affect the ID + require.Equal(t, entity1.ID(), entity2.ID()) + require.Equal(t, entity1.ID(), entity3.ID()) +} diff --git a/module/metrics/example/README.md b/module/metrics/example/README.md index f693cac0780..ec319414ad8 100644 --- a/module/metrics/example/README.md +++ b/module/metrics/example/README.md @@ -18,7 +18,7 @@ You can choose one of the following: Note: Running example with `-happypath` flag examines the metrics collection on a real happy path of verification node. ``` - go run --tags=relic module/metrics/example/verification/main.go + go run module/metrics/example/verification/main.go ``` - Consensus Node: ``` diff --git a/module/metrics/execution.go b/module/metrics/execution.go index f17692e3859..94c3e70e107 100644 --- a/module/metrics/execution.go +++ b/module/metrics/execution.go @@ -11,77 +11,78 @@ import ( ) type ExecutionCollector struct { - tracer module.Tracer - totalExecutedBlocksCounter prometheus.Counter - totalExecutedCollectionsCounter prometheus.Counter - totalExecutedTransactionsCounter prometheus.Counter - totalExecutedScriptsCounter prometheus.Counter - totalFailedTransactionsCounter prometheus.Counter - lastExecutedBlockHeightGauge prometheus.Gauge - stateStorageDiskTotal prometheus.Gauge - storageStateCommitment prometheus.Gauge - forestApproxMemorySize prometheus.Gauge - forestNumberOfTrees prometheus.Gauge - latestTrieRegCount prometheus.Gauge - latestTrieRegCountDiff prometheus.Gauge - latestTrieRegSize prometheus.Gauge - latestTrieRegSizeDiff prometheus.Gauge - latestTrieMaxDepthTouched prometheus.Gauge - updated prometheus.Counter - proofSize prometheus.Gauge - updatedValuesNumber prometheus.Counter - updatedValuesSize prometheus.Gauge - updatedDuration prometheus.Histogram - updatedDurationPerValue prometheus.Histogram - readValuesNumber prometheus.Counter - readValuesSize prometheus.Gauge - readDuration prometheus.Histogram - readDurationPerValue prometheus.Histogram - blockComputationUsed prometheus.Histogram - blockComputationVector *prometheus.GaugeVec - blockCachedPrograms prometheus.Gauge - blockMemoryUsed prometheus.Histogram - blockEventCounts prometheus.Histogram - blockEventSize prometheus.Histogram - blockExecutionTime prometheus.Histogram - blockTransactionCounts prometheus.Histogram - blockCollectionCounts prometheus.Histogram - collectionComputationUsed prometheus.Histogram - collectionMemoryUsed prometheus.Histogram - collectionEventSize prometheus.Histogram - collectionEventCounts prometheus.Histogram - collectionNumberOfRegistersTouched prometheus.Histogram - collectionTotalBytesWrittenToRegisters prometheus.Histogram - collectionExecutionTime prometheus.Histogram - collectionTransactionCounts prometheus.Histogram - collectionRequestSent prometheus.Counter - collectionRequestRetried prometheus.Counter - transactionParseTime prometheus.Histogram - transactionCheckTime prometheus.Histogram - transactionInterpretTime prometheus.Histogram - transactionExecutionTime prometheus.Histogram - transactionConflictRetries prometheus.Histogram - transactionMemoryEstimate prometheus.Histogram - transactionComputationUsed prometheus.Histogram - transactionEmittedEvents prometheus.Histogram - transactionEventSize prometheus.Histogram - scriptExecutionTime prometheus.Histogram - scriptComputationUsed prometheus.Histogram - scriptMemoryUsage prometheus.Histogram - scriptMemoryEstimate prometheus.Histogram - scriptMemoryDifference prometheus.Histogram - numberOfAccounts prometheus.Gauge - programsCacheMiss prometheus.Counter - programsCacheHit prometheus.Counter - chunkDataPackRequestProcessedTotal prometheus.Counter - chunkDataPackProofSize prometheus.Histogram - chunkDataPackCollectionSize prometheus.Histogram - stateSyncActive prometheus.Gauge - blockDataUploadsInProgress prometheus.Gauge - blockDataUploadsDuration prometheus.Histogram - maxCollectionHeight prometheus.Gauge - computationResultUploadedCount prometheus.Counter - computationResultUploadRetriedCount prometheus.Counter + tracer module.Tracer + totalExecutedBlocksCounter prometheus.Counter + totalExecutedCollectionsCounter prometheus.Counter + totalExecutedTransactionsCounter prometheus.Counter + totalExecutedScriptsCounter prometheus.Counter + totalFailedTransactionsCounter prometheus.Counter + lastExecutedBlockHeightGauge prometheus.Gauge + stateStorageDiskTotal prometheus.Gauge + storageStateCommitment prometheus.Gauge + forestApproxMemorySize prometheus.Gauge + forestNumberOfTrees prometheus.Gauge + latestTrieRegCount prometheus.Gauge + latestTrieRegCountDiff prometheus.Gauge + latestTrieRegSize prometheus.Gauge + latestTrieRegSizeDiff prometheus.Gauge + latestTrieMaxDepthTouched prometheus.Gauge + updated prometheus.Counter + proofSize prometheus.Gauge + updatedValuesNumber prometheus.Counter + updatedValuesSize prometheus.Gauge + updatedDuration prometheus.Histogram + updatedDurationPerValue prometheus.Histogram + readValuesNumber prometheus.Counter + readValuesSize prometheus.Gauge + readDuration prometheus.Histogram + readDurationPerValue prometheus.Histogram + blockComputationUsed prometheus.Histogram + blockComputationVector *prometheus.GaugeVec + blockCachedPrograms prometheus.Gauge + blockMemoryUsed prometheus.Histogram + blockEventCounts prometheus.Histogram + blockEventSize prometheus.Histogram + blockExecutionTime prometheus.Histogram + blockTransactionCounts prometheus.Histogram + blockCollectionCounts prometheus.Histogram + collectionComputationUsed prometheus.Histogram + collectionMemoryUsed prometheus.Histogram + collectionEventSize prometheus.Histogram + collectionEventCounts prometheus.Histogram + collectionNumberOfRegistersTouched prometheus.Histogram + collectionTotalBytesWrittenToRegisters prometheus.Histogram + collectionExecutionTime prometheus.Histogram + collectionTransactionCounts prometheus.Histogram + collectionRequestSent prometheus.Counter + collectionRequestRetried prometheus.Counter + transactionParseTime prometheus.Histogram + transactionCheckTime prometheus.Histogram + transactionInterpretTime prometheus.Histogram + transactionExecutionTime prometheus.Histogram + transactionConflictRetries prometheus.Histogram + transactionMemoryEstimate prometheus.Histogram + transactionComputationUsed prometheus.Histogram + transactionNormalizedTimePerComputation prometheus.Histogram + transactionEmittedEvents prometheus.Histogram + transactionEventSize prometheus.Histogram + scriptExecutionTime prometheus.Histogram + scriptComputationUsed prometheus.Histogram + scriptMemoryUsage prometheus.Histogram + scriptMemoryEstimate prometheus.Histogram + scriptMemoryDifference prometheus.Histogram + numberOfAccounts prometheus.Gauge + programsCacheMiss prometheus.Counter + programsCacheHit prometheus.Counter + chunkDataPackRequestProcessedTotal prometheus.Counter + chunkDataPackProofSize prometheus.Histogram + chunkDataPackCollectionSize prometheus.Histogram + stateSyncActive prometheus.Gauge + blockDataUploadsInProgress prometheus.Gauge + blockDataUploadsDuration prometheus.Histogram + maxCollectionHeight prometheus.Gauge + computationResultUploadedCount prometheus.Counter + computationResultUploadRetriedCount prometheus.Counter } func NewExecutionCollector(tracer module.Tracer) *ExecutionCollector { @@ -405,6 +406,14 @@ func NewExecutionCollector(tracer module.Tracer) *ExecutionCollector { Buckets: []float64{50, 100, 500, 1000, 5000, 10000}, }) + transactionNormalizedTimePerComputation := promauto.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespaceExecution, + Subsystem: subsystemRuntime, + Name: "transaction_ms_per_computation", + Help: "The normalized ratio of millisecond of execution time per computation used. Value below 1 means the transaction was executed faster than estimated (is using less resources then estimated)", + Buckets: []float64{0.015625, 0.03125, 0.0625, 0.125, 0.25, 0.5, 1, 2, 4, 8, 16, 32, 64}, + }) + transactionMemoryEstimate := promauto.NewHistogram(prometheus.HistogramOpts{ Namespace: namespaceExecution, Subsystem: subsystemRuntime, @@ -524,63 +533,64 @@ func NewExecutionCollector(tracer module.Tracer) *ExecutionCollector { ec := &ExecutionCollector{ tracer: tracer, - forestApproxMemorySize: forestApproxMemorySize, - forestNumberOfTrees: forestNumberOfTrees, - latestTrieRegCount: latestTrieRegCount, - latestTrieRegCountDiff: latestTrieRegCountDiff, - latestTrieRegSize: latestTrieRegSize, - latestTrieRegSizeDiff: latestTrieRegSizeDiff, - latestTrieMaxDepthTouched: latestTrieMaxDepthTouched, - updated: updatedCount, - proofSize: proofSize, - updatedValuesNumber: updatedValuesNumber, - updatedValuesSize: updatedValuesSize, - updatedDuration: updatedDuration, - updatedDurationPerValue: updatedDurationPerValue, - readValuesNumber: readValuesNumber, - readValuesSize: readValuesSize, - readDuration: readDuration, - readDurationPerValue: readDurationPerValue, - blockExecutionTime: blockExecutionTime, - blockComputationUsed: blockComputationUsed, - blockComputationVector: blockComputationVector, - blockCachedPrograms: blockCachedPrograms, - blockMemoryUsed: blockMemoryUsed, - blockEventCounts: blockEventCounts, - blockEventSize: blockEventSize, - blockTransactionCounts: blockTransactionCounts, - blockCollectionCounts: blockCollectionCounts, - collectionExecutionTime: collectionExecutionTime, - collectionComputationUsed: collectionComputationUsed, - collectionMemoryUsed: collectionMemoryUsed, - collectionEventSize: collectionEventSize, - collectionEventCounts: collectionEventCounts, - collectionNumberOfRegistersTouched: collectionNumberOfRegistersTouched, - collectionTotalBytesWrittenToRegisters: collectionTotalBytesWrittenToRegisters, - collectionTransactionCounts: collectionTransactionCounts, - collectionRequestSent: collectionRequestsSent, - collectionRequestRetried: collectionRequestsRetries, - transactionParseTime: transactionParseTime, - transactionCheckTime: transactionCheckTime, - transactionInterpretTime: transactionInterpretTime, - transactionExecutionTime: transactionExecutionTime, - transactionConflictRetries: transactionConflictRetries, - transactionComputationUsed: transactionComputationUsed, - transactionMemoryEstimate: transactionMemoryEstimate, - transactionEmittedEvents: transactionEmittedEvents, - transactionEventSize: transactionEventSize, - scriptExecutionTime: scriptExecutionTime, - scriptComputationUsed: scriptComputationUsed, - scriptMemoryUsage: scriptMemoryUsage, - scriptMemoryEstimate: scriptMemoryEstimate, - scriptMemoryDifference: scriptMemoryDifference, - chunkDataPackRequestProcessedTotal: chunkDataPackRequestProcessedTotal, - chunkDataPackProofSize: chunkDataPackProofSize, - chunkDataPackCollectionSize: chunkDataPackCollectionSize, - blockDataUploadsInProgress: blockDataUploadsInProgress, - blockDataUploadsDuration: blockDataUploadsDuration, - computationResultUploadedCount: computationResultUploadedCount, - computationResultUploadRetriedCount: computationResultUploadRetriedCount, + forestApproxMemorySize: forestApproxMemorySize, + forestNumberOfTrees: forestNumberOfTrees, + latestTrieRegCount: latestTrieRegCount, + latestTrieRegCountDiff: latestTrieRegCountDiff, + latestTrieRegSize: latestTrieRegSize, + latestTrieRegSizeDiff: latestTrieRegSizeDiff, + latestTrieMaxDepthTouched: latestTrieMaxDepthTouched, + updated: updatedCount, + proofSize: proofSize, + updatedValuesNumber: updatedValuesNumber, + updatedValuesSize: updatedValuesSize, + updatedDuration: updatedDuration, + updatedDurationPerValue: updatedDurationPerValue, + readValuesNumber: readValuesNumber, + readValuesSize: readValuesSize, + readDuration: readDuration, + readDurationPerValue: readDurationPerValue, + blockExecutionTime: blockExecutionTime, + blockComputationUsed: blockComputationUsed, + blockComputationVector: blockComputationVector, + blockCachedPrograms: blockCachedPrograms, + blockMemoryUsed: blockMemoryUsed, + blockEventCounts: blockEventCounts, + blockEventSize: blockEventSize, + blockTransactionCounts: blockTransactionCounts, + blockCollectionCounts: blockCollectionCounts, + collectionExecutionTime: collectionExecutionTime, + collectionComputationUsed: collectionComputationUsed, + collectionMemoryUsed: collectionMemoryUsed, + collectionEventSize: collectionEventSize, + collectionEventCounts: collectionEventCounts, + collectionNumberOfRegistersTouched: collectionNumberOfRegistersTouched, + collectionTotalBytesWrittenToRegisters: collectionTotalBytesWrittenToRegisters, + collectionTransactionCounts: collectionTransactionCounts, + collectionRequestSent: collectionRequestsSent, + collectionRequestRetried: collectionRequestsRetries, + transactionParseTime: transactionParseTime, + transactionCheckTime: transactionCheckTime, + transactionInterpretTime: transactionInterpretTime, + transactionExecutionTime: transactionExecutionTime, + transactionConflictRetries: transactionConflictRetries, + transactionComputationUsed: transactionComputationUsed, + transactionNormalizedTimePerComputation: transactionNormalizedTimePerComputation, + transactionMemoryEstimate: transactionMemoryEstimate, + transactionEmittedEvents: transactionEmittedEvents, + transactionEventSize: transactionEventSize, + scriptExecutionTime: scriptExecutionTime, + scriptComputationUsed: scriptComputationUsed, + scriptMemoryUsage: scriptMemoryUsage, + scriptMemoryEstimate: scriptMemoryEstimate, + scriptMemoryDifference: scriptMemoryDifference, + chunkDataPackRequestProcessedTotal: chunkDataPackRequestProcessedTotal, + chunkDataPackProofSize: chunkDataPackProofSize, + chunkDataPackCollectionSize: chunkDataPackCollectionSize, + blockDataUploadsInProgress: blockDataUploadsInProgress, + blockDataUploadsDuration: blockDataUploadsDuration, + computationResultUploadedCount: computationResultUploadedCount, + computationResultUploadRetriedCount: computationResultUploadRetriedCount, totalExecutedBlocksCounter: promauto.NewCounter(prometheus.CounterOpts{ Namespace: namespaceExecution, Subsystem: subsystemRuntime, @@ -739,6 +749,11 @@ func (ec *ExecutionCollector) ExecutionTransactionExecuted( ec.transactionExecutionTime.Observe(float64(dur.Milliseconds())) ec.transactionConflictRetries.Observe(float64(numConflictRetries)) ec.transactionComputationUsed.Observe(float64(compUsed)) + if compUsed > 0 { + // normalize so the value should be around 1 + ec.transactionNormalizedTimePerComputation.Observe( + (float64(dur.Milliseconds()) / float64(compUsed)) * flow.EstimatedComputationPerMillisecond) + } ec.transactionMemoryEstimate.Observe(float64(memoryUsed)) ec.transactionEmittedEvents.Observe(float64(eventCounts)) ec.transactionEventSize.Observe(float64(eventSize)) diff --git a/module/metrics/herocache.go b/module/metrics/herocache.go index 586f6bbda75..59ddb0f2f36 100644 --- a/module/metrics/herocache.go +++ b/module/metrics/herocache.go @@ -72,6 +72,10 @@ func NetworkReceiveCacheMetricsFactory(f HeroCacheMetricsFactory, networkType ne return f(namespaceNetwork, r) } +func NewSubscriptionRecordCacheMetricsFactory(f HeroCacheMetricsFactory) module.HeroCacheMetrics { + return f(namespaceNetwork, ResourceNetworkingSubscriptionRecordsCache) +} + // DisallowListCacheMetricsFactory is the factory method for creating a new HeroCacheCollector for the disallow list cache. // The disallow-list cache is used to keep track of peers that are disallow-listed and the reasons for it. // Args: diff --git a/module/metrics/labels.go b/module/metrics/labels.go index 0390d3f56b6..067e7cb6bc3 100644 --- a/module/metrics/labels.go +++ b/module/metrics/labels.go @@ -60,6 +60,7 @@ const ( ResourceMyReceipt = "my_receipt" ResourceCollection = "collection" ResourceProtocolState = "protocol_state" + ResourceProtocolStateByBlockID = "protocol_state_by_block_id" ResourceApproval = "approval" ResourceSeal = "seal" ResourcePendingIncorporatedSeal = "pending_incorporated_seal" @@ -86,6 +87,7 @@ const ( ResourceEpochCommit = "epoch_commit" ResourceEpochStatus = "epoch_status" ResourceNetworkingReceiveCache = "networking_received_message" // networking layer + ResourceNetworkingSubscriptionRecordsCache = "subscription_records_cache" // networking layer ResourceNetworkingDnsIpCache = "networking_dns_ip_cache" // networking layer ResourceNetworkingDnsTxtCache = "networking_dns_txt_cache" // networking layer ResourceNetworkingDisallowListNotificationQueue = "networking_disallow_list_notification_queue" diff --git a/module/metrics/unicast_manager.go b/module/metrics/unicast_manager.go index e621c44f460..4f1ef04ec52 100644 --- a/module/metrics/unicast_manager.go +++ b/module/metrics/unicast_manager.go @@ -120,6 +120,22 @@ func NewUnicastManagerMetrics(prefix string) *UnicastManagerMetrics { }, ) + uc.streamCreationRetryBudgetResetToDefault = promauto.NewCounter( + prometheus.CounterOpts{ + Namespace: namespaceNetwork, + Subsystem: subsystemGossip, + Name: uc.prefix + "stream_creation_retry_budget_reset_to_default_total", + Help: "the number of times the stream creation retry budget is reset to default by the unicast manager", + }) + + uc.dialRetryBudgetResetToDefault = promauto.NewCounter( + prometheus.CounterOpts{ + Namespace: namespaceNetwork, + Subsystem: subsystemGossip, + Name: uc.prefix + "dial_retry_budget_reset_to_default_total", + Help: "the number of times the dial retry budget is reset to default by the unicast manager", + }) + return uc } diff --git a/module/signature/aggregation.go b/module/signature/aggregation.go index 99129c656dc..76101ee3805 100644 --- a/module/signature/aggregation.go +++ b/module/signature/aggregation.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package signature import ( diff --git a/module/signature/aggregation_no_relic.go b/module/signature/aggregation_no_relic.go deleted file mode 100644 index 6b51c6f35a3..00000000000 --- a/module/signature/aggregation_no_relic.go +++ /dev/null @@ -1,34 +0,0 @@ -//go:build !relic -// +build !relic - -package signature - -import ( - "github.com/onflow/flow-go/crypto" -) - -const panic_relic = "function only supported with the relic build tag" - -// These functions are the non-relic versions of some public functions from the package. -// The functions are here to allow the build of flow-emulator, since the emulator is built -// without the "relic" build tag, and does not run the functions below. -type SignatureAggregatorSameMessage struct{} - -func NewSignatureAggregatorSameMessage( - message []byte, - dsTag string, - publicKeys []crypto.PublicKey, -) (*SignatureAggregatorSameMessage, error) { - panic(panic_relic) -} - -func (s *SignatureAggregatorSameMessage) Verify(signer int, sig crypto.Signature) (bool, error) { - panic(panic_relic) -} -func (s *SignatureAggregatorSameMessage) TrustedAdd(signer int, sig crypto.Signature) error { - panic(panic_relic) -} - -func (s *SignatureAggregatorSameMessage) Aggregate() ([]int, crypto.Signature, error) { - panic(panic_relic) -} diff --git a/module/signature/aggregation_test.go b/module/signature/aggregation_test.go index aebc696b091..87a31561753 100644 --- a/module/signature/aggregation_test.go +++ b/module/signature/aggregation_test.go @@ -1,6 +1,3 @@ -//go:build relic -// +build relic - package signature import ( diff --git a/module/state_synchronization/indexer/indexer_test.go b/module/state_synchronization/indexer/indexer_test.go index 126afb75274..14d959df4ce 100644 --- a/module/state_synchronization/indexer/indexer_test.go +++ b/module/state_synchronization/indexer/indexer_test.go @@ -228,9 +228,10 @@ func TestIndexer_Failure(t *testing.T) { // make sure the error returned is as expected expectedErr := fmt.Errorf( - "failed to index block data at height %d: could not index register payloads at height %d: error persisting data", - test.blocks[lastIndexedIndex].Header.Height+1, + "failed to index block data at height %d: %w", test.blocks[lastIndexedIndex].Header.Height+1, + fmt.Errorf( + "could not index register payloads at height %d: %w", test.blocks[lastIndexedIndex].Header.Height+1, fmt.Errorf("error persisting data")), ) _, cancel := context.WithCancel(context.Background()) diff --git a/module/state_synchronization/mock/index_reporter.go b/module/state_synchronization/mock/index_reporter.go new file mode 100644 index 00000000000..3de0696a8b5 --- /dev/null +++ b/module/state_synchronization/mock/index_reporter.go @@ -0,0 +1,53 @@ +// Code generated by mockery v2.21.4. DO NOT EDIT. + +package state_synchronization + +import mock "github.com/stretchr/testify/mock" + +// IndexReporter is an autogenerated mock type for the IndexReporter type +type IndexReporter struct { + mock.Mock +} + +// HighestIndexedHeight provides a mock function with given fields: +func (_m *IndexReporter) HighestIndexedHeight() uint64 { + ret := _m.Called() + + var r0 uint64 + if rf, ok := ret.Get(0).(func() uint64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(uint64) + } + + return r0 +} + +// LowestIndexedHeight provides a mock function with given fields: +func (_m *IndexReporter) LowestIndexedHeight() uint64 { + ret := _m.Called() + + var r0 uint64 + if rf, ok := ret.Get(0).(func() uint64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(uint64) + } + + return r0 +} + +type mockConstructorTestingTNewIndexReporter interface { + mock.TestingT + Cleanup(func()) +} + +// NewIndexReporter creates a new instance of IndexReporter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewIndexReporter(t mockConstructorTestingTNewIndexReporter) *IndexReporter { + mock := &IndexReporter{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/module/trace/constants.go b/module/trace/constants.go index 5cda4f10d33..2d333bdb5fc 100644 --- a/module/trace/constants.go +++ b/module/trace/constants.go @@ -167,6 +167,7 @@ const ( FVMEnvGetOrLoadProgram SpanName = "fvm.env.getOrLoadCachedProgram" FVMEnvProgramLog SpanName = "fvm.env.programLog" FVMEnvEmitEvent SpanName = "fvm.env.emitEvent" + FVMEnvEncodeEvent SpanName = "fvm.env.encodeEvent" FVMEnvGenerateUUID SpanName = "fvm.env.generateUUID" FVMEnvGenerateAccountLocalID SpanName = "fvm.env.generateAccountLocalID" FVMEnvDecodeArgument SpanName = "fvm.env.decodeArgument" diff --git a/network/alsp/internal/cache.go b/network/alsp/internal/cache.go index c29ae4bd988..6bc6f361593 100644 --- a/network/alsp/internal/cache.go +++ b/network/alsp/internal/cache.go @@ -81,7 +81,6 @@ func (s *SpamRecordCache) Adjust(originId flow.Identifier, adjustFunc model.Reco penalty, err := s.adjust(originId, adjustFunc) switch { - case err == ErrSpamRecordNotFound: // if the record does not exist, we initialize the record and try to adjust it again. // Note: there is an edge case where the record is initialized by another goroutine between the two calls. diff --git a/network/internal/p2pfixtures/fixtures.go b/network/internal/p2pfixtures/fixtures.go index 77dd8bd9d38..198e420c185 100644 --- a/network/internal/p2pfixtures/fixtures.go +++ b/network/internal/p2pfixtures/fixtures.go @@ -13,7 +13,6 @@ import ( pubsub "github.com/libp2p/go-libp2p-pubsub" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" - "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peerstore" "github.com/libp2p/go-libp2p/core/routing" "github.com/multiformats/go-multiaddr" @@ -32,7 +31,6 @@ import ( "github.com/onflow/flow-go/network/message" "github.com/onflow/flow-go/network/p2p" p2pdht "github.com/onflow/flow-go/network/p2p/dht" - "github.com/onflow/flow-go/network/p2p/keyutils" "github.com/onflow/flow-go/network/p2p/p2pbuilder" p2pconfig "github.com/onflow/flow-go/network/p2p/p2pbuilder/config" "github.com/onflow/flow-go/network/p2p/tracer" @@ -125,8 +123,9 @@ func CreateNode(t *testing.T, networkKey crypto.PrivateKey, sporkID flow.Identif networkKey, sporkID, idProvider, + defaultFlowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig, &defaultFlowConfig.NetworkConfig.ResourceManager, - &defaultFlowConfig.NetworkConfig.GossipSubRPCInspectorsConfig, + &defaultFlowConfig.NetworkConfig.GossipSubConfig, p2pconfig.PeerManagerDisableConfig(), &p2p.DisallowListCacheConfig{ MaxSize: uint32(1000), @@ -153,36 +152,6 @@ func CreateNode(t *testing.T, networkKey crypto.PrivateKey, sporkID flow.Identif return libp2pNode } -// PeerIdFixture creates a random and unique peer ID (libp2p node ID). -func PeerIdFixture(t *testing.T) peer.ID { - key, err := generateNetworkingKey(unittest.IdentifierFixture()) - require.NoError(t, err) - - pubKey, err := keyutils.LibP2PPublicKeyFromFlow(key.PublicKey()) - require.NoError(t, err) - - peerID, err := peer.IDFromPublicKey(pubKey) - require.NoError(t, err) - - return peerID -} - -// generateNetworkingKey generates a Flow ECDSA key using the given seed -func generateNetworkingKey(s flow.Identifier) (crypto.PrivateKey, error) { - seed := make([]byte, crypto.KeyGenSeedMinLen) - copy(seed, s[:]) - return crypto.GeneratePrivateKey(crypto.ECDSASecp256k1, seed) -} - -// PeerIdsFixture creates random and unique peer IDs (libp2p node IDs). -func PeerIdsFixture(t *testing.T, n int) []peer.ID { - peerIDs := make([]peer.ID, n) - for i := 0; i < n; i++ { - peerIDs[i] = PeerIdFixture(t) - } - return peerIDs -} - // SubMustNeverReceiveAnyMessage checks that the subscription never receives any message within the given timeout by the context. func SubMustNeverReceiveAnyMessage(t *testing.T, ctx context.Context, sub p2p.Subscription) { timeouted := make(chan struct{}) diff --git a/network/netconf/config.go b/network/netconf/config.go index 2eeccdce256..b9df868d281 100644 --- a/network/netconf/config.go +++ b/network/netconf/config.go @@ -33,19 +33,10 @@ type Config struct { type UnicastConfig struct { // UnicastRateLimitersConfig configuration for all unicast rate limiters. UnicastRateLimitersConfig `mapstructure:",squash"` + // CreateStreamBackoffDelay initial delay used in the exponential backoff for create stream retries. CreateStreamBackoffDelay time.Duration `validate:"gt=0s" mapstructure:"unicast-create-stream-retry-delay"` - // DialInProgressBackoffDelay is the backoff delay for parallel attempts on dialing to the same peer. - // When the unicast manager is invoked to create stream to the same peer concurrently while there is - // already an ongoing dialing attempt to the same peer, the unicast manager will wait for this backoff delay - // and retry creating the stream after the backoff delay has elapsed. This is to prevent the unicast manager - // from creating too many parallel dialing attempts to the same peer. - DialInProgressBackoffDelay time.Duration `validate:"gt=0s" mapstructure:"unicast-dial-in-progress-backoff-delay"` - - // DialBackoffDelay is the backoff delay between retrying connection to the same peer. - DialBackoffDelay time.Duration `validate:"gt=0s" mapstructure:"unicast-dial-backoff-delay"` - // StreamZeroRetryResetThreshold is the threshold that determines when to reset the stream creation retry budget to the default value. // // For example the default value of 100 means that if the stream creation retry budget is decreased to 0, then it will be reset to default value @@ -58,25 +49,11 @@ type UnicastConfig struct { // 100 stream creations are all successful. StreamZeroRetryResetThreshold uint64 `validate:"gt=0" mapstructure:"unicast-stream-zero-retry-reset-threshold"` - // DialZeroRetryResetThreshold is the threshold that determines when to reset the dial retry budget to the default value. - // For example the threshold of 1 hour means that if the dial retry budget is decreased to 0, then it will be reset to default value - // when it has been 1 hour since the last successful dial. - // - // This is to prevent the retry budget from being reset too frequently, as the retry budget is used to gauge the reliability of the dialing a remote peer. - // When the dial retry budget is reset to the default value, it means that the dialing is reliable enough to be trusted again. - // This parameter mandates when the dialing is reliable enough to be trusted again; i.e., when it has been 1 hour since the last successful dial. - // Note that the last dial attempt timestamp is reset to zero when the dial fails, so the value of for example 1 hour means that the dialing to the remote peer is reliable enough that the last - // successful dial attempt was 1 hour ago. - DialZeroRetryResetThreshold time.Duration `validate:"gt=0s" mapstructure:"unicast-dial-zero-retry-reset-threshold"` - - // MaxDialRetryAttemptTimes is the maximum number of attempts to be made to connect to a remote node to establish a unicast (1:1) connection before we give up. - MaxDialRetryAttemptTimes uint64 `validate:"gt=0" mapstructure:"unicast-max-dial-retry-attempt-times"` - // MaxStreamCreationRetryAttemptTimes is the maximum number of attempts to be made to create a stream to a remote node over a direct unicast (1:1) connection before we give up. MaxStreamCreationRetryAttemptTimes uint64 `validate:"gt=1" mapstructure:"unicast-max-stream-creation-retry-attempt-times"` - // DialConfigCacheSize is the cache size of the dial config cache that keeps the individual dial config for each peer. - DialConfigCacheSize uint32 `validate:"gt=0" mapstructure:"unicast-dial-config-cache-size"` + // ConfigCacheSize is the cache size of the dial config cache that keeps the individual dial config for each peer. + ConfigCacheSize uint32 `validate:"gt=0" mapstructure:"unicast-dial-config-cache-size"` } // UnicastRateLimitersConfig unicast rate limiter configuration for the message and bandwidth rate limiters. diff --git a/network/netconf/flags.go b/network/netconf/flags.go index 3a90c0c1973..823772f5b03 100644 --- a/network/netconf/flags.go +++ b/network/netconf/flags.go @@ -20,11 +20,7 @@ const ( unicastMessageTimeout = "unicast-message-timeout" unicastCreateStreamRetryDelay = "unicast-create-stream-retry-delay" unicastStreamZeroRetryResetThreshold = "unicast-stream-zero-retry-reset-threshold" - unicastDialZeroRetryResetThreshold = "unicast-dial-zero-retry-reset-threshold" - unicastMaxDialRetryAttemptTimes = "unicast-max-dial-retry-attempt-times" unicastMaxStreamCreationRetryAttemptTimes = "unicast-max-stream-creation-retry-attempt-times" - unicastDialInProgressBackoffDelay = "unicast-dial-in-progress-backoff-delay" - unicastDialBackoffDelay = "unicast-dial-backoff-delay" unicastDialConfigCacheSize = "unicast-dial-config-cache-size" dnsCacheTTL = "dns-cache-ttl" disallowListNotificationCacheSize = "disallow-list-notification-cache-size" @@ -64,6 +60,9 @@ const ( rpcSentTrackerNumOfWorkers = "gossipsub-rpc-sent-tracker-workers" scoreTracerInterval = "gossipsub-score-tracer-interval" + gossipSubSubscriptionProviderUpdateInterval = "gossipsub-subscription-provider-update-interval" + gossipSubSubscriptionProviderCacheSize = "gossipsub-subscription-provider-cache-size" + // gossipsub validation inspector gossipSubRPCInspectorNotificationCacheSize = "gossipsub-rpc-inspector-notification-cache-size" validationInspectorNumberOfWorkers = "gossipsub-rpc-validation-inspector-workers" @@ -86,10 +85,14 @@ const ( metricsInspectorNumberOfWorkers = "gossipsub-rpc-metrics-inspector-workers" metricsInspectorCacheSize = "gossipsub-rpc-metrics-inspector-cache-size" - alspDisabled = "alsp-disable-penalty" - alspSpamRecordCacheSize = "alsp-spam-record-cache-size" - alspSpamRecordQueueSize = "alsp-spam-report-queue-size" - alspHearBeatInterval = "alsp-heart-beat-interval" + // gossipsub scoring registry + scoringRegistrySlowerDecayThreshold = "gossipsub-app-specific-penalty-decay-slowdown-threshold" + scoringRegistryDecayRateDecrement = "gossipsub-app-specific-penalty-decay-rate-reduction-factor" + scoringRegistryDecayAdjustInterval = "gossipsub-app-specific-penalty-decay-evaluation-period" + alspDisabled = "alsp-disable-penalty" + alspSpamRecordCacheSize = "alsp-spam-record-cache-size" + alspSpamRecordQueueSize = "alsp-spam-report-queue-size" + alspHearBeatInterval = "alsp-heart-beat-interval" alspSyncEngineBatchRequestBaseProb = "alsp-sync-engine-batch-request-base-prob" alspSyncEngineRangeRequestBaseProb = "alsp-sync-engine-range-request-base-prob" @@ -104,11 +107,7 @@ func AllFlagNames() []string { peerUpdateInterval, unicastMessageTimeout, unicastCreateStreamRetryDelay, - unicastDialInProgressBackoffDelay, - unicastDialBackoffDelay, unicastStreamZeroRetryResetThreshold, - unicastDialZeroRetryResetThreshold, - unicastMaxDialRetryAttemptTimes, unicastMaxStreamCreationRetryAttemptTimes, unicastDialConfigCacheSize, dnsCacheTTL, @@ -153,8 +152,11 @@ func AllFlagNames() []string { controlMessageMaxSampleSize, iwantDuplicateMsgIDThreshold, iwantCacheMissCheckSize, + scoringRegistrySlowerDecayThreshold, + scoringRegistryDecayRateDecrement, rpcMessageMaxSampleSize, rpcMessageErrorThreshold, + scoringRegistryDecayAdjustInterval, } for _, scope := range []string{systemScope, transientScope, protocolScope, peerScope, peerProtocolScope} { @@ -179,9 +181,13 @@ func AllFlagNames() []string { func InitializeNetworkFlags(flags *pflag.FlagSet, config *Config) { flags.Bool(networkingConnectionPruning, config.NetworkConnectionPruning, "enabling connection trimming") flags.Duration(dnsCacheTTL, config.DNSCacheTTL, "time-to-live for dns cache") - flags.StringSlice(preferredUnicastsProtocols, config.PreferredUnicastProtocols, "preferred unicast protocols in ascending order of preference") + flags.StringSlice( + preferredUnicastsProtocols, config.PreferredUnicastProtocols, "preferred unicast protocols in ascending order of preference") flags.Uint32(receivedMessageCacheSize, config.NetworkReceivedMessageCacheSize, "incoming message cache size at networking layer") - flags.Uint32(disallowListNotificationCacheSize, config.DisallowListNotificationCacheSize, "cache size for notification events from disallow list") + flags.Uint32( + disallowListNotificationCacheSize, + config.DisallowListNotificationCacheSize, + "cache size for notification events from disallow list") flags.Duration(peerUpdateInterval, config.PeerUpdateInterval, "how often to refresh the peer connections for the node") flags.Duration(unicastMessageTimeout, config.UnicastMessageTimeout, "how long a unicast transmission can take to complete") // unicast manager options @@ -189,24 +195,12 @@ func InitializeNetworkFlags(flags *pflag.FlagSet, config *Config) { config.UnicastConfig.CreateStreamBackoffDelay, "initial backoff delay between failing to establish a connection with another node and retrying, "+ "this delay increases exponentially with the number of subsequent failures to establish a connection.") - flags.Duration(unicastDialBackoffDelay, - config.UnicastConfig.DialInProgressBackoffDelay, - "initial backoff delay between failing to establish a connection with another node and retrying, "+ - "this delay increases exponentially with the number of subsequent failures to establish a connection.") - flags.Duration(unicastDialInProgressBackoffDelay, - config.UnicastConfig.DialInProgressBackoffDelay, - "initial backoff delay for concurrent stream creations to a remote peer when there is no exising connection and a dial is in progress. "+ - "this delay increases exponentially with the number of subsequent failure attempts") flags.Uint64(unicastStreamZeroRetryResetThreshold, config.UnicastConfig.StreamZeroRetryResetThreshold, "reset stream creation retry budget from zero to the maximum after consecutive successful streams reach this threshold.") - flags.Duration(unicastDialZeroRetryResetThreshold, - config.UnicastConfig.DialZeroRetryResetThreshold, - "reset dial retry budget if the last successful dial is longer than this threshold.") - flags.Uint64(unicastMaxDialRetryAttemptTimes, config.UnicastConfig.MaxDialRetryAttemptTimes, "maximum attempts to establish a unicast connection.") flags.Uint64(unicastMaxStreamCreationRetryAttemptTimes, config.UnicastConfig.MaxStreamCreationRetryAttemptTimes, "max attempts to create a unicast stream.") flags.Uint32(unicastDialConfigCacheSize, - config.UnicastConfig.DialConfigCacheSize, + config.UnicastConfig.ConfigCacheSize, "cache size of the dial config cache, recommended to be big enough to accommodate the entire nodes in the network.") // unicast stream handler rate limits @@ -229,10 +223,22 @@ func InitializeNetworkFlags(flags *pflag.FlagSet, config *Config) { flags.Duration(silencePeriod, config.ConnectionManagerConfig.SilencePeriod, "silence period for libp2p connection manager") flags.Bool(peerScoring, config.GossipSubConfig.PeerScoring, "enabling peer scoring on pubsub network") flags.Duration(localMeshLogInterval, config.GossipSubConfig.LocalMeshLogInterval, "logging interval for local mesh in gossipsub") - flags.Duration(scoreTracerInterval, config.GossipSubConfig.ScoreTracerInterval, "logging interval for peer score tracer in gossipsub, set to 0 to disable") - flags.Uint32(rpcSentTrackerCacheSize, config.GossipSubConfig.RPCSentTrackerCacheSize, "cache size of the rpc sent tracker used by the gossipsub mesh tracer.") - flags.Uint32(rpcSentTrackerQueueCacheSize, config.GossipSubConfig.RPCSentTrackerQueueCacheSize, "cache size of the rpc sent tracker worker queue.") - flags.Int(rpcSentTrackerNumOfWorkers, config.GossipSubConfig.RpcSentTrackerNumOfWorkers, "number of workers for the rpc sent tracker worker pool.") + flags.Duration( + scoreTracerInterval, + config.GossipSubConfig.ScoreTracerInterval, + "logging interval for peer score tracer in gossipsub, set to 0 to disable") + flags.Uint32( + rpcSentTrackerCacheSize, + config.GossipSubConfig.RPCSentTrackerCacheSize, + "cache size of the rpc sent tracker used by the gossipsub mesh tracer.") + flags.Uint32( + rpcSentTrackerQueueCacheSize, + config.GossipSubConfig.RPCSentTrackerQueueCacheSize, + "cache size of the rpc sent tracker worker queue.") + flags.Int( + rpcSentTrackerNumOfWorkers, + config.GossipSubConfig.RpcSentTrackerNumOfWorkers, + "number of workers for the rpc sent tracker worker pool.") // gossipsub RPC control message validation limits used for validation configuration and rate limiting flags.Int(validationInspectorNumberOfWorkers, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.NumberOfWorkers, @@ -275,6 +281,16 @@ func InitializeNetworkFlags(flags *pflag.FlagSet, config *Config) { "base probability of creating a misbehavior report for a range request message") flags.Float32(alspSyncEngineSyncRequestProb, config.AlspConfig.SyncEngine.SyncRequestProb, "probability of creating a misbehavior report for a sync request message") + flags.Float64(scoringRegistrySlowerDecayThreshold, + config.GossipSubConfig.GossipSubScoringRegistryConfig.PenaltyDecaySlowdownThreshold, + "the penalty level at which the decay rate is reduced by --gossipsub-app-specific-penalty-decay-rate-reduction-factor") + flags.Float64(scoringRegistryDecayRateDecrement, + config.GossipSubConfig.GossipSubScoringRegistryConfig.DecayRateReductionFactor, + "defines the value by which the decay rate is decreased every time the penalty is below the --gossipsub-app-specific-penalty-decay-slowdown-threshold.") + flags.Duration(scoringRegistryDecayAdjustInterval, + config.GossipSubConfig.GossipSubScoringRegistryConfig.PenaltyDecayEvaluationPeriod, + "defines the period at which the decay for a spam record is okay to be adjusted.") + flags.Int(ihaveMaxSampleSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IHaveRPCInspectionConfig.MaxSampleSize, "max number of ihaves to sample when performing validation") @@ -300,12 +316,15 @@ func InitializeNetworkFlags(flags *pflag.FlagSet, config *Config) { config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.IWantRPCInspectionConfig.DuplicateMsgIDThreshold, "max allowed duplicate message IDs in a single iWant control message") - flags.Int(rpcMessageMaxSampleSize, - config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.RpcMessageMaxSampleSize, - "the max sample size used for RPC message validation. If the total number of RPC messages exceeds this value a sample will be taken but messages will not be truncated") - flags.Int(rpcMessageErrorThreshold, - config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.RpcMessageErrorThreshold, - "the threshold at which an error will be returned if the number of invalid RPC messages exceeds this value") + flags.Int(rpcMessageMaxSampleSize, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.RpcMessageMaxSampleSize, "the max sample size used for RPC message validation. If the total number of RPC messages exceeds this value a sample will be taken but messages will not be truncated") + flags.Int(rpcMessageErrorThreshold, config.GossipSubConfig.GossipSubRPCInspectorsConfig.GossipSubRPCValidationInspectorConfigs.RpcMessageErrorThreshold, "the threshold at which an error will be returned if the number of invalid RPC messages exceeds this value") + flags.Duration( + gossipSubSubscriptionProviderUpdateInterval, config.GossipSubConfig.SubscriptionProviderConfig.SubscriptionUpdateInterval, + "interval for updating the list of subscribed topics for all peers in the gossipsub, recommended value is a few minutes") + flags.Uint32( + gossipSubSubscriptionProviderCacheSize, + config.GossipSubConfig.SubscriptionProviderConfig.CacheSize, + "size of the cache that keeps the list of topics each peer has subscribed to, recommended size is 10x the number of authorized nodes") } // LoadLibP2PResourceManagerFlags loads all CLI flags for the libp2p resource manager configuration on the provided pflag set. @@ -379,7 +398,8 @@ func SetAliases(conf *viper.Viper) error { for _, flagName := range AllFlagNames() { fullKey, ok := m[flagName] if !ok { - return fmt.Errorf("invalid network configuration missing configuration key flag name %s check config file and cli flags", flagName) + return fmt.Errorf( + "invalid network configuration missing configuration key flag name %s check config file and cli flags", flagName) } conf.RegisterAlias(fullKey, flagName) } diff --git a/network/p2p/builder.go b/network/p2p/builder.go index d38457674e9..31a7da024f5 100644 --- a/network/p2p/builder.go +++ b/network/p2p/builder.go @@ -110,6 +110,7 @@ type GossipSubRpcInspectorSuiteFactoryFunc func( metrics.HeroCacheMetricsFactory, flownet.NetworkingType, module.IdentityProvider, + func() TopicProvider, ) (GossipSubInspectorSuite, error) // NodeBuilder is a builder pattern for creating a libp2p Node instance. diff --git a/network/p2p/cache.go b/network/p2p/cache.go index f764f1c6321..3f3bbadc00c 100644 --- a/network/p2p/cache.go +++ b/network/p2p/cache.go @@ -1,6 +1,8 @@ package p2p import ( + "time" + "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" ) @@ -82,4 +84,9 @@ type GossipSubSpamRecord struct { Decay float64 // Penalty is the application specific Penalty of the peer. Penalty float64 + // LastDecayAdjustment records the time of the most recent adjustment in the decay process for a spam record. + // At each interval, the system evaluates and potentially adjusts the decay rate, which affects how quickly a node's penalty diminishes. + // The decay process is multiplicative (newPenalty = decayRate * oldPenalty) and operates within a range of 0 to 1. At certain regular intervals, the decay adjustment is evaluated and if the node's penalty falls below the set threshold, the decay rate is modified by the reduction factor, such as 0.01. This modification incrementally increases the decay rate. For example, if the decay rate is `x`, adding the reduction factor results in a decay rate of `x + 0.01`, leading to a slower reduction in penalty. Thus, a higher decay rate actually slows down the recovery process, contrary to accelerating it. + // The LastDecayAdjustment timestamp is crucial in ensuring balanced and fair penalization, especially important during periods of high message traffic to prevent unintended rapid decay of penalties for malicious nodes. + LastDecayAdjustment time.Time } diff --git a/network/p2p/connection/connection_gater_test.go b/network/p2p/connection/connection_gater_test.go index 5a2c678b15c..59ef138758d 100644 --- a/network/p2p/connection/connection_gater_test.go +++ b/network/p2p/connection/connection_gater_test.go @@ -74,7 +74,7 @@ func TestConnectionGating(t *testing.T) { // although nodes have each other addresses, they are not in the allow-lists of each other. // so they should not be able to connect to each other. p2pfixtures.EnsureNoStreamCreationBetweenGroups(t, ctx, []p2p.LibP2PNode{node1}, []p2p.LibP2PNode{node2}, func(t *testing.T, err error) { - require.True(t, stream.IsErrGaterDisallowedConnection(err)) + require.Truef(t, stream.IsErrGaterDisallowedConnection(err), "expected ErrGaterDisallowedConnection, got: %v", err) }) }) @@ -89,7 +89,7 @@ func TestConnectionGating(t *testing.T) { // from node2 -> node1 should also NOT work, since node 1 is not in node2's allow list for dialing! p2pfixtures.EnsureNoStreamCreation(t, ctx, []p2p.LibP2PNode{node2}, []p2p.LibP2PNode{node1}, func(t *testing.T, err error) { // dialing node-1 by node-2 should fail locally at the connection gater of node-2. - require.True(t, stream.IsErrGaterDisallowedConnection(err)) + require.Truef(t, stream.IsErrGaterDisallowedConnection(err), "expected ErrGaterDisallowedConnection, got: %v", err) }) // now node2 should be able to connect to node1. diff --git a/network/p2p/connection/connector.go b/network/p2p/connection/connector.go index 69fbb5d4359..2e59c595bf7 100644 --- a/network/p2p/connection/connector.go +++ b/network/p2p/connection/connector.go @@ -59,7 +59,7 @@ var _ p2p.PeerUpdater = (*PeerUpdater)(nil) // - error: an error if there is any error while creating the connector. The errors are irrecoverable and unexpected. func NewPeerUpdater(cfg *PeerUpdaterConfig) (*PeerUpdater, error) { libP2PConnector := &PeerUpdater{ - log: cfg.Logger, + log: cfg.Logger.With().Str("component", "peer-updater").Logger(), connector: cfg.Connector, host: cfg.Host, pruneConnections: cfg.PruneConnections, diff --git a/network/p2p/consumers.go b/network/p2p/consumers.go index 85206b7f1df..f079a3864af 100644 --- a/network/p2p/consumers.go +++ b/network/p2p/consumers.go @@ -24,6 +24,27 @@ type GossipSubInspectorNotifDistributor interface { AddConsumer(GossipSubInvCtrlMsgNotifConsumer) } +// CtrlMsgTopicType represents the type of the topic within a control message. +type CtrlMsgTopicType uint64 + +const ( + // CtrlMsgNonClusterTopicType represents a non-cluster-prefixed topic. + CtrlMsgNonClusterTopicType CtrlMsgTopicType = iota + // CtrlMsgTopicTypeClusterPrefixed represents a cluster-prefixed topic. + CtrlMsgTopicTypeClusterPrefixed +) + +func (t CtrlMsgTopicType) String() string { + switch t { + case CtrlMsgNonClusterTopicType: + return "non-cluster-prefixed" + case CtrlMsgTopicTypeClusterPrefixed: + return "cluster-prefixed" + default: + return "unknown" + } +} + // InvCtrlMsgNotif is the notification sent to the consumer when an invalid control message is received. // It models the information that is available to the consumer about a misbehaving peer. type InvCtrlMsgNotif struct { @@ -33,14 +54,32 @@ type InvCtrlMsgNotif struct { Error error // MsgType the control message type. MsgType p2pmsg.ControlMessageType + // Count the number of errors. + Count uint64 + // TopicType reports whether the error occurred on a cluster-prefixed topic within the control message. + // Notifications must be explicitly marked as cluster-prefixed or not because the penalty applied to the GossipSub score + // for an error on a cluster-prefixed topic is more lenient than the penalty applied to a non-cluster-prefixed topic. + // This distinction ensures that nodes engaged in cluster-prefixed topic communication are not penalized too harshly, + // as such communication is vital to the progress of the chain. + TopicType CtrlMsgTopicType } // NewInvalidControlMessageNotification returns a new *InvCtrlMsgNotif -func NewInvalidControlMessageNotification(peerID peer.ID, ctlMsgType p2pmsg.ControlMessageType, err error) *InvCtrlMsgNotif { +// Args: +// - peerID: peer id of the offender. +// - ctlMsgType: the control message type of the rpc message that caused the error. +// - err: the error that occurred. +// - count: the number of occurrences of the error. +// +// Returns: +// - *InvCtlMsgNotif: invalid control message notification. +func NewInvalidControlMessageNotification(peerID peer.ID, ctlMsgType p2pmsg.ControlMessageType, err error, count uint64, topicType CtrlMsgTopicType) *InvCtrlMsgNotif { return &InvCtrlMsgNotif{ - PeerID: peerID, - Error: err, - MsgType: ctlMsgType, + PeerID: peerID, + Error: err, + MsgType: ctlMsgType, + Count: count, + TopicType: topicType, } } @@ -70,11 +109,4 @@ type GossipSubInspectorSuite interface { // pattern where the consumer is notified when a new notification is published. // A consumer is only notified once for each notification, and only receives notifications that were published after it was added. AddInvalidControlMessageConsumer(GossipSubInvCtrlMsgNotifConsumer) - - // SetTopicOracle sets the topic oracle of the gossipsub inspector suite. - // The topic oracle is used to determine the list of topics that the node is subscribed to. - // If an oracle is not set, the node will not be able to determine the list of topics that the node is subscribed to. - // This func is expected to be called once and will return an error on all subsequent calls. - // All errors returned from this func are considered irrecoverable. - SetTopicOracle(topicOracle func() []string) error } diff --git a/network/p2p/distributor/gossipsub_inspector.go b/network/p2p/distributor/gossipsub_inspector.go index 9c9eec28c61..e61ce744143 100644 --- a/network/p2p/distributor/gossipsub_inspector.go +++ b/network/p2p/distributor/gossipsub_inspector.go @@ -11,6 +11,7 @@ import ( "github.com/onflow/flow-go/module/mempool/queue" "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/network/p2p" + "github.com/onflow/flow-go/network/p2p/p2plogging" ) const ( @@ -80,10 +81,12 @@ func NewGossipSubInspectorNotificationDistributor(log zerolog.Logger, store engi // The distribution is done asynchronously and non-blocking. The notification is added to a queue and processed by a worker pool. // DistributeEvent in this implementation does not return an error, but it logs a warning if the queue is full. func (g *GossipSubInspectorNotifDistributor) Distribute(notification *p2p.InvCtrlMsgNotif) error { + lg := g.logger.With().Str("peer_id", p2plogging.PeerId(notification.PeerID)).Logger() if ok := g.workerPool.Submit(notification); !ok { // we use a queue with a fixed size, so this can happen when queue is full or when the notification is duplicate. - g.logger.Warn().Msg("gossipsub rpc inspector notification queue is full or notification is duplicate, discarding notification") + lg.Warn().Msg("gossipsub rpc inspector notification queue is full or notification is duplicate, discarding notification") } + lg.Trace().Msg("gossipsub rpc inspector notification submitted to the queue") return nil } diff --git a/network/p2p/inspector/internal/mockTopicProvider.go b/network/p2p/inspector/internal/mockTopicProvider.go new file mode 100644 index 00000000000..33599a2fb97 --- /dev/null +++ b/network/p2p/inspector/internal/mockTopicProvider.go @@ -0,0 +1,35 @@ +package internal + +import ( + "github.com/libp2p/go-libp2p/core/peer" +) + +// MockUpdatableTopicProvider is a mock implementation of the TopicProvider interface. +// TODO: this should be moved to a common package (e.g. network/p2p/test). Currently, it is not possible to do so because of a circular dependency. +type MockUpdatableTopicProvider struct { + topics []string + subscriptions map[string][]peer.ID +} + +func NewMockUpdatableTopicProvider() *MockUpdatableTopicProvider { + return &MockUpdatableTopicProvider{ + topics: []string{}, + subscriptions: map[string][]peer.ID{}, + } +} + +func (m *MockUpdatableTopicProvider) GetTopics() []string { + return m.topics +} + +func (m *MockUpdatableTopicProvider) ListPeers(topic string) []peer.ID { + return m.subscriptions[topic] +} + +func (m *MockUpdatableTopicProvider) UpdateTopics(topics []string) { + m.topics = topics +} + +func (m *MockUpdatableTopicProvider) UpdateSubscriptions(topic string, peers []peer.ID) { + m.subscriptions[topic] = peers +} diff --git a/network/p2p/inspector/validation/control_message_validation_inspector.go b/network/p2p/inspector/validation/control_message_validation_inspector.go index dcb07027976..edc935efbcb 100644 --- a/network/p2p/inspector/validation/control_message_validation_inspector.go +++ b/network/p2p/inspector/validation/control_message_validation_inspector.go @@ -4,6 +4,7 @@ import ( "fmt" "time" + "github.com/go-playground/validator/v10" "github.com/hashicorp/go-multierror" pubsub "github.com/libp2p/go-libp2p-pubsub" pubsub_pb "github.com/libp2p/go-libp2p-pubsub/pb" @@ -16,6 +17,8 @@ import ( "github.com/onflow/flow-go/module/component" "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/module/mempool/queue" + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/network" "github.com/onflow/flow-go/network/channels" "github.com/onflow/flow-go/network/p2p" "github.com/onflow/flow-go/network/p2p/inspector/internal/cache" @@ -51,12 +54,37 @@ type ControlMsgValidationInspector struct { // 1. The cluster prefix topic is received while the inspector waits for the cluster IDs provider to be set (this can happen during the startup or epoch transitions). // 2. The node sends a cluster prefix topic where the cluster prefix does not match any of the active cluster IDs. // In such cases, the inspector will allow a configured number of these messages from the corresponding peer. - tracker *cache.ClusterPrefixedMessagesReceivedTracker - idProvider module.IdentityProvider - rateLimiters map[p2pmsg.ControlMessageType]p2p.BasicRateLimiter - rpcTracker p2p.RpcControlTracking + tracker *cache.ClusterPrefixedMessagesReceivedTracker + idProvider module.IdentityProvider + rpcTracker p2p.RpcControlTracking + // networkingType indicates public or private network, rpc publish messages are inspected for unstaked senders when running the private network. + networkingType network.NetworkingType // topicOracle callback used to retrieve the current subscribed topics of the libp2p node. - topicOracle func() []string + topicOracle func() p2p.TopicProvider +} + +type InspectorParams struct { + // Logger the logger used by the inspector. + Logger zerolog.Logger `validate:"required"` + // SporkID the current spork ID. + SporkID flow.Identifier `validate:"required"` + // Config inspector configuration. + Config *p2pconf.GossipSubRPCValidationInspectorConfigs `validate:"required"` + // Distributor gossipsub inspector notification distributor. + Distributor p2p.GossipSubInspectorNotifDistributor `validate:"required"` + // HeroCacheMetricsFactory the metrics factory. + HeroCacheMetricsFactory metrics.HeroCacheMetricsFactory `validate:"required"` + // IdProvider identity provider is used to get the flow identifier for a peer. + IdProvider module.IdentityProvider `validate:"required"` + // InspectorMetrics metrics for the validation inspector. + InspectorMetrics module.GossipSubRpcValidationInspectorMetrics `validate:"required"` + // RpcTracker tracker used to track iHave RPC's sent and last size. + RpcTracker p2p.RpcControlTracking `validate:"required"` + // NetworkingType the networking type of the node. + NetworkingType network.NetworkingType `validate:"required"` + // TopicOracle callback used to retrieve the current subscribed topics of the libp2p node. + // It is set as a callback to avoid circular dependencies between the topic oracle and the inspector. + TopicOracle func() p2p.TopicProvider `validate:"required"` } var _ component.Component = (*ControlMsgValidationInspector)(nil) @@ -65,55 +93,70 @@ var _ protocol.Consumer = (*ControlMsgValidationInspector)(nil) // NewControlMsgValidationInspector returns new ControlMsgValidationInspector // Args: -// - logger: the logger used by the inspector. -// - sporkID: the current spork ID. -// - config: inspector configuration. -// - distributor: gossipsub inspector notification distributor. -// - clusterPrefixedCacheCollector: metrics collector for the underlying cluster prefix received tracker cache. -// - idProvider: identity provider is used to get the flow identifier for a peer. +// - *InspectorParams: params used to create the inspector. // // Returns: // - *ControlMsgValidationInspector: a new control message validation inspector. // - error: an error if there is any error while creating the inspector. All errors are irrecoverable and unexpected. -func NewControlMsgValidationInspector(ctx irrecoverable.SignalerContext, logger zerolog.Logger, sporkID flow.Identifier, config *p2pconf.GossipSubRPCValidationInspectorConfigs, distributor p2p.GossipSubInspectorNotifDistributor, inspectMsgQueueCacheCollector module.HeroCacheMetrics, clusterPrefixedCacheCollector module.HeroCacheMetrics, idProvider module.IdentityProvider, inspectorMetrics module.GossipSubRpcValidationInspectorMetrics, rpcTracker p2p.RpcControlTracking) (*ControlMsgValidationInspector, error) { - lg := logger.With().Str("component", "gossip_sub_rpc_validation_inspector").Logger() +func NewControlMsgValidationInspector(params *InspectorParams) (*ControlMsgValidationInspector, error) { + err := validator.New().Struct(params) + if err != nil { + return nil, fmt.Errorf("inspector params validation failed: %w", err) + } + lg := params.Logger.With().Str("component", "gossip_sub_rpc_validation_inspector").Logger() + + inspectMsgQueueCacheCollector := metrics.GossipSubRPCInspectorQueueMetricFactory(params.HeroCacheMetricsFactory, params.NetworkingType) + clusterPrefixedCacheCollector := metrics.GossipSubRPCInspectorClusterPrefixedCacheMetricFactory(params.HeroCacheMetricsFactory, params.NetworkingType) - clusterPrefixedTracker, err := cache.NewClusterPrefixedMessagesReceivedTracker(logger, config.ClusterPrefixedControlMsgsReceivedCacheSize, clusterPrefixedCacheCollector, config.ClusterPrefixedControlMsgsReceivedCacheDecay) + clusterPrefixedTracker, err := cache.NewClusterPrefixedMessagesReceivedTracker(params.Logger, + params.Config.ClusterPrefixedControlMsgsReceivedCacheSize, + clusterPrefixedCacheCollector, + params.Config.ClusterPrefixedControlMsgsReceivedCacheDecay) if err != nil { return nil, fmt.Errorf("failed to create cluster prefix topics received tracker") } - if config.RpcMessageMaxSampleSize < config.RpcMessageErrorThreshold { - return nil, fmt.Errorf("rpc message max sample size must be greater than or equal to rpc message error threshold, got %d and %d respectively", config.RpcMessageMaxSampleSize, config.RpcMessageErrorThreshold) + if params.Config.RpcMessageMaxSampleSize < params.Config.RpcMessageErrorThreshold { + return nil, fmt.Errorf("rpc message max sample size must be greater than or equal to rpc message error threshold, got %d and %d respectively", + params.Config.RpcMessageMaxSampleSize, + params.Config.RpcMessageErrorThreshold) } c := &ControlMsgValidationInspector{ - ctx: ctx, - logger: lg, - sporkID: sporkID, - config: config, - distributor: distributor, - tracker: clusterPrefixedTracker, - rpcTracker: rpcTracker, - idProvider: idProvider, - metrics: inspectorMetrics, - rateLimiters: make(map[p2pmsg.ControlMessageType]p2p.BasicRateLimiter), - } - - store := queue.NewHeroStore(config.CacheSize, logger, inspectMsgQueueCacheCollector) + logger: lg, + sporkID: params.SporkID, + config: params.Config, + distributor: params.Distributor, + tracker: clusterPrefixedTracker, + rpcTracker: params.RpcTracker, + idProvider: params.IdProvider, + metrics: params.InspectorMetrics, + networkingType: params.NetworkingType, + topicOracle: params.TopicOracle, + } + + store := queue.NewHeroStore(params.Config.CacheSize, params.Logger, inspectMsgQueueCacheCollector) + pool := worker.NewWorkerPoolBuilder[*InspectRPCRequest](lg, store, c.processInspectRPCReq).Build() c.workerPool = pool builder := component.NewComponentManagerBuilder() builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { - distributor.Start(ctx) + c.logger.Debug().Msg("starting rpc inspector distributor") + c.ctx = ctx + c.distributor.Start(ctx) select { case <-ctx.Done(): - case <-distributor.Ready(): + c.logger.Debug().Msg("rpc inspector distributor startup aborted; context cancelled") + case <-c.distributor.Ready(): + c.logger.Debug().Msg("rpc inspector distributor started") ready() } - <-distributor.Done() + <-ctx.Done() + c.logger.Debug().Msg("rpc inspector distributor stopped") + <-c.distributor.Done() + c.logger.Debug().Msg("rpc inspector distributor shutdown complete") }) for i := 0; i < c.config.NumberOfWorkers; i++ { builder.AddWorker(pool.WorkerLogic()) @@ -124,20 +167,31 @@ func NewControlMsgValidationInspector(ctx irrecoverable.SignalerContext, logger func (c *ControlMsgValidationInspector) Start(parent irrecoverable.SignalerContext) { if c.topicOracle == nil { - parent.Throw(fmt.Errorf("topic oracle not set")) + parent.Throw(fmt.Errorf("control message validation inspector topic oracle not set")) } c.Component.Start(parent) } +// Name returns the name of the rpc inspector. +func (c *ControlMsgValidationInspector) Name() string { + return rpcInspectorComponentName +} + +// ActiveClustersChanged consumes cluster ID update protocol events. +func (c *ControlMsgValidationInspector) ActiveClustersChanged(clusterIDList flow.ChainIDList) { + c.tracker.StoreActiveClusterIds(clusterIDList) +} + // Inspect is called by gossipsub upon reception of a rpc from a remote node. // It creates a new InspectRPCRequest for the RPC to be inspected async by the worker pool. +// Args: +// - from: the sender. +// - rpc: the control message RPC. +// +// Returns: +// - error: if a new inspect rpc request cannot be created, all errors returned are considered irrecoverable. func (c *ControlMsgValidationInspector) Inspect(from peer.ID, rpc *pubsub.RPC) error { - // first truncate rpc - err := c.truncateRPC(from, rpc) - if err != nil { - // irrecoverable error encountered - c.logAndThrowError(fmt.Errorf("failed to get inspect RPC request could not perform truncation: %w", err)) - } + c.truncateRPC(from, rpc) // queue further async inspection req, err := NewInspectRPCRequest(from, rpc) if err != nil { @@ -154,6 +208,11 @@ func (c *ControlMsgValidationInspector) Inspect(from peer.ID, rpc *pubsub.RPC) e // processInspectRPCReq func used by component workers to perform further inspection of RPC control messages that will validate ensure all control message // types are valid in the RPC. +// Args: +// - req: the inspect rpc request. +// +// Returns: +// - error: no error is expected to be returned from this func as they are logged and distributed in invalid control message notifications. func (c *ControlMsgValidationInspector) processInspectRPCReq(req *InspectRPCRequest) error { c.metrics.AsyncProcessingStarted() start := time.Now() @@ -165,42 +224,65 @@ func (c *ControlMsgValidationInspector) processInspectRPCReq(req *InspectRPCRequ for _, ctrlMsgType := range p2pmsg.ControlMessageTypes() { switch ctrlMsgType { case p2pmsg.CtrlMsgGraft: - err := c.inspectGraftMessages(req.Peer, req.rpc.GetControl().GetGraft(), activeClusterIDS) + err, topicType := c.inspectGraftMessages(req.Peer, req.rpc.GetControl().GetGraft(), activeClusterIDS) if err != nil { - c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgGraft, err) + c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgGraft, err, 1, topicType) return nil } case p2pmsg.CtrlMsgPrune: - err := c.inspectPruneMessages(req.Peer, req.rpc.GetControl().GetPrune(), activeClusterIDS) + err, topicType := c.inspectPruneMessages(req.Peer, req.rpc.GetControl().GetPrune(), activeClusterIDS) if err != nil { - c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgPrune, err) + c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgPrune, err, 1, topicType) return nil } case p2pmsg.CtrlMsgIWant: err := c.inspectIWantMessages(req.Peer, req.rpc.GetControl().GetIwant()) if err != nil { - c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgIWant, err) + c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgIWant, err, 1, p2p.CtrlMsgNonClusterTopicType) return nil } case p2pmsg.CtrlMsgIHave: - err := c.inspectIHaveMessages(req.Peer, req.rpc.GetControl().GetIhave(), activeClusterIDS) + err, topicType := c.inspectIHaveMessages(req.Peer, req.rpc.GetControl().GetIhave(), activeClusterIDS) if err != nil { - c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgIHave, err) + c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgIHave, err, 1, topicType) return nil } } } // inspect rpc publish messages after all control message validation has passed - err := c.inspectRpcPublishMessages(req.Peer, req.rpc.GetPublish(), activeClusterIDS) + err, errCount := c.inspectRpcPublishMessages(req.Peer, req.rpc.GetPublish(), activeClusterIDS) if err != nil { - c.logAndDistributeAsyncInspectErrs(req, p2pmsg.RpcPublishMessage, err) + c.logAndDistributeAsyncInspectErrs(req, p2pmsg.RpcPublishMessage, err, errCount, p2p.CtrlMsgNonClusterTopicType) return nil } return nil } +// checkPubsubMessageSender checks the sender of the sender of pubsub message to ensure they are not unstaked, or ejected. +// This check is only required on private networks. +// Args: +// - message: the pubsub message. +// +// Returns: +// - error: if the peer ID cannot be created from bytes, sender is unknown or the identity is ejected. +// +// All errors returned from this function can be considered benign. +func (c *ControlMsgValidationInspector) checkPubsubMessageSender(message *pubsub_pb.Message) error { + pid, err := peer.IDFromBytes(message.GetFrom()) + if err != nil { + return fmt.Errorf("failed to get peer ID from bytes: %w", err) + } + if id, ok := c.idProvider.ByPeerID(pid); !ok { + return fmt.Errorf("received rpc publish message from unstaked peer: %s", pid) + } else if id.IsEjected() { + return fmt.Errorf("received rpc publish message from ejected peer: %s", pid) + } + + return nil +} + // inspectGraftMessages performs topic validation on all grafts in the control message using the provided validateTopic func while tracking duplicates. // Args: // - from: peer ID of the sender. @@ -208,20 +290,22 @@ func (c *ControlMsgValidationInspector) processInspectRPCReq(req *InspectRPCRequ // - activeClusterIDS: the list of active cluster ids. // Returns: // - DuplicateTopicErr: if there are any duplicate topics in the list of grafts -func (c *ControlMsgValidationInspector) inspectGraftMessages(from peer.ID, grafts []*pubsub_pb.ControlGraft, activeClusterIDS flow.ChainIDList) error { +// - error: if any error occurs while sampling or validating topics, all returned errors are benign and should not cause the node to crash. +// - bool: true if an error is returned and the topic that failed validation was a cluster prefixed topic, false otherwise. +func (c *ControlMsgValidationInspector) inspectGraftMessages(from peer.ID, grafts []*pubsub_pb.ControlGraft, activeClusterIDS flow.ChainIDList) (error, p2p.CtrlMsgTopicType) { tracker := make(duplicateStrTracker) for _, graft := range grafts { topic := channels.Topic(graft.GetTopicID()) if tracker.isDuplicate(topic.String()) { - return NewDuplicateTopicErr(topic.String(), p2pmsg.CtrlMsgGraft) + return NewDuplicateTopicErr(topic.String(), p2pmsg.CtrlMsgGraft), p2p.CtrlMsgNonClusterTopicType } tracker.set(topic.String()) - err := c.validateTopic(from, topic, activeClusterIDS) + err, ctrlMsgType := c.validateTopic(from, topic, activeClusterIDS) if err != nil { - return err + return err, ctrlMsgType } } - return nil + return nil, p2p.CtrlMsgNonClusterTopicType } // inspectPruneMessages performs topic validation on all prunes in the control message using the provided validateTopic func while tracking duplicates. @@ -233,20 +317,21 @@ func (c *ControlMsgValidationInspector) inspectGraftMessages(from peer.ID, graft // - DuplicateTopicErr: if there are any duplicate topics found in the list of iHaves // or any duplicate message ids found inside a single iHave. // - error: if any error occurs while sampling or validating topics, all returned errors are benign and should not cause the node to crash. -func (c *ControlMsgValidationInspector) inspectPruneMessages(from peer.ID, prunes []*pubsub_pb.ControlPrune, activeClusterIDS flow.ChainIDList) error { +// - bool: true if an error is returned and the topic that failed validation was a cluster prefixed topic, false otherwise. +func (c *ControlMsgValidationInspector) inspectPruneMessages(from peer.ID, prunes []*pubsub_pb.ControlPrune, activeClusterIDS flow.ChainIDList) (error, p2p.CtrlMsgTopicType) { tracker := make(duplicateStrTracker) for _, prune := range prunes { topic := channels.Topic(prune.GetTopicID()) if tracker.isDuplicate(topic.String()) { - return NewDuplicateTopicErr(topic.String(), p2pmsg.CtrlMsgPrune) + return NewDuplicateTopicErr(topic.String(), p2pmsg.CtrlMsgPrune), p2p.CtrlMsgNonClusterTopicType } tracker.set(topic.String()) - err := c.validateTopic(from, topic, activeClusterIDS) + err, ctrlMsgType := c.validateTopic(from, topic, activeClusterIDS) if err != nil { - return err + return err, ctrlMsgType } } - return nil + return nil, p2p.CtrlMsgNonClusterTopicType } // inspectIHaveMessages performs topic validation on all ihaves in the control message using the provided validateTopic func while tracking duplicates. @@ -258,9 +343,10 @@ func (c *ControlMsgValidationInspector) inspectPruneMessages(from peer.ID, prune // - DuplicateTopicErr: if there are any duplicate topics found in the list of iHaves // or any duplicate message ids found inside a single iHave. // - error: if any error occurs while sampling or validating topics, all returned errors are benign and should not cause the node to crash. -func (c *ControlMsgValidationInspector) inspectIHaveMessages(from peer.ID, ihaves []*pubsub_pb.ControlIHave, activeClusterIDS flow.ChainIDList) error { +// - bool: true if an error is returned and the topic that failed validation was a cluster prefixed topic, false otherwise. +func (c *ControlMsgValidationInspector) inspectIHaveMessages(from peer.ID, ihaves []*pubsub_pb.ControlIHave, activeClusterIDS flow.ChainIDList) (error, p2p.CtrlMsgTopicType) { if len(ihaves) == 0 { - return nil + return nil, p2p.CtrlMsgNonClusterTopicType } lg := c.logger.With(). Str("peer_id", p2plogging.PeerId(from)). @@ -274,17 +360,17 @@ func (c *ControlMsgValidationInspector) inspectIHaveMessages(from peer.ID, ihave messageIds := ihave.GetMessageIDs() topic := ihave.GetTopicID() if duplicateTopicTracker.isDuplicate(topic) { - return NewDuplicateTopicErr(topic, p2pmsg.CtrlMsgIHave) + return NewDuplicateTopicErr(topic, p2pmsg.CtrlMsgIHave), p2p.CtrlMsgNonClusterTopicType } duplicateTopicTracker.set(topic) - err := c.validateTopic(from, channels.Topic(topic), activeClusterIDS) + err, ctrlMsgType := c.validateTopic(from, channels.Topic(topic), activeClusterIDS) if err != nil { - return err + return err, ctrlMsgType } for _, messageID := range messageIds { if duplicateMessageIDTracker.isDuplicate(messageID) { - return NewDuplicateTopicErr(messageID, p2pmsg.CtrlMsgIHave) + return NewDuplicateTopicErr(messageID, p2pmsg.CtrlMsgIHave), p2p.CtrlMsgNonClusterTopicType } duplicateMessageIDTracker.set(messageID) } @@ -292,7 +378,7 @@ func (c *ControlMsgValidationInspector) inspectIHaveMessages(from peer.ID, ihave lg.Debug(). Int("total_message_ids", totalMessageIds). Msg("ihave control message validation complete") - return nil + return nil, p2p.CtrlMsgNonClusterTopicType } // inspectIWantMessages inspects RPC iWant control messages. This func will sample the iWants and perform validation on each iWant in the sample. @@ -322,7 +408,7 @@ func (c *ControlMsgValidationInspector) inspectIWantMessages(from peer.ID, iWant allowedCacheMissesThreshold := float64(sampleSize) * c.config.IWantRPCInspectionConfig.CacheMissThreshold duplicates := 0 allowedDuplicatesThreshold := float64(sampleSize) * c.config.IWantRPCInspectionConfig.DuplicateMsgIDThreshold - checkCacheMisses := len(iWants) > c.config.IWantRPCInspectionConfig.CacheMissCheckSize + checkCacheMisses := len(iWants) >= c.config.IWantRPCInspectionConfig.CacheMissCheckSize lg = lg.With(). Uint("iwant_sample_size", sampleSize). Float64("allowed_cache_misses_threshold", allowedCacheMissesThreshold). @@ -375,11 +461,12 @@ func (c *ControlMsgValidationInspector) inspectIWantMessages(from peer.ID, iWant // - messages: rpc publish messages. // - activeClusterIDS: the list of active cluster ids. // Returns: -// - InvalidRpcPublishMessagesErr: if the amount of invalid messages exceeds the configured RpcMessageErrorThreshold. -func (c *ControlMsgValidationInspector) inspectRpcPublishMessages(from peer.ID, messages []*pubsub_pb.Message, activeClusterIDS flow.ChainIDList) error { +// - InvalidRpcPublishMessagesErr: if the amount of invalid messages exceeds the configured RPCMessageErrorThreshold. +// - int: the number of invalid pubsub messages +func (c *ControlMsgValidationInspector) inspectRpcPublishMessages(from peer.ID, messages []*pubsub_pb.Message, activeClusterIDS flow.ChainIDList) (error, uint64) { totalMessages := len(messages) if totalMessages == 0 { - return nil + return nil, 0 } sampleSize := c.config.RpcMessageMaxSampleSize if sampleSize > totalMessages { @@ -389,7 +476,7 @@ func (c *ControlMsgValidationInspector) inspectRpcPublishMessages(from peer.ID, messages[i], messages[j] = messages[j], messages[i] }) - subscribedTopics := c.topicOracle() + subscribedTopics := c.topicOracle().GetTopics() hasSubscription := func(topic string) bool { for _, subscribedTopic := range subscribedTopics { if topic == subscribedTopic { @@ -398,30 +485,44 @@ func (c *ControlMsgValidationInspector) inspectRpcPublishMessages(from peer.ID, } return false } - var errs *multierror.Error for _, message := range messages[:sampleSize] { + if c.networkingType == network.PrivateNetwork { + err := c.checkPubsubMessageSender(message) + if err != nil { + errs = multierror.Append(errs, err) + continue + } + } topic := channels.Topic(message.GetTopic()) - err := c.validateTopic(from, topic, activeClusterIDS) + // The boolean value returned when validating a topic, indicating whether the topic is cluster-prefixed or not, is intentionally ignored. + // This is because we have already set a threshold for errors allowed on publish messages. Reducing the penalty further based on + // cluster prefix status is unnecessary when the error threshold is exceeded. + err, _ := c.validateTopic(from, topic, activeClusterIDS) if err != nil { + // we can skip checking for subscription of topic that failed validation and continue errs = multierror.Append(errs, err) + continue } if !hasSubscription(topic.String()) { errs = multierror.Append(errs, fmt.Errorf("subscription for topic %s not found", topic)) } + } - // return an error when we exceed the error threshold - if errs != nil && errs.Len() > c.config.RpcMessageErrorThreshold { - return NewInvalidRpcPublishMessagesErr(errs.ErrorOrNil(), errs.Len()) - } + // return an error when we exceed the error threshold + if errs != nil && errs.Len() > c.config.RpcMessageErrorThreshold { + return NewInvalidRpcPublishMessagesErr(errs.ErrorOrNil(), errs.Len()), uint64(errs.Len()) } - return nil + return nil, 0 } // truncateRPC truncates the RPC by truncating each control message type using the configured max sample size values. -func (c *ControlMsgValidationInspector) truncateRPC(from peer.ID, rpc *pubsub.RPC) error { +// Args: +// - from: peer ID of the sender. +// - rpc: the pubsub RPC. +func (c *ControlMsgValidationInspector) truncateRPC(from peer.ID, rpc *pubsub.RPC) { for _, ctlMsgType := range p2pmsg.ControlMessageTypes() { switch ctlMsgType { case p2pmsg.CtrlMsgGraft: @@ -437,16 +538,12 @@ func (c *ControlMsgValidationInspector) truncateRPC(from peer.ID, rpc *pubsub.RP c.logAndThrowError(fmt.Errorf("unknown control message type encountered during RPC truncation")) } } - return nil } // truncateGraftMessages truncates the Graft control messages in the RPC. If the total number of Grafts in the RPC exceeds the configured // GraftPruneMessageMaxSampleSize the list of Grafts will be truncated. // Args: // - rpc: the rpc message to truncate. -// -// Returns: -// - error: if any error encountered while sampling the messages, all errors are considered irrecoverable. func (c *ControlMsgValidationInspector) truncateGraftMessages(rpc *pubsub.RPC) { grafts := rpc.GetControl().GetGraft() totalGrafts := len(grafts) @@ -467,9 +564,6 @@ func (c *ControlMsgValidationInspector) truncateGraftMessages(rpc *pubsub.RPC) { // GraftPruneMessageMaxSampleSize the list of Prunes will be truncated. // Args: // - rpc: the rpc message to truncate. -// -// Returns: -// - error: if any error encountered while sampling the messages, all errors are considered irrecoverable. func (c *ControlMsgValidationInspector) truncatePruneMessages(rpc *pubsub.RPC) { prunes := rpc.GetControl().GetPrune() totalPrunes := len(prunes) @@ -490,9 +584,6 @@ func (c *ControlMsgValidationInspector) truncatePruneMessages(rpc *pubsub.RPC) { // MaxSampleSize the list of iHaves will be truncated. // Args: // - rpc: the rpc message to truncate. -// -// Returns: -// - error: if any error encountered while sampling the messages, all errors are considered irrecoverable. func (c *ControlMsgValidationInspector) truncateIHaveMessages(rpc *pubsub.RPC) { ihaves := rpc.GetControl().GetIhave() totalIHaves := len(ihaves) @@ -537,9 +628,6 @@ func (c *ControlMsgValidationInspector) truncateIHaveMessageIds(rpc *pubsub.RPC) // MaxSampleSize the list of iWants will be truncated. // Args: // - rpc: the rpc message to truncate. -// -// Returns: -// - error: if any error encountered while sampling the messages, all errors are considered irrecoverable. func (c *ControlMsgValidationInspector) truncateIWantMessages(from peer.ID, rpc *pubsub.RPC) { iWants := rpc.GetControl().GetIwant() totalIWants := uint(len(iWants)) @@ -561,9 +649,6 @@ func (c *ControlMsgValidationInspector) truncateIWantMessages(from peer.ID, rpc // MaxMessageIDSampleSize the list of message ids will be truncated. Before message ids are truncated the iWant control messages should have been truncated themselves. // Args: // - rpc: the rpc message to truncate. -// -// Returns: -// - error: if any error encountered while sampling the messages, all errors are considered irrecoverable. func (c *ControlMsgValidationInspector) truncateIWantMessageIds(from peer.ID, rpc *pubsub.RPC) { lastHighest := c.rpcTracker.LastHighestIHaveRPCSize() lg := c.logger.With(). @@ -594,28 +679,6 @@ func (c *ControlMsgValidationInspector) truncateIWantMessageIds(from peer.ID, rp } } -// Name returns the name of the rpc inspector. -func (c *ControlMsgValidationInspector) Name() string { - return rpcInspectorComponentName -} - -// ActiveClustersChanged consumes cluster ID update protocol events. -func (c *ControlMsgValidationInspector) ActiveClustersChanged(clusterIDList flow.ChainIDList) { - c.tracker.StoreActiveClusterIds(clusterIDList) -} - -// SetTopicOracle Sets the topic oracle. The topic oracle is used to determine the list of topics that the node is subscribed to. -// If an oracle is not set, the node will not be able to determine the list of topics that the node is subscribed to. -// This func is expected to be called once and will return an error on all subsequent calls. -// All errors returned from this func are considered irrecoverable. -func (c *ControlMsgValidationInspector) SetTopicOracle(topicOracle func() []string) error { - if c.topicOracle != nil { - return fmt.Errorf("topic oracle already set") - } - c.topicOracle = topicOracle - return nil -} - // performSample performs sampling on the specified control message that will randomize // the items in the control message slice up to index sampleSize-1. Any error encountered during sampling is considered // irrecoverable and will cause the node to crash. @@ -634,23 +697,22 @@ func (c *ControlMsgValidationInspector) performSample(ctrlMsg p2pmsg.ControlMess // // This func returns an exception in case of unexpected bug or state corruption if cluster prefixed topic validation // fails due to unexpected error returned when getting the active cluster IDS. -func (c *ControlMsgValidationInspector) validateTopic(from peer.ID, topic channels.Topic, activeClusterIds flow.ChainIDList) error { +func (c *ControlMsgValidationInspector) validateTopic(from peer.ID, topic channels.Topic, activeClusterIds flow.ChainIDList) (error, p2p.CtrlMsgTopicType) { channel, ok := channels.ChannelFromTopic(topic) if !ok { - return channels.NewInvalidTopicErr(topic, fmt.Errorf("failed to get channel from topic")) + return channels.NewInvalidTopicErr(topic, fmt.Errorf("failed to get channel from topic")), p2p.CtrlMsgNonClusterTopicType } - // handle cluster prefixed topics if channels.IsClusterChannel(channel) { - return c.validateClusterPrefixedTopic(from, topic, activeClusterIds) + return c.validateClusterPrefixedTopic(from, topic, activeClusterIds), p2p.CtrlMsgTopicTypeClusterPrefixed } // non cluster prefixed topic validation err := channels.IsValidNonClusterFlowTopic(topic, c.sporkID) if err != nil { - return err + return err, p2p.CtrlMsgNonClusterTopicType } - return nil + return nil, p2p.CtrlMsgNonClusterTopicType } // validateClusterPrefixedTopic validates cluster prefixed topics. @@ -668,12 +730,12 @@ func (c *ControlMsgValidationInspector) validateClusterPrefixedTopic(from peer.I lg := c.logger.With(). Str("from", p2plogging.PeerId(from)). Logger() - // reject messages from unstaked nodes for cluster prefixed topics + + // only staked nodes are expected to participate on cluster prefixed topics nodeID, err := c.getFlowIdentifier(from) if err != nil { return err } - if len(activeClusterIds) == 0 { // cluster IDs have not been updated yet _, incErr := c.tracker.Inc(nodeID) @@ -683,7 +745,7 @@ func (c *ControlMsgValidationInspector) validateClusterPrefixedTopic(from peer.I } // if the amount of messages received is below our hard threshold log the error and return nil. - if c.checkClusterPrefixHardThreshold(nodeID) { + if ok := c.checkClusterPrefixHardThreshold(nodeID); ok { lg.Warn(). Err(err). Str("topic", topic.String()). @@ -745,29 +807,43 @@ func (c *ControlMsgValidationInspector) checkClusterPrefixHardThreshold(nodeID f } // logAndDistributeErr logs the provided error and attempts to disseminate an invalid control message validation notification for the error. -func (c *ControlMsgValidationInspector) logAndDistributeAsyncInspectErrs(req *InspectRPCRequest, ctlMsgType p2pmsg.ControlMessageType, err error) { +// Args: +// - req: inspect rpc request that failed validation. +// - ctlMsgType: the control message type of the rpc message that caused the error. +// - err: the error that occurred. +// - count: the number of occurrences of the error. +// - isClusterPrefixed: indicates if the errors occurred on a cluster prefixed topic. +func (c *ControlMsgValidationInspector) logAndDistributeAsyncInspectErrs(req *InspectRPCRequest, ctlMsgType p2pmsg.ControlMessageType, err error, count uint64, topicType p2p.CtrlMsgTopicType) { lg := c.logger.With(). + Err(err). + Str("control_message_type", ctlMsgType.String()). Bool(logging.KeySuspicious, true). Bool(logging.KeyNetworkingSecurity, true). + Str("topic_type", topicType.String()). + Uint64("error_count", count). Str("peer_id", p2plogging.PeerId(req.Peer)). Logger() switch { case IsErrActiveClusterIDsNotSet(err): - lg.Warn().Err(err).Msg("active cluster ids not set") + lg.Warn().Msg("active cluster ids not set") case IsErrUnstakedPeer(err): - lg.Warn().Err(err).Msg("control message received from unstaked peer") + lg.Warn().Msg("control message received from unstaked peer") default: - err = c.distributor.Distribute(p2p.NewInvalidControlMessageNotification(req.Peer, ctlMsgType, err)) - if err != nil { + distErr := c.distributor.Distribute(p2p.NewInvalidControlMessageNotification(req.Peer, ctlMsgType, err, count, topicType)) + if distErr != nil { lg.Error(). - Err(err). + Err(distErr). Msg("failed to distribute invalid control message notification") } - lg.Error().Err(err).Msg("rpc control message async inspection failed") + lg.Error().Msg("rpc control message async inspection failed") } } +// logAndThrowError logs and throws irrecoverable errors on the context. +// Args: +// +// err: the error encountered. func (c *ControlMsgValidationInspector) logAndThrowError(err error) { c.logger.Error(). Err(err). diff --git a/network/p2p/inspector/validation/control_message_validation_inspector_test.go b/network/p2p/inspector/validation/control_message_validation_inspector_test.go index 1a68e7d7a10..8ed0a4b1888 100644 --- a/network/p2p/inspector/validation/control_message_validation_inspector_test.go +++ b/network/p2p/inspector/validation/control_message_validation_inspector_test.go @@ -1,9 +1,11 @@ -package validation +package validation_test import ( "context" "fmt" + "math/rand" "testing" + "time" pubsub_pb "github.com/libp2p/go-libp2p-pubsub/pb" "github.com/libp2p/go-libp2p/core/peer" @@ -15,117 +17,257 @@ import ( "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/module/metrics" mockmodule "github.com/onflow/flow-go/module/mock" + "github.com/onflow/flow-go/network" "github.com/onflow/flow-go/network/channels" "github.com/onflow/flow-go/network/p2p" + "github.com/onflow/flow-go/network/p2p/inspector/internal" + "github.com/onflow/flow-go/network/p2p/inspector/validation" p2pmsg "github.com/onflow/flow-go/network/p2p/message" mockp2p "github.com/onflow/flow-go/network/p2p/mock" + p2ptest "github.com/onflow/flow-go/network/p2p/test" "github.com/onflow/flow-go/utils/unittest" ) +func TestNewControlMsgValidationInspector(t *testing.T) { + t.Run("should create validation inspector without error", func(t *testing.T) { + sporkID := unittest.IdentifierFixture() + flowConfig, err := config.DefaultConfig() + require.NoError(t, err, "failed to get default flow config") + distributor := mockp2p.NewGossipSubInspectorNotifDistributor(t) + idProvider := mockmodule.NewIdentityProvider(t) + topicProvider := internal.NewMockUpdatableTopicProvider() + inspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &flowConfig.NetworkConfig.GossipSubRPCValidationInspectorConfigs, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: mockp2p.NewRpcControlTracking(t), + NetworkingType: network.PublicNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProvider + }, + }) + require.NoError(t, err) + require.NotNil(t, inspector) + }) + t.Run("should return error if any of the params are nil", func(t *testing.T) { + inspector, err := validation.NewControlMsgValidationInspector(&validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: unittest.IdentifierFixture(), + Config: nil, + Distributor: nil, + IdProvider: nil, + HeroCacheMetricsFactory: nil, + InspectorMetrics: nil, + RpcTracker: nil, + TopicOracle: nil, + }) + require.Nil(t, inspector) + require.Error(t, err) + s := err.Error() + require.Contains(t, s, "validation for 'Config' failed on the 'required'") + require.Contains(t, s, "validation for 'Distributor' failed on the 'required'") + require.Contains(t, s, "validation for 'IdProvider' failed on the 'required'") + require.Contains(t, s, "validation for 'HeroCacheMetricsFactory' failed on the 'required'") + require.Contains(t, s, "validation for 'InspectorMetrics' failed on the 'required'") + require.Contains(t, s, "validation for 'RpcTracker' failed on the 'required'") + require.Contains(t, s, "validation for 'NetworkingType' failed on the 'required'") + require.Contains(t, s, "validation for 'TopicOracle' failed on the 'required'") + }) +} + // TestControlMessageValidationInspector_TruncateRPC verifies the expected truncation behavior of RPC control messages. // Message truncation for each control message type occurs when the count of control // messages exceeds the configured maximum sample size for that control message type. func TestControlMessageValidationInspector_truncateRPC(t *testing.T) { t.Run("truncateGraftMessages should truncate graft messages as expected", func(t *testing.T) { - inspector, _, _, _, _ := inspectorFixture(t) - inspector.config.GraftPruneMessageMaxSampleSize = 100 + graftPruneMessageMaxSampleSize := 1000 + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.GraftPruneMessageMaxSampleSize = graftPruneMessageMaxSampleSize + }) + // topic validation is ignored set any topic oracle + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Maybe() + rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() + rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Maybe() + inspector.Start(signalerCtx) // topic validation not performed so we can use random strings - graftsGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixtures(unittest.IdentifierListFixture(200).Strings()...)...)) - require.Greater(t, len(graftsGreaterThanMaxSampleSize.GetControl().GetGraft()), inspector.config.GraftPruneMessageMaxSampleSize) + graftsGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixtures(unittest.IdentifierListFixture(2000).Strings()...)...)) + require.Greater(t, len(graftsGreaterThanMaxSampleSize.GetControl().GetGraft()), graftPruneMessageMaxSampleSize) graftsLessThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixtures(unittest.IdentifierListFixture(50).Strings()...)...)) - require.Less(t, len(graftsLessThanMaxSampleSize.GetControl().GetGraft()), inspector.config.GraftPruneMessageMaxSampleSize) - inspector.truncateGraftMessages(graftsGreaterThanMaxSampleSize) - inspector.truncateGraftMessages(graftsLessThanMaxSampleSize) - // rpc with grafts greater than configured max sample size should be truncated to GraftPruneMessageMaxSampleSize - require.Len(t, graftsGreaterThanMaxSampleSize.GetControl().GetGraft(), inspector.config.GraftPruneMessageMaxSampleSize) - // rpc with grafts less than GraftPruneMessageMaxSampleSize should not be truncated - require.Len(t, graftsLessThanMaxSampleSize.GetControl().GetGraft(), 50) + require.Less(t, len(graftsLessThanMaxSampleSize.GetControl().GetGraft()), graftPruneMessageMaxSampleSize) + + from := unittest.PeerIdFixture(t) + require.NoError(t, inspector.Inspect(from, graftsGreaterThanMaxSampleSize)) + require.NoError(t, inspector.Inspect(from, graftsLessThanMaxSampleSize)) + require.Eventually(t, func() bool { + // rpc with grafts greater than configured max sample size should be truncated to GraftPruneMessageMaxSampleSize + shouldBeTruncated := len(graftsGreaterThanMaxSampleSize.GetControl().GetGraft()) == graftPruneMessageMaxSampleSize + // rpc with grafts less than GraftPruneMessageMaxSampleSize should not be truncated + shouldNotBeTruncated := len(graftsLessThanMaxSampleSize.GetControl().GetGraft()) == 50 + return shouldBeTruncated && shouldNotBeTruncated + }, time.Second, 500*time.Millisecond) + stopInspector(t, cancel, inspector) }) t.Run("truncatePruneMessages should truncate prune messages as expected", func(t *testing.T) { - inspector, _, _, _, _ := inspectorFixture(t) - inspector.config.GraftPruneMessageMaxSampleSize = 100 - // topic validation not performed so we can use random strings - prunesGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithPrunes(unittest.P2PRPCPruneFixtures(unittest.IdentifierListFixture(200).Strings()...)...)) - require.Greater(t, len(prunesGreaterThanMaxSampleSize.GetControl().GetPrune()), inspector.config.GraftPruneMessageMaxSampleSize) + graftPruneMessageMaxSampleSize := 1000 + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.GraftPruneMessageMaxSampleSize = graftPruneMessageMaxSampleSize + }) + // topic validation is ignored set any topic oracle + rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() + rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Maybe() + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Twice() + + inspector.Start(signalerCtx) + // unittest.RequireCloseBefore(t, inspector.Ready(), 100*time.Millisecond, "inspector did not start") + // topic validation not performed, so we can use random strings + prunesGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithPrunes(unittest.P2PRPCPruneFixtures(unittest.IdentifierListFixture(2000).Strings()...)...)) + require.Greater(t, len(prunesGreaterThanMaxSampleSize.GetControl().GetPrune()), graftPruneMessageMaxSampleSize) prunesLessThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithPrunes(unittest.P2PRPCPruneFixtures(unittest.IdentifierListFixture(50).Strings()...)...)) - require.Less(t, len(prunesLessThanMaxSampleSize.GetControl().GetPrune()), inspector.config.GraftPruneMessageMaxSampleSize) - inspector.truncatePruneMessages(prunesGreaterThanMaxSampleSize) - inspector.truncatePruneMessages(prunesLessThanMaxSampleSize) - // rpc with prunes greater than configured max sample size should be truncated to GraftPruneMessageMaxSampleSize - require.Len(t, prunesGreaterThanMaxSampleSize.GetControl().GetPrune(), inspector.config.GraftPruneMessageMaxSampleSize) - // rpc with prunes less than GraftPruneMessageMaxSampleSize should not be truncated - require.Len(t, prunesLessThanMaxSampleSize.GetControl().GetPrune(), 50) + require.Less(t, len(prunesLessThanMaxSampleSize.GetControl().GetPrune()), graftPruneMessageMaxSampleSize) + from := unittest.PeerIdFixture(t) + require.NoError(t, inspector.Inspect(from, prunesGreaterThanMaxSampleSize)) + require.NoError(t, inspector.Inspect(from, prunesLessThanMaxSampleSize)) + require.Eventually(t, func() bool { + // rpc with prunes greater than configured max sample size should be truncated to GraftPruneMessageMaxSampleSize + shouldBeTruncated := len(prunesGreaterThanMaxSampleSize.GetControl().GetPrune()) == graftPruneMessageMaxSampleSize + // rpc with prunes less than GraftPruneMessageMaxSampleSize should not be truncated + shouldNotBeTruncated := len(prunesLessThanMaxSampleSize.GetControl().GetPrune()) == 50 + return shouldBeTruncated && shouldNotBeTruncated + }, time.Second, 500*time.Millisecond) + stopInspector(t, cancel, inspector) }) t.Run("truncateIHaveMessages should truncate iHave messages as expected", func(t *testing.T) { - inspector, _, _, _, _ := inspectorFixture(t) - inspector.config.IHaveRPCInspectionConfig.MaxSampleSize = 100 + maxSampleSize := 1000 + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.IHaveRPCInspectionConfig.MaxSampleSize = maxSampleSize + }) + // topic validation is ignored set any topic oracle + rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() + rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Maybe() + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Twice() + inspector.Start(signalerCtx) + // topic validation not performed so we can use random strings - iHavesGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIHaves(unittest.P2PRPCIHaveFixtures(200, unittest.IdentifierListFixture(200).Strings()...)...)) - require.Greater(t, len(iHavesGreaterThanMaxSampleSize.GetControl().GetIhave()), inspector.config.IHaveRPCInspectionConfig.MaxSampleSize) + iHavesGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIHaves(unittest.P2PRPCIHaveFixtures(2000, unittest.IdentifierListFixture(2000).Strings()...)...)) + require.Greater(t, len(iHavesGreaterThanMaxSampleSize.GetControl().GetIhave()), maxSampleSize) iHavesLessThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIHaves(unittest.P2PRPCIHaveFixtures(200, unittest.IdentifierListFixture(50).Strings()...)...)) - require.Less(t, len(iHavesLessThanMaxSampleSize.GetControl().GetIhave()), inspector.config.IHaveRPCInspectionConfig.MaxSampleSize) - inspector.truncateIHaveMessages(iHavesGreaterThanMaxSampleSize) - inspector.truncateIHaveMessages(iHavesLessThanMaxSampleSize) - // rpc with iHaves greater than configured max sample size should be truncated to MaxSampleSize - require.Len(t, iHavesGreaterThanMaxSampleSize.GetControl().GetIhave(), inspector.config.IHaveRPCInspectionConfig.MaxSampleSize) - // rpc with iHaves less than MaxSampleSize should not be truncated - require.Len(t, iHavesLessThanMaxSampleSize.GetControl().GetIhave(), 50) + require.Less(t, len(iHavesLessThanMaxSampleSize.GetControl().GetIhave()), maxSampleSize) + + from := unittest.PeerIdFixture(t) + require.NoError(t, inspector.Inspect(from, iHavesGreaterThanMaxSampleSize)) + require.NoError(t, inspector.Inspect(from, iHavesLessThanMaxSampleSize)) + require.Eventually(t, func() bool { + // rpc with iHaves greater than configured max sample size should be truncated to MaxSampleSize + shouldBeTruncated := len(iHavesGreaterThanMaxSampleSize.GetControl().GetIhave()) == maxSampleSize + // rpc with iHaves less than MaxSampleSize should not be truncated + shouldNotBeTruncated := len(iHavesLessThanMaxSampleSize.GetControl().GetIhave()) == 50 + return shouldBeTruncated && shouldNotBeTruncated + }, time.Second, 500*time.Millisecond) + stopInspector(t, cancel, inspector) }) t.Run("truncateIHaveMessageIds should truncate iHave message ids as expected", func(t *testing.T) { - inspector, _, _, _, _ := inspectorFixture(t) - inspector.config.IHaveRPCInspectionConfig.MaxMessageIDSampleSize = 100 + maxMessageIDSampleSize := 1000 + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.IHaveRPCInspectionConfig.MaxMessageIDSampleSize = maxMessageIDSampleSize + }) + // topic validation is ignored set any topic oracle + rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() + rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Maybe() + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Twice() + inspector.Start(signalerCtx) + // topic validation not performed so we can use random strings - iHavesGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIHaves(unittest.P2PRPCIHaveFixtures(200, unittest.IdentifierListFixture(10).Strings()...)...)) + iHavesGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIHaves(unittest.P2PRPCIHaveFixtures(2000, unittest.IdentifierListFixture(10).Strings()...)...)) iHavesLessThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIHaves(unittest.P2PRPCIHaveFixtures(50, unittest.IdentifierListFixture(10).Strings()...)...)) - inspector.truncateIHaveMessageIds(iHavesGreaterThanMaxSampleSize) - inspector.truncateIHaveMessageIds(iHavesLessThanMaxSampleSize) - for _, iHave := range iHavesGreaterThanMaxSampleSize.GetControl().GetIhave() { - // rpc with iHaves message ids greater than configured max sample size should be truncated to MaxSampleSize - require.Len(t, iHave.GetMessageIDs(), inspector.config.IHaveRPCInspectionConfig.MaxMessageIDSampleSize) - } - for _, iHave := range iHavesLessThanMaxSampleSize.GetControl().GetIhave() { - // rpc with iHaves message ids less than MaxSampleSize should not be truncated - require.Len(t, iHave.GetMessageIDs(), 50) - } + + from := unittest.PeerIdFixture(t) + require.NoError(t, inspector.Inspect(from, iHavesGreaterThanMaxSampleSize)) + require.NoError(t, inspector.Inspect(from, iHavesLessThanMaxSampleSize)) + require.Eventually(t, func() bool { + for _, iHave := range iHavesGreaterThanMaxSampleSize.GetControl().GetIhave() { + // rpc with iHaves message ids greater than configured max sample size should be truncated to MaxSampleSize + if len(iHave.GetMessageIDs()) != maxMessageIDSampleSize { + return false + } + } + for _, iHave := range iHavesLessThanMaxSampleSize.GetControl().GetIhave() { + // rpc with iHaves message ids less than MaxSampleSize should not be truncated + if len(iHave.GetMessageIDs()) != 50 { + return false + } + } + return true + }, time.Second, 500*time.Millisecond) + stopInspector(t, cancel, inspector) }) t.Run("truncateIWantMessages should truncate iWant messages as expected", func(t *testing.T) { - inspector, _, rpcTracker, _, _ := inspectorFixture(t) - inspector.config.IWantRPCInspectionConfig.MaxSampleSize = 100 + maxSampleSize := uint(100) + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.IWantRPCInspectionConfig.MaxSampleSize = maxSampleSize + }) + // topic validation is ignored set any topic oracle + rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() + rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Maybe() + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Maybe() + inspector.Start(signalerCtx) iWantsGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixtures(200, 200)...)) - require.Greater(t, uint(len(iWantsGreaterThanMaxSampleSize.GetControl().GetIwant())), inspector.config.IWantRPCInspectionConfig.MaxSampleSize) + require.Greater(t, uint(len(iWantsGreaterThanMaxSampleSize.GetControl().GetIwant())), maxSampleSize) iWantsLessThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixtures(50, 200)...)) - require.Less(t, uint(len(iWantsLessThanMaxSampleSize.GetControl().GetIwant())), inspector.config.IWantRPCInspectionConfig.MaxSampleSize) - peerID := peer.ID("peer") - rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Twice() - inspector.truncateIWantMessages(peerID, iWantsGreaterThanMaxSampleSize) - inspector.truncateIWantMessages(peerID, iWantsLessThanMaxSampleSize) - // rpc with iWants greater than configured max sample size should be truncated to MaxSampleSize - require.Len(t, iWantsGreaterThanMaxSampleSize.GetControl().GetIwant(), int(inspector.config.IWantRPCInspectionConfig.MaxSampleSize)) - // rpc with iWants less than MaxSampleSize should not be truncated - require.Len(t, iWantsLessThanMaxSampleSize.GetControl().GetIwant(), 50) + require.Less(t, uint(len(iWantsLessThanMaxSampleSize.GetControl().GetIwant())), maxSampleSize) + + from := unittest.PeerIdFixture(t) + require.NoError(t, inspector.Inspect(from, iWantsGreaterThanMaxSampleSize)) + require.NoError(t, inspector.Inspect(from, iWantsLessThanMaxSampleSize)) + require.Eventually(t, func() bool { + // rpc with iWants greater than configured max sample size should be truncated to MaxSampleSize + shouldBeTruncated := len(iWantsGreaterThanMaxSampleSize.GetControl().GetIwant()) == int(maxSampleSize) + // rpc with iWants less than MaxSampleSize should not be truncated + shouldNotBeTruncated := len(iWantsLessThanMaxSampleSize.GetControl().GetIwant()) == 50 + return shouldBeTruncated && shouldNotBeTruncated + }, time.Second, 500*time.Millisecond) + stopInspector(t, cancel, inspector) }) t.Run("truncateIWantMessageIds should truncate iWant message ids as expected", func(t *testing.T) { - inspector, _, rpcTracker, _, _ := inspectorFixture(t) - inspector.config.IWantRPCInspectionConfig.MaxMessageIDSampleSize = 100 - iWantsGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixtures(10, 200)...)) + maxMessageIDSampleSize := 1000 + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.IWantRPCInspectionConfig.MaxMessageIDSampleSize = maxMessageIDSampleSize + }) + // topic validation is ignored set any topic oracle + rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() + rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Maybe() + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Maybe() + inspector.Start(signalerCtx) + iWantsGreaterThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixtures(10, 2000)...)) iWantsLessThanMaxSampleSize := unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixtures(10, 50)...)) - peerID := peer.ID("peer") - rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Twice() - inspector.truncateIWantMessages(peerID, iWantsGreaterThanMaxSampleSize) - inspector.truncateIWantMessages(peerID, iWantsLessThanMaxSampleSize) - for _, iWant := range iWantsGreaterThanMaxSampleSize.GetControl().GetIwant() { - // rpc with iWants message ids greater than configured max sample size should be truncated to MaxSampleSize - require.Len(t, iWant.GetMessageIDs(), inspector.config.IWantRPCInspectionConfig.MaxMessageIDSampleSize) - } - for _, iWant := range iWantsLessThanMaxSampleSize.GetControl().GetIwant() { - // rpc with iWants less than MaxSampleSize should not be truncated - require.Len(t, iWant.GetMessageIDs(), 50) - } + + from := unittest.PeerIdFixture(t) + require.NoError(t, inspector.Inspect(from, iWantsGreaterThanMaxSampleSize)) + require.NoError(t, inspector.Inspect(from, iWantsLessThanMaxSampleSize)) + require.Eventually(t, func() bool { + for _, iWant := range iWantsGreaterThanMaxSampleSize.GetControl().GetIwant() { + // rpc with iWants message ids greater than configured max sample size should be truncated to MaxSampleSize + if len(iWant.GetMessageIDs()) != maxMessageIDSampleSize { + return false + } + } + for _, iWant := range iWantsLessThanMaxSampleSize.GetControl().GetIwant() { + // rpc with iWants less than MaxSampleSize should not be truncated + if len(iWant.GetMessageIDs()) != 50 { + return false + } + } + return true + }, time.Second, 500*time.Millisecond) + stopInspector(t, cancel, inspector) }) } @@ -134,176 +276,197 @@ func TestControlMessageValidationInspector_truncateRPC(t *testing.T) { // while all types of invalid control messages trigger expected notifications. func TestControlMessageValidationInspector_processInspectRPCReq(t *testing.T) { t.Run("processInspectRPCReq should not disseminate any invalid notification errors for valid RPC's", func(t *testing.T) { - inspector, distributor, rpcTracker, _, sporkID := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, rpcTracker, sporkID, _, topicProviderOracle := inspectorFixture(t) defer distributor.AssertNotCalled(t, "Distribute") + topics := []string{ fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID), fmt.Sprintf("%s/%s", channels.PushBlocks, sporkID), fmt.Sprintf("%s/%s", channels.SyncCommittee, sporkID), fmt.Sprintf("%s/%s", channels.RequestChunks, sporkID), } - // set topic oracle to return list of topics excluding first topic sent - require.NoError(t, inspector.SetTopicOracle(func() []string { - return topics - })) + // avoid unknown topics errors + topicProviderOracle.UpdateTopics(topics) + inspector.Start(signalerCtx) grafts := unittest.P2PRPCGraftFixtures(topics...) prunes := unittest.P2PRPCPruneFixtures(topics...) ihaves := unittest.P2PRPCIHaveFixtures(50, topics...) iwants := unittest.P2PRPCIWantFixtures(2, 5) - pubsubMsgs := unittest.GossipSubMessageFixtures(t, 10, topics[0]) + pubsubMsgs := unittest.GossipSubMessageFixtures(10, topics[0]) // avoid cache misses for iwant messages. iwants[0].MessageIDs = ihaves[0].MessageIDs[:10] iwants[1].MessageIDs = ihaves[1].MessageIDs[11:20] expectedMsgIds := make([]string, 0) - expectedMsgIds = append(expectedMsgIds, ihaves[0].MessageIDs[:10]...) - expectedMsgIds = append(expectedMsgIds, ihaves[1].MessageIDs[11:20]...) - expectedPeerID := unittest.PeerIdFixture(t) - req, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture( + expectedMsgIds = append(expectedMsgIds, ihaves[0].MessageIDs...) + expectedMsgIds = append(expectedMsgIds, ihaves[1].MessageIDs...) + rpc := unittest.P2PRPCFixture( unittest.WithGrafts(grafts...), unittest.WithPrunes(prunes...), unittest.WithIHaves(ihaves...), unittest.WithIWants(iwants...), - unittest.WithPubsubMessages(pubsubMsgs...)), - ) - require.NoError(t, err, "failed to get inspect message request") + unittest.WithPubsubMessages(pubsubMsgs...)) rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Run(func(args mock.Arguments) { id, ok := args[0].(string) require.True(t, ok) require.Contains(t, expectedMsgIds, id) }) - require.NoError(t, inspector.processInspectRPCReq(req)) + + from := unittest.PeerIdFixture(t) + require.NoError(t, inspector.Inspect(from, rpc)) + // sleep for 1 second to ensure rpc is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("processInspectRPCReq should disseminate invalid control message notification for control messages with duplicate topics", func(t *testing.T) { - inspector, distributor, _, _, sporkID := inspectorFixture(t) - defer distributor.AssertNotCalled(t, "Distribute") + inspector, signalerCtx, cancel, distributor, _, sporkID, _, topicProviderOracle := inspectorFixture(t) duplicateTopic := fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID) + // avoid unknown topics errors + topicProviderOracle.UpdateTopics([]string{duplicateTopic}) // create control messages with duplicate topic grafts := []*pubsub_pb.ControlGraft{unittest.P2PRPCGraftFixture(&duplicateTopic), unittest.P2PRPCGraftFixture(&duplicateTopic)} prunes := []*pubsub_pb.ControlPrune{unittest.P2PRPCPruneFixture(&duplicateTopic), unittest.P2PRPCPruneFixture(&duplicateTopic)} - ihaves := []*pubsub_pb.ControlIHave{unittest.P2PRPCIHaveFixture(&duplicateTopic, unittest.IdentifierListFixture(20).Strings()...), unittest.P2PRPCIHaveFixture(&duplicateTopic, unittest.IdentifierListFixture(20).Strings()...)} - expectedPeerID := unittest.PeerIdFixture(t) - duplicateTopicGraftsReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithGrafts(grafts...))) - require.NoError(t, err, "failed to get inspect message request") - duplicateTopicPrunesReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithPrunes(prunes...))) - require.NoError(t, err, "failed to get inspect message request") - duplicateTopicIHavesReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIHaves(ihaves...))) - require.NoError(t, err, "failed to get inspect message request") + ihaves := []*pubsub_pb.ControlIHave{unittest.P2PRPCIHaveFixture(&duplicateTopic, unittest.IdentifierListFixture(20).Strings()...), + unittest.P2PRPCIHaveFixture(&duplicateTopic, unittest.IdentifierListFixture(20).Strings()...)} + from := unittest.PeerIdFixture(t) + duplicateTopicGraftsRpc := unittest.P2PRPCFixture(unittest.WithGrafts(grafts...)) + duplicateTopicPrunesRpc := unittest.P2PRPCFixture(unittest.WithPrunes(prunes...)) + duplicateTopicIHavesRpc := unittest.P2PRPCFixture(unittest.WithIHaves(ihaves...)) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Times(3).Run(func(args mock.Arguments) { notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) - require.Equal(t, expectedPeerID, notification.PeerID) + require.Equal(t, notification.TopicType, p2p.CtrlMsgNonClusterTopicType, "expected p2p.CtrlMsgNonClusterTopicType notification type, no RPC with cluster prefixed topic sent in this test") + require.Equal(t, from, notification.PeerID) require.Contains(t, []p2pmsg.ControlMessageType{p2pmsg.CtrlMsgGraft, p2pmsg.CtrlMsgPrune, p2pmsg.CtrlMsgIHave}, notification.MsgType) - require.True(t, IsDuplicateTopicErr(notification.Error)) + require.True(t, validation.IsDuplicateTopicErr(notification.Error)) }) - require.NoError(t, inspector.processInspectRPCReq(duplicateTopicGraftsReq)) - require.NoError(t, inspector.processInspectRPCReq(duplicateTopicPrunesReq)) - require.NoError(t, inspector.processInspectRPCReq(duplicateTopicIHavesReq)) + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, duplicateTopicGraftsRpc)) + require.NoError(t, inspector.Inspect(from, duplicateTopicPrunesRpc)) + require.NoError(t, inspector.Inspect(from, duplicateTopicIHavesRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("inspectGraftMessages should disseminate invalid control message notification for invalid graft messages as expected", func(t *testing.T) { - inspector, distributor, _, _, sporkID := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, _, sporkID, _, topicProviderOracle := inspectorFixture(t) // create unknown topic unknownTopic, malformedTopic, invalidSporkIDTopic := invalidTopics(t, sporkID) + // avoid unknown topics errors + topicProviderOracle.UpdateTopics([]string{unknownTopic, malformedTopic, invalidSporkIDTopic}) unknownTopicGraft := unittest.P2PRPCGraftFixture(&unknownTopic) malformedTopicGraft := unittest.P2PRPCGraftFixture(&malformedTopic) invalidSporkIDTopicGraft := unittest.P2PRPCGraftFixture(&invalidSporkIDTopic) - expectedPeerID := unittest.PeerIdFixture(t) - unknownTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithGrafts(unknownTopicGraft))) - require.NoError(t, err, "failed to get inspect message request") - malformedTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithGrafts(malformedTopicGraft))) - require.NoError(t, err, "failed to get inspect message request") - invalidSporkIDTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithGrafts(invalidSporkIDTopicGraft))) - require.NoError(t, err, "failed to get inspect message request") + unknownTopicReq := unittest.P2PRPCFixture(unittest.WithGrafts(unknownTopicGraft)) + malformedTopicReq := unittest.P2PRPCFixture(unittest.WithGrafts(malformedTopicGraft)) + invalidSporkIDTopicReq := unittest.P2PRPCFixture(unittest.WithGrafts(invalidSporkIDTopicGraft)) - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.CtrlMsgGraft, channels.IsInvalidTopicErr) + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.CtrlMsgGraft, channels.IsInvalidTopicErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Times(3).Run(checkNotification) - require.NoError(t, inspector.processInspectRPCReq(unknownTopicReq)) - require.NoError(t, inspector.processInspectRPCReq(malformedTopicReq)) - require.NoError(t, inspector.processInspectRPCReq(invalidSporkIDTopicReq)) + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, unknownTopicReq)) + require.NoError(t, inspector.Inspect(from, malformedTopicReq)) + require.NoError(t, inspector.Inspect(from, invalidSporkIDTopicReq)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("inspectPruneMessages should disseminate invalid control message notification for invalid prune messages as expected", func(t *testing.T) { - inspector, distributor, _, _, sporkID := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, _, sporkID, _, topicProviderOracle := inspectorFixture(t) // create unknown topic unknownTopic, malformedTopic, invalidSporkIDTopic := invalidTopics(t, sporkID) unknownTopicPrune := unittest.P2PRPCPruneFixture(&unknownTopic) malformedTopicPrune := unittest.P2PRPCPruneFixture(&malformedTopic) invalidSporkIDTopicPrune := unittest.P2PRPCPruneFixture(&invalidSporkIDTopic) - - expectedPeerID := unittest.PeerIdFixture(t) - unknownTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithPrunes(unknownTopicPrune))) - require.NoError(t, err, "failed to get inspect message request") - malformedTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithPrunes(malformedTopicPrune))) - require.NoError(t, err, "failed to get inspect message request") - invalidSporkIDTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithPrunes(invalidSporkIDTopicPrune))) - require.NoError(t, err, "failed to get inspect message request") - - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.CtrlMsgPrune, channels.IsInvalidTopicErr) + // avoid unknown topics errors + topicProviderOracle.UpdateTopics([]string{unknownTopic, malformedTopic, invalidSporkIDTopic}) + unknownTopicRpc := unittest.P2PRPCFixture(unittest.WithPrunes(unknownTopicPrune)) + malformedTopicRpc := unittest.P2PRPCFixture(unittest.WithPrunes(malformedTopicPrune)) + invalidSporkIDTopicRpc := unittest.P2PRPCFixture(unittest.WithPrunes(invalidSporkIDTopicPrune)) + + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.CtrlMsgPrune, channels.IsInvalidTopicErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Times(3).Run(checkNotification) - require.NoError(t, inspector.processInspectRPCReq(unknownTopicReq)) - require.NoError(t, inspector.processInspectRPCReq(malformedTopicReq)) - require.NoError(t, inspector.processInspectRPCReq(invalidSporkIDTopicReq)) + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, unknownTopicRpc)) + require.NoError(t, inspector.Inspect(from, malformedTopicRpc)) + require.NoError(t, inspector.Inspect(from, invalidSporkIDTopicRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("inspectIHaveMessages should disseminate invalid control message notification for iHave messages with invalid topics as expected", func(t *testing.T) { - inspector, distributor, _, _, sporkID := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, _, sporkID, _, topicProviderOracle := inspectorFixture(t) // create unknown topic unknownTopic, malformedTopic, invalidSporkIDTopic := invalidTopics(t, sporkID) + // avoid unknown topics errors + topicProviderOracle.UpdateTopics([]string{unknownTopic, malformedTopic, invalidSporkIDTopic}) unknownTopicIhave := unittest.P2PRPCIHaveFixture(&unknownTopic, unittest.IdentifierListFixture(5).Strings()...) malformedTopicIhave := unittest.P2PRPCIHaveFixture(&malformedTopic, unittest.IdentifierListFixture(5).Strings()...) invalidSporkIDTopicIhave := unittest.P2PRPCIHaveFixture(&invalidSporkIDTopic, unittest.IdentifierListFixture(5).Strings()...) - expectedPeerID := unittest.PeerIdFixture(t) - unknownTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIHaves(unknownTopicIhave))) - require.NoError(t, err, "failed to get inspect message request") - malformedTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIHaves(malformedTopicIhave))) - require.NoError(t, err, "failed to get inspect message request") - invalidSporkIDTopicReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIHaves(invalidSporkIDTopicIhave))) - require.NoError(t, err, "failed to get inspect message request") + unknownTopicRpc := unittest.P2PRPCFixture(unittest.WithIHaves(unknownTopicIhave)) + malformedTopicRpc := unittest.P2PRPCFixture(unittest.WithIHaves(malformedTopicIhave)) + invalidSporkIDTopicRpc := unittest.P2PRPCFixture(unittest.WithIHaves(invalidSporkIDTopicIhave)) - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.CtrlMsgIHave, channels.IsInvalidTopicErr) + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.CtrlMsgIHave, channels.IsInvalidTopicErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Times(3).Run(checkNotification) - - require.NoError(t, inspector.processInspectRPCReq(unknownTopicReq)) - require.NoError(t, inspector.processInspectRPCReq(malformedTopicReq)) - require.NoError(t, inspector.processInspectRPCReq(invalidSporkIDTopicReq)) + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, unknownTopicRpc)) + require.NoError(t, inspector.Inspect(from, malformedTopicRpc)) + require.NoError(t, inspector.Inspect(from, invalidSporkIDTopicRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("inspectIHaveMessages should disseminate invalid control message notification for iHave messages with duplicate message ids as expected", func(t *testing.T) { - inspector, distributor, _, _, sporkID := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, _, sporkID, _, topicProviderOracle := inspectorFixture(t) validTopic := fmt.Sprintf("%s/%s", channels.PushBlocks.String(), sporkID) + // avoid unknown topics errors + topicProviderOracle.UpdateTopics([]string{validTopic}) duplicateMsgID := unittest.IdentifierFixture() msgIds := flow.IdentifierList{duplicateMsgID, duplicateMsgID, duplicateMsgID} duplicateMsgIDIHave := unittest.P2PRPCIHaveFixture(&validTopic, append(msgIds, unittest.IdentifierListFixture(5)...).Strings()...) - - expectedPeerID := unittest.PeerIdFixture(t) - duplicateMsgIDReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIHaves(duplicateMsgIDIHave))) - require.NoError(t, err, "failed to get inspect message request") - - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.CtrlMsgIHave, IsDuplicateTopicErr) + duplicateMsgIDRpc := unittest.P2PRPCFixture(unittest.WithIHaves(duplicateMsgIDIHave)) + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.CtrlMsgIHave, validation.IsDuplicateTopicErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) - require.NoError(t, inspector.processInspectRPCReq(duplicateMsgIDReq)) + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, duplicateMsgIDRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("inspectIWantMessages should disseminate invalid control message notification for iWant messages when duplicate message ids exceeds the allowed threshold", func(t *testing.T) { - inspector, distributor, rpcTracker, _, _ := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t) + // oracle must be set even though iWant messages do not have topic IDs duplicateMsgID := unittest.IdentifierFixture() duplicates := flow.IdentifierList{duplicateMsgID, duplicateMsgID} msgIds := append(duplicates, unittest.IdentifierListFixture(5)...).Strings() duplicateMsgIDIWant := unittest.P2PRPCIWantFixture(msgIds...) - expectedPeerID := unittest.PeerIdFixture(t) - duplicateMsgIDReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIWants(duplicateMsgIDIWant))) - require.NoError(t, err, "failed to get inspect message request") + duplicateMsgIDRpc := unittest.P2PRPCFixture(unittest.WithIWants(duplicateMsgIDIWant)) - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.CtrlMsgIWant, IsIWantDuplicateMsgIDThresholdErr) + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.CtrlMsgIWant, validation.IsIWantDuplicateMsgIDThresholdErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(true).Run(func(args mock.Arguments) { @@ -311,219 +474,320 @@ func TestControlMessageValidationInspector_processInspectRPCReq(t *testing.T) { require.True(t, ok) require.Contains(t, msgIds, id) }) - require.NoError(t, inspector.processInspectRPCReq(duplicateMsgIDReq)) + + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, duplicateMsgIDRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("inspectIWantMessages should disseminate invalid control message notification for iWant messages when cache misses exceeds allowed threshold", func(t *testing.T) { - inspector, distributor, rpcTracker, _, _ := inspectorFixture(t) - // set cache miss check size to 0 forcing the inspector to check the cache misses with only a single iWant - inspector.config.CacheMissCheckSize = 0 - // set high cache miss threshold to ensure we only disseminate notification when it is exceeded - inspector.config.IWantRPCInspectionConfig.CacheMissThreshold = .9 - msgIds := unittest.IdentifierListFixture(100).Strings() - expectedPeerID := unittest.PeerIdFixture(t) - inspectMsgReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixture(msgIds...)))) - require.NoError(t, err, "failed to get inspect message request") - - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.CtrlMsgIWant, IsIWantCacheMissThresholdErr) + cacheMissCheckSize := 1000 + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.CacheMissCheckSize = cacheMissCheckSize + // set high cache miss threshold to ensure we only disseminate notification when it is exceeded + params.Config.IWantRPCInspectionConfig.CacheMissThreshold = .9 + }) + // oracle must be set even though iWant messages do not have topic IDs + inspectMsgRpc := unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixtures(cacheMissCheckSize+1, 100)...)) + + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.CtrlMsgIWant, validation.IsIWantCacheMissThresholdErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() // return false each time to eventually force a notification to be disseminated when the cache miss count finally exceeds the 90% threshold rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(false).Run(func(args mock.Arguments) { id, ok := args[0].(string) require.True(t, ok) - require.Contains(t, msgIds, id) + found := false + for _, iwant := range inspectMsgRpc.GetControl().GetIwant() { + for _, messageID := range iwant.GetMessageIDs() { + if id == messageID { + found = true + } + } + } + require.True(t, found) }) - require.NoError(t, inspector.processInspectRPCReq(inspectMsgReq)) + + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, inspectMsgRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) - t.Run("inspectIWantMessages should not disseminate invalid control message notification for iWant messages when cache misses exceeds allowed threshold if cache miss check size not exceeded", func(t *testing.T) { - inspector, distributor, rpcTracker, _, _ := inspectorFixture(t) - defer distributor.AssertNotCalled(t, "Distribute") - // if size of iwants not greater than 10 cache misses will not be checked - inspector.config.CacheMissCheckSize = 10 - // set high cache miss threshold to ensure we only disseminate notification when it is exceeded - inspector.config.IWantRPCInspectionConfig.CacheMissThreshold = .9 - msgIds := unittest.IdentifierListFixture(100).Strings() - expectedPeerID := unittest.PeerIdFixture(t) - inspectMsgReq, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixture(msgIds...)))) - require.NoError(t, err, "failed to get inspect message request") - rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() - // return false each time to eventually force a notification to be disseminated when the cache miss count finally exceeds the 90% threshold - rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(false).Run(func(args mock.Arguments) { - id, ok := args[0].(string) - require.True(t, ok) - require.Contains(t, msgIds, id) + t.Run("inspectIWantMessages should not disseminate invalid control message notification for iWant messages when cache misses exceeds allowed threshold if cache miss check size not exceeded", + func(t *testing.T) { + inspector, signalerCtx, cancel, distributor, rpcTracker, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + // if size of iwants not greater than 10 cache misses will not be checked + params.Config.CacheMissCheckSize = 10 + // set high cache miss threshold to ensure we only disseminate notification when it is exceeded + params.Config.IWantRPCInspectionConfig.CacheMissThreshold = .9 + }) + // oracle must be set even though iWant messages do not have topic IDs + defer distributor.AssertNotCalled(t, "Distribute") + + msgIds := unittest.IdentifierListFixture(100).Strings() + inspectMsgRpc := unittest.P2PRPCFixture(unittest.WithIWants(unittest.P2PRPCIWantFixture(msgIds...))) + rpcTracker.On("LastHighestIHaveRPCSize").Return(int64(100)).Maybe() + // return false each time to eventually force a notification to be disseminated when the cache miss count finally exceeds the 90% threshold + rpcTracker.On("WasIHaveRPCSent", mock.AnythingOfType("string")).Return(false).Run(func(args mock.Arguments) { + id, ok := args[0].(string) + require.True(t, ok) + require.Contains(t, msgIds, id) + }) + + from := unittest.PeerIdFixture(t) + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, inspectMsgRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) - require.NoError(t, inspector.processInspectRPCReq(inspectMsgReq)) - }) t.Run("inspectRpcPublishMessages should disseminate invalid control message notification when invalid pubsub messages count greater than configured RpcMessageErrorThreshold", func(t *testing.T) { - inspector, distributor, _, _, sporkID := inspectorFixture(t) - // 5 invalid pubsub messages will force notification dissemination - inspector.config.RpcMessageErrorThreshold = 4 + errThreshold := 500 + inspector, signalerCtx, cancel, distributor, _, sporkID, _, topicProviderOracle := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.RpcMessageErrorThreshold = errThreshold + }) // create unknown topic unknownTopic := channels.Topic(fmt.Sprintf("%s/%s", unittest.IdentifierFixture(), sporkID)).String() // create malformed topic malformedTopic := channels.Topic("!@#$%^&**((").String() // a topics spork ID is considered invalid if it does not match the current spork ID invalidSporkIDTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.PushBlocks, unittest.IdentifierFixture())).String() - // create 10 normal messages - pubsubMsgs := unittest.GossipSubMessageFixtures(t, 10, fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID)) - // add 5 invalid messages to force notification dissemination - pubsubMsgs = append(pubsubMsgs, []*pubsub_pb.Message{ + pubsubMsgs := unittest.GossipSubMessageFixtures(50, fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID)) + // add 550 invalid messages to force notification dissemination + invalidMessageFixtures := []*pubsub_pb.Message{ {Topic: &unknownTopic}, {Topic: &malformedTopic}, - {Topic: &malformedTopic}, - {Topic: &invalidSporkIDTopic}, {Topic: &invalidSporkIDTopic}, - }...) - expectedPeerID := unittest.PeerIdFixture(t) - req, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...))) - require.NoError(t, err, "failed to get inspect message request") + } + for i := 0; i < errThreshold+1; i++ { + pubsubMsgs = append(pubsubMsgs, invalidMessageFixtures[rand.Intn(len(invalidMessageFixtures))]) + } + rpc := unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...)) topics := make([]string, len(pubsubMsgs)) for i, msg := range pubsubMsgs { topics[i] = *msg.Topic } // set topic oracle to return list of topics to avoid hasSubscription errors and force topic validation - require.NoError(t, inspector.SetTopicOracle(func() []string { - return topics - })) - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.RpcPublishMessage, IsInvalidRpcPublishMessagesErr) + topicProviderOracle.UpdateTopics(topics) + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.RpcPublishMessage, validation.IsInvalidRpcPublishMessagesErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) - require.NoError(t, inspector.processInspectRPCReq(req)) - }) + inspector.Start(signalerCtx) + + require.NoError(t, inspector.Inspect(from, rpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) + }) t.Run("inspectRpcPublishMessages should disseminate invalid control message notification when subscription missing for topic", func(t *testing.T) { - inspector, distributor, _, _, sporkID := inspectorFixture(t) - // 5 invalid pubsub messages will force notification dissemination - inspector.config.RpcMessageErrorThreshold = 4 - pubsubMsgs := unittest.GossipSubMessageFixtures(t, 5, fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID)) - expectedPeerID := unittest.PeerIdFixture(t) - req, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...))) - require.NoError(t, err, "failed to get inspect message request") - topics := make([]string, len(pubsubMsgs)) - for i, msg := range pubsubMsgs { - topics[i] = *msg.Topic - } - // set topic oracle to return list of topics excluding first topic sent - require.NoError(t, inspector.SetTopicOracle(func() []string { - return []string{} - })) - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.RpcPublishMessage, IsInvalidRpcPublishMessagesErr) + errThreshold := 500 + inspector, signalerCtx, cancel, distributor, _, sporkID, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + params.Config.RpcMessageErrorThreshold = errThreshold + }) + pubsubMsgs := unittest.GossipSubMessageFixtures(errThreshold+1, fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID)) + from := unittest.PeerIdFixture(t) + rpc := unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...)) + checkNotification := checkNotificationFunc(t, from, p2pmsg.RpcPublishMessage, validation.IsInvalidRpcPublishMessagesErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) - require.NoError(t, inspector.processInspectRPCReq(req)) + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, rpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("inspectRpcPublishMessages should disseminate invalid control message notification when publish messages contain no topic", func(t *testing.T) { - inspector, distributor, _, _, _ := inspectorFixture(t) - // 5 invalid pubsub messages will force notification dissemination - inspector.config.RpcMessageErrorThreshold = 4 - pubsubMsgs := unittest.GossipSubMessageFixtures(t, 10, "") - expectedPeerID := unittest.PeerIdFixture(t) - req, err := NewInspectRPCRequest(expectedPeerID, unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...))) - require.NoError(t, err, "failed to get inspect message request") + errThreshold := 500 + inspector, signalerCtx, cancel, distributor, _, _, _, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + // 5 invalid pubsub messages will force notification dissemination + params.Config.RpcMessageErrorThreshold = errThreshold + }) + pubsubMsgs := unittest.GossipSubMessageFixtures(errThreshold+1, "") + rpc := unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...)) topics := make([]string, len(pubsubMsgs)) for i, msg := range pubsubMsgs { topics[i] = *msg.Topic } // set topic oracle to return list of topics excluding first topic sent - require.NoError(t, inspector.SetTopicOracle(func() []string { - return []string{} - })) - checkNotification := checkNotificationFunc(t, expectedPeerID, p2pmsg.RpcPublishMessage, IsInvalidRpcPublishMessagesErr) + from := unittest.PeerIdFixture(t) + checkNotification := checkNotificationFunc(t, from, p2pmsg.RpcPublishMessage, validation.IsInvalidRpcPublishMessagesErr, p2p.CtrlMsgNonClusterTopicType) + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, rpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) + }) + t.Run("inspectRpcPublishMessages should not inspect pubsub message sender on public networks", func(t *testing.T) { + inspector, signalerCtx, cancel, _, _, sporkID, idProvider, topicProviderOracle := inspectorFixture(t) + from := unittest.PeerIdFixture(t) + defer idProvider.AssertNotCalled(t, "ByPeerID", from) + topic := fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID) + topicProviderOracle.UpdateTopics([]string{topic}) + pubsubMsgs := unittest.GossipSubMessageFixtures(10, topic, unittest.WithFrom(from)) + rpc := unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...)) + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, rpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) + }) + t.Run("inspectRpcPublishMessages should disseminate invalid control message notification when message is from unstaked peer", func(t *testing.T) { + inspector, signalerCtx, cancel, distributor, _, sporkID, idProvider, topicProviderOracle := inspectorFixture(t, func(params *validation.InspectorParams) { + // override the inspector and params, run the inspector in private mode + params.NetworkingType = network.PrivateNetwork + }) + from := unittest.PeerIdFixture(t) + topic := fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID) + topicProviderOracle.UpdateTopics([]string{topic}) + // default RpcMessageErrorThreshold is 500, 501 messages should trigger a notification + pubsubMsgs := unittest.GossipSubMessageFixtures(501, topic, unittest.WithFrom(from)) + idProvider.On("ByPeerID", from).Return(nil, false).Times(501) + rpc := unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...)) + checkNotification := checkNotificationFunc(t, from, p2pmsg.RpcPublishMessage, validation.IsInvalidRpcPublishMessagesErr, p2p.CtrlMsgNonClusterTopicType) + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, rpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) + }) + t.Run("inspectRpcPublishMessages should disseminate invalid control message notification when message is from ejected peer", func(t *testing.T) { + inspector, signalerCtx, cancel, distributor, _, sporkID, idProvider, topicProviderOracle := inspectorFixture(t, func(params *validation.InspectorParams) { + // override the inspector and params, run the inspector in private mode + params.NetworkingType = network.PrivateNetwork + }) + from := unittest.PeerIdFixture(t) + id := unittest.IdentityFixture() + id.EpochParticipationStatus = flow.EpochParticipationStatusEjected + topic := fmt.Sprintf("%s/%s", channels.TestNetworkChannel, sporkID) + topicProviderOracle.UpdateTopics([]string{topic}) + pubsubMsgs := unittest.GossipSubMessageFixtures(501, topic, unittest.WithFrom(from)) + idProvider.On("ByPeerID", from).Return(id, true).Times(501) + rpc := unittest.P2PRPCFixture(unittest.WithPubsubMessages(pubsubMsgs...)) + checkNotification := checkNotificationFunc(t, from, p2pmsg.RpcPublishMessage, validation.IsInvalidRpcPublishMessagesErr, p2p.CtrlMsgNonClusterTopicType) distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) - require.NoError(t, inspector.processInspectRPCReq(req)) + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, rpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) } // TestNewControlMsgValidationInspector_validateClusterPrefixedTopic ensures cluster prefixed topics are validated as expected. func TestNewControlMsgValidationInspector_validateClusterPrefixedTopic(t *testing.T) { t.Run("validateClusterPrefixedTopic should not return an error for valid cluster prefixed topics", func(t *testing.T) { - inspector, distributor, _, idProvider, sporkID := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, _, sporkID, idProvider, topicProviderOracle := inspectorFixture(t) defer distributor.AssertNotCalled(t, "Distribute") clusterID := flow.ChainID(unittest.IdentifierFixture().String()) - clusterPrefixedTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(clusterID), sporkID)) - from := peer.ID("peerID987654321") + clusterPrefixedTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(clusterID), sporkID)).String() + topicProviderOracle.UpdateTopics([]string{clusterPrefixedTopic}) + from := unittest.PeerIdFixture(t) idProvider.On("ByPeerID", from).Return(unittest.IdentityFixture(), true).Once() - require.NoError(t, inspector.validateClusterPrefixedTopic(from, clusterPrefixedTopic, flow.ChainIDList{clusterID, flow.ChainID(unittest.IdentifierFixture().String()), flow.ChainID(unittest.IdentifierFixture().String())})) + inspectMsgRpc := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixture(&clusterPrefixedTopic))) + inspector.ActiveClustersChanged(flow.ChainIDList{clusterID, flow.ChainID(unittest.IdentifierFixture().String()), flow.ChainID(unittest.IdentifierFixture().String())}) + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, inspectMsgRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("validateClusterPrefixedTopic should not return error if cluster prefixed hard threshold not exceeded for unknown cluster ids", func(t *testing.T) { - inspector, distributor, _, idProvider, sporkID := inspectorFixture(t) - // set hard threshold to small number , ensure that a single unknown cluster prefix id does not cause a notification to be disseminated - inspector.config.ClusterPrefixHardThreshold = 2 + inspector, signalerCtx, cancel, distributor, _, sporkID, idProvider, _ := inspectorFixture(t, func(params *validation.InspectorParams) { + // set hard threshold to small number , ensure that a single unknown cluster prefix id does not cause a notification to be disseminated + params.Config.ClusterPrefixHardThreshold = 2 + }) defer distributor.AssertNotCalled(t, "Distribute") clusterID := flow.ChainID(unittest.IdentifierFixture().String()) clusterPrefixedTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(clusterID), sporkID)).String() - from := peer.ID("peerID987654321") - inspectMsgReq, err := NewInspectRPCRequest(from, unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixture(&clusterPrefixedTopic)))) - require.NoError(t, err, "failed to get inspect message request") - idProvider.On("ByPeerID", from).Return(unittest.IdentityFixture(), true).Once() - require.NoError(t, inspector.processInspectRPCReq(inspectMsgReq)) + from := unittest.PeerIdFixture(t) + inspectMsgRpc := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixture(&clusterPrefixedTopic))) + id := unittest.IdentityFixture() + idProvider.On("ByPeerID", from).Return(id, true).Once() + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, inspectMsgRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) + stopInspector(t, cancel, inspector) }) t.Run("validateClusterPrefixedTopic should return an error when sender is unstaked", func(t *testing.T) { - inspector, distributor, _, idProvider, sporkID := inspectorFixture(t) + inspector, signalerCtx, cancel, distributor, _, sporkID, idProvider, topicProviderOracle := inspectorFixture(t) defer distributor.AssertNotCalled(t, "Distribute") clusterID := flow.ChainID(unittest.IdentifierFixture().String()) - clusterPrefixedTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(clusterID), sporkID)) - from := peer.ID("peerID987654321") + clusterPrefixedTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(clusterID), sporkID)).String() + topicProviderOracle.UpdateTopics([]string{clusterPrefixedTopic}) + from := unittest.PeerIdFixture(t) idProvider.On("ByPeerID", from).Return(nil, false).Once() - err := inspector.validateClusterPrefixedTopic(from, clusterPrefixedTopic, flow.ChainIDList{flow.ChainID(unittest.IdentifierFixture().String())}) - require.True(t, IsErrUnstakedPeer(err)) + inspectMsgRpc := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixture(&clusterPrefixedTopic))) + inspector.ActiveClustersChanged(flow.ChainIDList{flow.ChainID(unittest.IdentifierFixture().String())}) + inspector.Start(signalerCtx) + require.NoError(t, inspector.Inspect(from, inspectMsgRpc)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) t.Run("validateClusterPrefixedTopic should return error if cluster prefixed hard threshold exceeded for unknown cluster ids", func(t *testing.T) { - inspector, distributor, _, idProvider, sporkID := inspectorFixture(t) - defer distributor.AssertNotCalled(t, "Distribute") + inspector, signalerCtx, cancel, distributor, _, sporkID, idProvider, topicProviderOracle := inspectorFixture(t, func(params *validation.InspectorParams) { + // the 11th unknown cluster ID error should cause an error + params.Config.ClusterPrefixHardThreshold = 10 + }) clusterID := flow.ChainID(unittest.IdentifierFixture().String()) - clusterPrefixedTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(clusterID), sporkID)) - from := peer.ID("peerID987654321") + clusterPrefixedTopic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(clusterID), sporkID)).String() + topicProviderOracle.UpdateTopics([]string{clusterPrefixedTopic}) + from := unittest.PeerIdFixture(t) identity := unittest.IdentityFixture() - idProvider.On("ByPeerID", from).Return(identity, true).Once() - inspector.config.ClusterPrefixHardThreshold = 10 - for i := 0; i < 15; i++ { - _, err := inspector.tracker.Inc(identity.NodeID) - require.NoError(t, err) + idProvider.On("ByPeerID", from).Return(identity, true).Times(11) + checkNotification := checkNotificationFunc(t, from, p2pmsg.CtrlMsgGraft, channels.IsUnknownClusterIDErr, p2p.CtrlMsgTopicTypeClusterPrefixed) + inspectMsgRpc := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixture(&clusterPrefixedTopic))) + inspector.ActiveClustersChanged(flow.ChainIDList{flow.ChainID(unittest.IdentifierFixture().String())}) + distributor.On("Distribute", mock.AnythingOfType("*p2p.InvCtrlMsgNotif")).Return(nil).Once().Run(checkNotification) + inspector.Start(signalerCtx) + for i := 0; i < 11; i++ { + require.NoError(t, inspector.Inspect(from, inspectMsgRpc)) } - err := inspector.validateClusterPrefixedTopic(from, clusterPrefixedTopic, flow.ChainIDList{flow.ChainID(unittest.IdentifierFixture().String())}) - require.True(t, channels.IsUnknownClusterIDErr(err)) + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) }) } // TestControlMessageValidationInspector_ActiveClustersChanged validates the expected update of the active cluster IDs list. func TestControlMessageValidationInspector_ActiveClustersChanged(t *testing.T) { - sporkID := unittest.IdentifierFixture() - flowConfig, err := config.DefaultConfig() - require.NoError(t, err, "failed to get default flow config") - distributor := mockp2p.NewGossipSubInspectorNotifDistributor(t) - signalerCtx := irrecoverable.NewMockSignalerContext(t, context.Background()) - inspector, err := NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &flowConfig.NetworkConfig.GossipSubRPCValidationInspectorConfigs, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), mockmodule.NewIdentityProvider(t), metrics.NewNoopCollector(), mockp2p.NewRpcControlTracking(t)) - require.NoError(t, err) + inspector, signalerCtx, cancel, distributor, _, sporkID, idProvider, _ := inspectorFixture(t) + defer distributor.AssertNotCalled(t, "Distribute") + identity := unittest.IdentityFixture() + idProvider.On("ByPeerID", mock.AnythingOfType("peer.ID")).Return(identity, true).Times(5) activeClusterIds := make(flow.ChainIDList, 0) for _, id := range unittest.IdentifierListFixture(5) { activeClusterIds = append(activeClusterIds, flow.ChainID(id.String())) } - inspector.ActiveClustersChanged(activeClusterIds) - require.ElementsMatch(t, activeClusterIds, inspector.tracker.GetActiveClusterIds(), "mismatch active cluster ids list") -} - -// inspectorFixture returns a *ControlMsgValidationInspector fixture. -func inspectorFixture(t *testing.T) (*ControlMsgValidationInspector, *mockp2p.GossipSubInspectorNotifDistributor, *mockp2p.RpcControlTracking, *mockmodule.IdentityProvider, flow.Identifier) { - sporkID := unittest.IdentifierFixture() - flowConfig, err := config.DefaultConfig() - require.NoError(t, err, "failed to get default flow config") - distributor := mockp2p.NewGossipSubInspectorNotifDistributor(t) - idProvider := mockmodule.NewIdentityProvider(t) - signalerCtx := irrecoverable.NewMockSignalerContext(t, context.Background()) - inspector, err := NewControlMsgValidationInspector(signalerCtx, unittest.Logger(), sporkID, &flowConfig.NetworkConfig.GossipSubRPCValidationInspectorConfigs, distributor, metrics.NewNoopCollector(), metrics.NewNoopCollector(), idProvider, metrics.NewNoopCollector(), mockp2p.NewRpcControlTracking(t)) - require.NoError(t, err, "failed to create control message validation inspector fixture") - rpcTracker := mockp2p.NewRpcControlTracking(t) - inspector.rpcTracker = rpcTracker - return inspector, distributor, rpcTracker, idProvider, sporkID + inspector.Start(signalerCtx) + from := unittest.PeerIdFixture(t) + for _, id := range activeClusterIds { + topic := channels.Topic(fmt.Sprintf("%s/%s", channels.SyncCluster(id), sporkID)).String() + rpc := unittest.P2PRPCFixture(unittest.WithGrafts(unittest.P2PRPCGraftFixture(&topic))) + require.NoError(t, inspector.Inspect(from, rpc)) + } + // sleep for 1 second to ensure rpc's is processed + time.Sleep(time.Second) + stopInspector(t, cancel, inspector) } // invalidTopics returns 3 invalid topics. @@ -541,12 +805,66 @@ func invalidTopics(t *testing.T, sporkID flow.Identifier) (string, string, strin } // checkNotificationFunc returns util func used to ensure invalid control message notification disseminated contains expected information. -func checkNotificationFunc(t *testing.T, expectedPeerID peer.ID, expectedMsgType p2pmsg.ControlMessageType, isExpectedErr func(err error) bool) func(args mock.Arguments) { +func checkNotificationFunc(t *testing.T, + expectedPeerID peer.ID, + expectedMsgType p2pmsg.ControlMessageType, + isExpectedErr func(err error) bool, + topicType p2p.CtrlMsgTopicType) func(args mock.Arguments) { return func(args mock.Arguments) { notification, ok := args[0].(*p2p.InvCtrlMsgNotif) require.True(t, ok) + require.Equal(t, topicType, notification.TopicType) require.Equal(t, expectedPeerID, notification.PeerID) require.Equal(t, expectedMsgType, notification.MsgType) require.True(t, isExpectedErr(notification.Error)) } } + +func inspectorFixture(t *testing.T, opts ...func(params *validation.InspectorParams)) (*validation.ControlMsgValidationInspector, + *irrecoverable.MockSignalerContext, + context.CancelFunc, + *mockp2p.GossipSubInspectorNotificationDistributor, + *mockp2p.RpcControlTracking, + flow.Identifier, + *mockmodule.IdentityProvider, + *internal.MockUpdatableTopicProvider) { + sporkID := unittest.IdentifierFixture() + flowConfig, err := config.DefaultConfig() + require.NoError(t, err) + distributor := mockp2p.NewGossipSubInspectorNotificationDistributor(t) + p2ptest.MockInspectorNotificationDistributorReadyDoneAware(distributor) + idProvider := mockmodule.NewIdentityProvider(t) + rpcTracker := mockp2p.NewRpcControlTracking(t) + topicProviderOracle := internal.NewMockUpdatableTopicProvider() + params := &validation.InspectorParams{ + Logger: unittest.Logger(), + SporkID: sporkID, + Config: &flowConfig.NetworkConfig.GossipSubRPCValidationInspectorConfigs, + Distributor: distributor, + IdProvider: idProvider, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + InspectorMetrics: metrics.NewNoopCollector(), + RpcTracker: rpcTracker, + NetworkingType: network.PublicNetwork, + TopicOracle: func() p2p.TopicProvider { + return topicProviderOracle + }, + } + for _, opt := range opts { + opt(params) + } + validationInspector, err := validation.NewControlMsgValidationInspector(params) + require.NoError(t, err, "failed to create control message validation inspector fixture") + ctx, cancel := context.WithCancel(context.Background()) + signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) + return validationInspector, signalerCtx, cancel, distributor, rpcTracker, sporkID, idProvider, topicProviderOracle +} + +func stopInspector(t *testing.T, cancel context.CancelFunc, inspector *validation.ControlMsgValidationInspector) { + cancel() + unittest.RequireCloseBefore(t, inspector.Done(), 5*time.Second, "inspector did not stop") +} + +func defaultTopicOracle() []string { + return []string{} +} diff --git a/network/p2p/mock/gossip_sub_inspector_suite.go b/network/p2p/mock/gossip_sub_inspector_suite.go index 59e4fa743f6..90c7e5b15d7 100644 --- a/network/p2p/mock/gossip_sub_inspector_suite.go +++ b/network/p2p/mock/gossip_sub_inspector_suite.go @@ -78,20 +78,6 @@ func (_m *GossipSubInspectorSuite) Ready() <-chan struct{} { return r0 } -// SetTopicOracle provides a mock function with given fields: topicOracle -func (_m *GossipSubInspectorSuite) SetTopicOracle(topicOracle func() []string) error { - ret := _m.Called(topicOracle) - - var r0 error - if rf, ok := ret.Get(0).(func(func() []string) error); ok { - r0 = rf(topicOracle) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // Start provides a mock function with given fields: _a0 func (_m *GossipSubInspectorSuite) Start(_a0 irrecoverable.SignalerContext) { _m.Called(_a0) diff --git a/network/p2p/mock/gossip_sub_rpc_inspector_suite_factory_func.go b/network/p2p/mock/gossip_sub_rpc_inspector_suite_factory_func.go index ae84974031c..24c253b70d2 100644 --- a/network/p2p/mock/gossip_sub_rpc_inspector_suite_factory_func.go +++ b/network/p2p/mock/gossip_sub_rpc_inspector_suite_factory_func.go @@ -26,25 +26,25 @@ type GossipSubRpcInspectorSuiteFactoryFunc struct { mock.Mock } -// Execute provides a mock function with given fields: _a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7 -func (_m *GossipSubRpcInspectorSuiteFactoryFunc) Execute(_a0 irrecoverable.SignalerContext, _a1 zerolog.Logger, _a2 flow.Identifier, _a3 *p2pconf.GossipSubRPCInspectorsConfig, _a4 module.GossipSubMetrics, _a5 metrics.HeroCacheMetricsFactory, _a6 network.NetworkingType, _a7 module.IdentityProvider) (p2p.GossipSubInspectorSuite, error) { - ret := _m.Called(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7) +// Execute provides a mock function with given fields: _a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8 +func (_m *GossipSubRpcInspectorSuiteFactoryFunc) Execute(_a0 irrecoverable.SignalerContext, _a1 zerolog.Logger, _a2 flow.Identifier, _a3 *p2pconf.GossipSubRPCInspectorsConfig, _a4 module.GossipSubMetrics, _a5 metrics.HeroCacheMetricsFactory, _a6 network.NetworkingType, _a7 module.IdentityProvider, _a8 func() p2p.TopicProvider) (p2p.GossipSubInspectorSuite, error) { + ret := _m.Called(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8) var r0 p2p.GossipSubInspectorSuite var r1 error - if rf, ok := ret.Get(0).(func(irrecoverable.SignalerContext, zerolog.Logger, flow.Identifier, *p2pconf.GossipSubRPCInspectorsConfig, module.GossipSubMetrics, metrics.HeroCacheMetricsFactory, network.NetworkingType, module.IdentityProvider) (p2p.GossipSubInspectorSuite, error)); ok { - return rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7) + if rf, ok := ret.Get(0).(func(irrecoverable.SignalerContext, zerolog.Logger, flow.Identifier, *p2pconf.GossipSubRPCInspectorsConfig, module.GossipSubMetrics, metrics.HeroCacheMetricsFactory, network.NetworkingType, module.IdentityProvider, func() p2p.TopicProvider) (p2p.GossipSubInspectorSuite, error)); ok { + return rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8) } - if rf, ok := ret.Get(0).(func(irrecoverable.SignalerContext, zerolog.Logger, flow.Identifier, *p2pconf.GossipSubRPCInspectorsConfig, module.GossipSubMetrics, metrics.HeroCacheMetricsFactory, network.NetworkingType, module.IdentityProvider) p2p.GossipSubInspectorSuite); ok { - r0 = rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7) + if rf, ok := ret.Get(0).(func(irrecoverable.SignalerContext, zerolog.Logger, flow.Identifier, *p2pconf.GossipSubRPCInspectorsConfig, module.GossipSubMetrics, metrics.HeroCacheMetricsFactory, network.NetworkingType, module.IdentityProvider, func() p2p.TopicProvider) p2p.GossipSubInspectorSuite); ok { + r0 = rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(p2p.GossipSubInspectorSuite) } } - if rf, ok := ret.Get(1).(func(irrecoverable.SignalerContext, zerolog.Logger, flow.Identifier, *p2pconf.GossipSubRPCInspectorsConfig, module.GossipSubMetrics, metrics.HeroCacheMetricsFactory, network.NetworkingType, module.IdentityProvider) error); ok { - r1 = rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7) + if rf, ok := ret.Get(1).(func(irrecoverable.SignalerContext, zerolog.Logger, flow.Identifier, *p2pconf.GossipSubRPCInspectorsConfig, module.GossipSubMetrics, metrics.HeroCacheMetricsFactory, network.NetworkingType, module.IdentityProvider, func() p2p.TopicProvider) error); ok { + r1 = rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7, _a8) } else { r1 = ret.Error(1) } diff --git a/network/p2p/mock/score_option_builder.go b/network/p2p/mock/score_option_builder.go index d0f437bfc12..e5698aadf94 100644 --- a/network/p2p/mock/score_option_builder.go +++ b/network/p2p/mock/score_option_builder.go @@ -3,6 +3,7 @@ package mockp2p import ( + irrecoverable "github.com/onflow/flow-go/module/irrecoverable" mock "github.com/stretchr/testify/mock" pubsub "github.com/libp2p/go-libp2p-pubsub" @@ -41,6 +42,43 @@ func (_m *ScoreOptionBuilder) BuildFlowPubSubScoreOption() (*pubsub.PeerScorePar return r0, r1 } +// Done provides a mock function with given fields: +func (_m *ScoreOptionBuilder) Done() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + +// Ready provides a mock function with given fields: +func (_m *ScoreOptionBuilder) Ready() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + +// Start provides a mock function with given fields: _a0 +func (_m *ScoreOptionBuilder) Start(_a0 irrecoverable.SignalerContext) { + _m.Called(_a0) +} + // TopicScoreParams provides a mock function with given fields: _a0 func (_m *ScoreOptionBuilder) TopicScoreParams(_a0 *pubsub.Topic) *pubsub.TopicScoreParams { ret := _m.Called(_a0) diff --git a/network/p2p/mock/stream_factory.go b/network/p2p/mock/stream_factory.go index 5b2192e703f..b95e52d3ff8 100644 --- a/network/p2p/mock/stream_factory.go +++ b/network/p2p/mock/stream_factory.go @@ -18,46 +18,25 @@ type StreamFactory struct { mock.Mock } -// Connect provides a mock function with given fields: _a0, _a1 -func (_m *StreamFactory) Connect(_a0 context.Context, _a1 peer.AddrInfo) error { - ret := _m.Called(_a0, _a1) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, peer.AddrInfo) error); ok { - r0 = rf(_a0, _a1) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // NewStream provides a mock function with given fields: _a0, _a1, _a2 -func (_m *StreamFactory) NewStream(_a0 context.Context, _a1 peer.ID, _a2 ...protocol.ID) (network.Stream, error) { - _va := make([]interface{}, len(_a2)) - for _i := range _a2 { - _va[_i] = _a2[_i] - } - var _ca []interface{} - _ca = append(_ca, _a0, _a1) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) +func (_m *StreamFactory) NewStream(_a0 context.Context, _a1 peer.ID, _a2 protocol.ID) (network.Stream, error) { + ret := _m.Called(_a0, _a1, _a2) var r0 network.Stream var r1 error - if rf, ok := ret.Get(0).(func(context.Context, peer.ID, ...protocol.ID) (network.Stream, error)); ok { - return rf(_a0, _a1, _a2...) + if rf, ok := ret.Get(0).(func(context.Context, peer.ID, protocol.ID) (network.Stream, error)); ok { + return rf(_a0, _a1, _a2) } - if rf, ok := ret.Get(0).(func(context.Context, peer.ID, ...protocol.ID) network.Stream); ok { - r0 = rf(_a0, _a1, _a2...) + if rf, ok := ret.Get(0).(func(context.Context, peer.ID, protocol.ID) network.Stream); ok { + r0 = rf(_a0, _a1, _a2) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(network.Stream) } } - if rf, ok := ret.Get(1).(func(context.Context, peer.ID, ...protocol.ID) error); ok { - r1 = rf(_a0, _a1, _a2...) + if rf, ok := ret.Get(1).(func(context.Context, peer.ID, protocol.ID) error); ok { + r1 = rf(_a0, _a1, _a2) } else { r1 = ret.Error(1) } diff --git a/network/p2p/mock/subscription_provider.go b/network/p2p/mock/subscription_provider.go index bc119c00f02..3445a89e6a0 100644 --- a/network/p2p/mock/subscription_provider.go +++ b/network/p2p/mock/subscription_provider.go @@ -3,6 +3,7 @@ package mockp2p import ( + irrecoverable "github.com/onflow/flow-go/module/irrecoverable" mock "github.com/stretchr/testify/mock" peer "github.com/libp2p/go-libp2p/core/peer" @@ -13,6 +14,22 @@ type SubscriptionProvider struct { mock.Mock } +// Done provides a mock function with given fields: +func (_m *SubscriptionProvider) Done() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + // GetSubscribedTopics provides a mock function with given fields: pid func (_m *SubscriptionProvider) GetSubscribedTopics(pid peer.ID) []string { ret := _m.Called(pid) @@ -29,6 +46,27 @@ func (_m *SubscriptionProvider) GetSubscribedTopics(pid peer.ID) []string { return r0 } +// Ready provides a mock function with given fields: +func (_m *SubscriptionProvider) Ready() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + +// Start provides a mock function with given fields: _a0 +func (_m *SubscriptionProvider) Start(_a0 irrecoverable.SignalerContext) { + _m.Called(_a0) +} + type mockConstructorTestingTNewSubscriptionProvider interface { mock.TestingT Cleanup(func()) diff --git a/network/p2p/mock/subscription_validator.go b/network/p2p/mock/subscription_validator.go index b7f71843639..33c7d1f2d75 100644 --- a/network/p2p/mock/subscription_validator.go +++ b/network/p2p/mock/subscription_validator.go @@ -4,9 +4,9 @@ package mockp2p import ( flow "github.com/onflow/flow-go/model/flow" - mock "github.com/stretchr/testify/mock" + irrecoverable "github.com/onflow/flow-go/module/irrecoverable" - p2p "github.com/onflow/flow-go/network/p2p" + mock "github.com/stretchr/testify/mock" peer "github.com/libp2p/go-libp2p/core/peer" ) @@ -30,20 +30,43 @@ func (_m *SubscriptionValidator) CheckSubscribedToAllowedTopics(pid peer.ID, rol return r0 } -// RegisterSubscriptionProvider provides a mock function with given fields: provider -func (_m *SubscriptionValidator) RegisterSubscriptionProvider(provider p2p.SubscriptionProvider) error { - ret := _m.Called(provider) +// Done provides a mock function with given fields: +func (_m *SubscriptionValidator) Done() <-chan struct{} { + ret := _m.Called() - var r0 error - if rf, ok := ret.Get(0).(func(p2p.SubscriptionProvider) error); ok { - r0 = rf(provider) + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() } else { - r0 = ret.Error(0) + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } } return r0 } +// Ready provides a mock function with given fields: +func (_m *SubscriptionValidator) Ready() <-chan struct{} { + ret := _m.Called() + + var r0 <-chan struct{} + if rf, ok := ret.Get(0).(func() <-chan struct{}); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan struct{}) + } + } + + return r0 +} + +// Start provides a mock function with given fields: _a0 +func (_m *SubscriptionValidator) Start(_a0 irrecoverable.SignalerContext) { + _m.Called(_a0) +} + type mockConstructorTestingTNewSubscriptionValidator interface { mock.TestingT Cleanup(func()) diff --git a/network/p2p/p2pbuilder/gossipsub/gossipSubBuilder.go b/network/p2p/p2pbuilder/gossipsub/gossipSubBuilder.go index f3221555b09..8237944f13f 100644 --- a/network/p2p/p2pbuilder/gossipsub/gossipSubBuilder.go +++ b/network/p2p/p2pbuilder/gossipsub/gossipSubBuilder.go @@ -40,16 +40,18 @@ type Builder struct { subscriptionFilter pubsub.SubscriptionFilter gossipSubFactory p2p.GossipSubFactoryFunc gossipSubConfigFunc p2p.GossipSubAdapterConfigFunc - gossipSubPeerScoring bool // whether to enable gossipsub peer scoring + gossipSubPeerScoring bool // whether to enable gossipsub peer scoring + scoringRegistryConfig p2pconf.GossipSubScoringRegistryConfig gossipSubScoreTracerInterval time.Duration // the interval at which the gossipsub score tracer logs the peer scores. // gossipSubTracer is a callback interface that is called by the gossipsub implementation upon // certain events. Currently, we use it to log and observe the local mesh of the node. - gossipSubTracer p2p.PubSubTracer - scoreOptionConfig *scoring.ScoreOptionConfig - idProvider module.IdentityProvider - routingSystem routing.Routing - rpcInspectorConfig *p2pconf.GossipSubRPCInspectorsConfig - rpcInspectorSuiteFactory p2p.GossipSubRpcInspectorSuiteFactoryFunc + gossipSubTracer p2p.PubSubTracer + scoreOptionConfig *scoring.ScoreOptionConfig + subscriptionProviderParam *p2pconf.SubscriptionProviderParameters + idProvider module.IdentityProvider + routingSystem routing.Routing + rpcInspectorConfig *p2pconf.GossipSubRPCInspectorsConfig + rpcInspectorSuiteFactory p2p.GossipSubRpcInspectorSuiteFactoryFunc } var _ p2p.GossipSubBuilder = (*Builder)(nil) @@ -183,25 +185,27 @@ func NewGossipSubBuilder( networkType network.NetworkingType, sporkId flow.Identifier, idProvider module.IdentityProvider, + scoringRegistryConfig p2pconf.GossipSubScoringRegistryConfig, rpcInspectorConfig *p2pconf.GossipSubRPCInspectorsConfig, - rpcTracker p2p.RpcControlTracking, -) *Builder { + subscriptionProviderPrams *p2pconf.SubscriptionProviderParameters, + rpcTracker p2p.RpcControlTracking) *Builder { lg := logger.With(). Str("component", "gossipsub"). Str("network-type", networkType.String()). Logger() b := &Builder{ - logger: lg, - metricsCfg: metricsCfg, - sporkId: sporkId, - networkType: networkType, - idProvider: idProvider, - gossipSubFactory: defaultGossipSubFactory(), - gossipSubConfigFunc: defaultGossipSubAdapterConfig(), - scoreOptionConfig: scoring.NewScoreOptionConfig(lg, idProvider), - rpcInspectorConfig: rpcInspectorConfig, - rpcInspectorSuiteFactory: defaultInspectorSuite(rpcTracker), + logger: lg, + metricsCfg: metricsCfg, + sporkId: sporkId, + networkType: networkType, + idProvider: idProvider, + gossipSubFactory: defaultGossipSubFactory(), + gossipSubConfigFunc: defaultGossipSubAdapterConfig(), + scoreOptionConfig: scoring.NewScoreOptionConfig(lg, idProvider), + rpcInspectorConfig: rpcInspectorConfig, + rpcInspectorSuiteFactory: defaultInspectorSuite(rpcTracker), + subscriptionProviderParam: subscriptionProviderPrams, } return b @@ -240,7 +244,8 @@ func defaultInspectorSuite(rpcTracker p2p.RpcControlTracking) p2p.GossipSubRpcIn gossipSubMetrics module.GossipSubMetrics, heroCacheMetricsFactory metrics.HeroCacheMetricsFactory, networkType network.NetworkingType, - idProvider module.IdentityProvider) (p2p.GossipSubInspectorSuite, error) { + idProvider module.IdentityProvider, + topicProvider func() p2p.TopicProvider) (p2p.GossipSubInspectorSuite, error) { metricsInspector := inspector.NewControlMsgMetricsInspector( logger, p2pnode.NewGossipSubControlMessageMetrics(gossipSubMetrics, logger), @@ -253,27 +258,23 @@ func defaultInspectorSuite(rpcTracker p2p.RpcControlTracking) p2p.GossipSubRpcIn networkType)), }...) notificationDistributor := distributor.DefaultGossipSubInspectorNotificationDistributor( - logger, - []queue.HeroStoreConfigOption{ + logger, []queue.HeroStoreConfigOption{ queue.WithHeroStoreSizeLimit(inspectorCfg.GossipSubRPCInspectorNotificationCacheSize), queue.WithHeroStoreCollector(metrics.RpcInspectorNotificationQueueMetricFactory(heroCacheMetricsFactory, networkType))}...) - inspectMsgQueueCacheCollector := metrics.GossipSubRPCInspectorQueueMetricFactory(heroCacheMetricsFactory, networkType) - clusterPrefixedCacheCollector := metrics.GossipSubRPCInspectorClusterPrefixedCacheMetricFactory( - heroCacheMetricsFactory, - networkType) - rpcValidationInspector, err := validation.NewControlMsgValidationInspector( - ctx, - logger, - sporkId, - &inspectorCfg.GossipSubRPCValidationInspectorConfigs, - notificationDistributor, - inspectMsgQueueCacheCollector, - clusterPrefixedCacheCollector, - idProvider, - gossipSubMetrics, - rpcTracker, - ) + params := &validation.InspectorParams{ + Logger: logger, + SporkID: sporkId, + Config: &inspectorCfg.GossipSubRPCValidationInspectorConfigs, + Distributor: notificationDistributor, + HeroCacheMetricsFactory: heroCacheMetricsFactory, + IdProvider: idProvider, + InspectorMetrics: gossipSubMetrics, + RpcTracker: rpcTracker, + NetworkingType: networkType, + TopicOracle: topicProvider, + } + rpcValidationInspector, err := validation.NewControlMsgValidationInspector(params) if err != nil { return nil, fmt.Errorf("failed to create new control message valiadation inspector: %w", err) } @@ -292,6 +293,10 @@ func defaultInspectorSuite(rpcTracker p2p.RpcControlTracking) p2p.GossipSubRpcIn // - error: if an error occurs during the creation of the GossipSub pubsub system, it is returned. Otherwise, nil is returned. // Note that on happy path, the returned error is nil. Any error returned is unexpected and should be handled as irrecoverable. func (g *Builder) Build(ctx irrecoverable.SignalerContext) (p2p.PubSubAdapter, error) { + // placeholder for the gossipsub pubsub system that will be created (so that it can be passed around even + // before it is created). + var gossipSub p2p.PubSubAdapter + gossipSubConfigs := g.gossipSubConfigFunc( &p2p.BasePubSubAdapterConfig{ MaxMessageSize: p2pnode.DefaultMaxPubSubMsgSize, @@ -314,7 +319,10 @@ func (g *Builder) Build(ctx irrecoverable.SignalerContext) (p2p.PubSubAdapter, e g.metricsCfg.Metrics, g.metricsCfg.HeroCacheFactory, g.networkType, - g.idProvider) + g.idProvider, + func() p2p.TopicProvider { + return gossipSub + }) if err != nil { return nil, fmt.Errorf("could not create gossipsub inspector suite: %w", err) } @@ -323,19 +331,31 @@ func (g *Builder) Build(ctx irrecoverable.SignalerContext) (p2p.PubSubAdapter, e var scoreOpt *scoring.ScoreOption var scoreTracer p2p.PeerScoreTracer if g.gossipSubPeerScoring { + // wires the gossipsub score option to the subscription provider. + subscriptionProvider, err := scoring.NewSubscriptionProvider(&scoring.SubscriptionProviderConfig{ + Logger: g.logger, + TopicProviderOracle: func() p2p.TopicProvider { + // gossipSub has not been created yet, hence instead of passing it directly, we pass a function that returns it. + // the cardinal assumption is this function is only invoked when the subscription provider is started, which is + // after the gossipsub is created. + return gossipSub + }, + IdProvider: g.idProvider, + Params: g.subscriptionProviderParam, + HeroCacheMetricsFactory: g.metricsCfg.HeroCacheFactory, + }) + if err != nil { + return nil, fmt.Errorf("could not create subscription provider: %w", err) + } + g.scoreOptionConfig.SetRegisterNotificationConsumerFunc(inspectorSuite.AddInvalidControlMessageConsumer) - scoreOpt = scoring.NewScoreOption(g.scoreOptionConfig) + scoreOpt = scoring.NewScoreOption(g.scoringRegistryConfig, g.scoreOptionConfig, subscriptionProvider) gossipSubConfigs.WithScoreOption(scoreOpt) if g.gossipSubScoreTracerInterval > 0 { - scoreTracer = tracer.NewGossipSubScoreTracer( - g.logger, - g.idProvider, - g.metricsCfg.Metrics, - g.gossipSubScoreTracerInterval) + scoreTracer = tracer.NewGossipSubScoreTracer(g.logger, g.idProvider, g.metricsCfg.Metrics, g.gossipSubScoreTracerInterval) gossipSubConfigs.WithScoreTracer(scoreTracer) } - } else { g.logger.Warn(). Str(logging.KeyNetworkingSecurity, "true"). @@ -350,22 +370,10 @@ func (g *Builder) Build(ctx irrecoverable.SignalerContext) (p2p.PubSubAdapter, e return nil, fmt.Errorf("could not create gossipsub: host is nil") } - gossipSub, err := g.gossipSubFactory(ctx, g.logger, g.h, gossipSubConfigs, inspectorSuite) + gossipSub, err = g.gossipSubFactory(ctx, g.logger, g.h, gossipSubConfigs, inspectorSuite) if err != nil { return nil, fmt.Errorf("could not create gossipsub: %w", err) } - err = inspectorSuite.SetTopicOracle(gossipSub.GetTopics) - if err != nil { - return nil, fmt.Errorf("could not set topic oracle on inspector suite: %w", err) - } - - if scoreOpt != nil { - err := scoreOpt.SetSubscriptionProvider(scoring.NewSubscriptionProvider(g.logger, gossipSub)) - if err != nil { - return nil, fmt.Errorf("could not set subscription provider: %w", err) - } - } - return gossipSub, nil } diff --git a/network/p2p/p2pbuilder/inspector/suite.go b/network/p2p/p2pbuilder/inspector/suite.go index 167a227ca30..8fe6a1c4547 100644 --- a/network/p2p/p2pbuilder/inspector/suite.go +++ b/network/p2p/p2pbuilder/inspector/suite.go @@ -36,7 +36,9 @@ var _ p2p.GossipSubInspectorSuite = (*GossipSubInspectorSuite)(nil) // regarding gossipsub control messages is detected. // Returns: // - the new GossipSubInspectorSuite. -func NewGossipSubInspectorSuite(metricsInspector *inspector.ControlMsgMetricsInspector, validationInspector *validation.ControlMsgValidationInspector, ctrlMsgInspectDistributor p2p.GossipSubInspectorNotifDistributor) *GossipSubInspectorSuite { +func NewGossipSubInspectorSuite(metricsInspector *inspector.ControlMsgMetricsInspector, + validationInspector *validation.ControlMsgValidationInspector, + ctrlMsgInspectDistributor p2p.GossipSubInspectorNotifDistributor) *GossipSubInspectorSuite { inspectors := []p2p.GossipSubRPCInspector{metricsInspector, validationInspector} s := &GossipSubInspectorSuite{ ctrlMsgInspectDistributor: ctrlMsgInspectDistributor, @@ -89,13 +91,3 @@ func (s *GossipSubInspectorSuite) ActiveClustersChanged(list flow.ChainIDList) { } } } - -// SetTopicOracle sets the topic oracle of the gossipsub inspector suite. -// The topic oracle is used to determine the list of topics that the node is subscribed to. -// If an oracle is not set, the node will not be able to determine the list of topics that the node is subscribed to. -// Currently, the only inspector that utilizes the topic oracle is the validation inspector. -// This func is expected to be called once and will return an error on all subsequent calls. -// All errors returned from this func are considered irrecoverable. -func (s *GossipSubInspectorSuite) SetTopicOracle(topicOracle func() []string) error { - return s.validationInspector.SetTopicOracle(topicOracle) -} diff --git a/network/p2p/p2pbuilder/libp2pNodeBuilder.go b/network/p2p/p2pbuilder/libp2pNodeBuilder.go index 8d0380db23b..3bba1a0b10d 100644 --- a/network/p2p/p2pbuilder/libp2pNodeBuilder.go +++ b/network/p2p/p2pbuilder/libp2pNodeBuilder.go @@ -36,6 +36,7 @@ import ( p2pconfig "github.com/onflow/flow-go/network/p2p/p2pbuilder/config" gossipsubbuilder "github.com/onflow/flow-go/network/p2p/p2pbuilder/gossipsub" "github.com/onflow/flow-go/network/p2p/p2pconf" + "github.com/onflow/flow-go/network/p2p/p2plogging" "github.com/onflow/flow-go/network/p2p/p2pnode" "github.com/onflow/flow-go/network/p2p/subscription" "github.com/onflow/flow-go/network/p2p/tracer" @@ -83,13 +84,13 @@ func NewNodeBuilder( networkKey fcrypto.PrivateKey, sporkId flow.Identifier, idProvider module.IdentityProvider, + scoringRegistryConfig p2pconf.GossipSubScoringRegistryConfig, rCfg *p2pconf.ResourceManagerConfig, - rpcInspectorCfg *p2pconf.GossipSubRPCInspectorsConfig, + gossipCfg *p2pconf.GossipSubConfig, peerManagerConfig *p2pconfig.PeerManagerConfig, disallowListCacheCfg *p2p.DisallowListCacheConfig, rpcTracker p2p.RpcControlTracking, - unicastConfig *p2pconfig.UnicastConfig, -) *LibP2PNodeBuilder { + unicastConfig *p2pconfig.UnicastConfig) *LibP2PNodeBuilder { return &LibP2PNodeBuilder{ logger: logger, sporkId: sporkId, @@ -105,7 +106,9 @@ func NewNodeBuilder( networkingType, sporkId, idProvider, - rpcInspectorCfg, + scoringRegistryConfig, + &gossipCfg.GossipSubRPCInspectorsConfig, + &gossipCfg.SubscriptionProviderConfig, rpcTracker), peerManagerConfig: peerManagerConfig, unicastConfig: unicastConfig, @@ -237,6 +240,7 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { return nil, err } builder.gossipSubBuilder.SetHost(h) + lg := builder.logger.With().Str("local_peer_id", p2plogging.PeerId(h.ID())).Logger() pCache, err := p2pnode.NewProtocolPeerCache(builder.logger, h) if err != nil { @@ -252,7 +256,7 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { peerUpdater, err := connection.NewPeerUpdater( &connection.PeerUpdaterConfig{ PruneConnections: builder.peerManagerConfig.ConnectionPruning, - Logger: builder.logger, + Logger: lg, Host: connection.NewConnectorHost(h), Connector: connector, }) @@ -260,35 +264,30 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { return nil, fmt.Errorf("failed to create libp2p connector: %w", err) } - peerManager = connection.NewPeerManager(builder.logger, builder.peerManagerConfig.UpdateInterval, peerUpdater) + peerManager = connection.NewPeerManager(lg, builder.peerManagerConfig.UpdateInterval, peerUpdater) if builder.unicastConfig.RateLimiterDistributor != nil { builder.unicastConfig.RateLimiterDistributor.AddConsumer(peerManager) } } - node := builder.createNode(builder.logger, h, pCache, peerManager, builder.disallowListCacheCfg) + node := builder.createNode(lg, h, pCache, peerManager, builder.disallowListCacheCfg) if builder.connGater != nil { builder.connGater.SetDisallowListOracle(node) } unicastManager, err := unicast.NewUnicastManager(&unicast.ManagerConfig{ - Logger: builder.logger, + Logger: lg, StreamFactory: stream.NewLibP2PStreamFactory(h), SporkId: builder.sporkId, - ConnStatus: node, CreateStreamBackoffDelay: builder.unicastConfig.CreateStreamBackoffDelay, - DialBackoffDelay: builder.unicastConfig.DialBackoffDelay, - DialInProgressBackoffDelay: builder.unicastConfig.DialInProgressBackoffDelay, Metrics: builder.metricsConfig.Metrics, StreamZeroRetryResetThreshold: builder.unicastConfig.StreamZeroRetryResetThreshold, - DialZeroRetryResetThreshold: builder.unicastConfig.DialZeroRetryResetThreshold, MaxStreamCreationRetryAttemptTimes: builder.unicastConfig.MaxStreamCreationRetryAttemptTimes, - MaxDialRetryAttemptTimes: builder.unicastConfig.MaxDialRetryAttemptTimes, - DialConfigCacheFactory: func(configFactory func() unicast.DialConfig) unicast.DialConfigCache { - return unicastcache.NewDialConfigCache(builder.unicastConfig.DialConfigCacheSize, - builder.logger, + UnicastConfigCacheFactory: func(configFactory func() unicast.Config) unicast.ConfigCache { + return unicastcache.NewUnicastConfigCache(builder.unicastConfig.ConfigCacheSize, + lg, metrics.DialConfigCacheMetricFactory(builder.metricsConfig.HeroCacheFactory, builder.networkingType), configFactory) }, @@ -310,7 +309,7 @@ func (builder *LibP2PNodeBuilder) Build() (p2p.LibP2PNode, error) { ctx.Throw(fmt.Errorf("could not set routing system: %w", err)) } builder.gossipSubBuilder.SetRoutingSystem(routingSystem) - builder.logger.Debug().Msg("routing system created") + lg.Debug().Msg("routing system created") } // gossipsub is created here, because it needs to be created during the node startup. gossipSub, err := builder.gossipSubBuilder.Build(ctx) @@ -426,7 +425,6 @@ func DefaultNodeBuilder( connGaterCfg *p2pconfig.ConnectionGaterConfig, peerManagerCfg *p2pconfig.PeerManagerConfig, gossipCfg *p2pconf.GossipSubConfig, - rpcInspectorCfg *p2pconf.GossipSubRPCInspectorsConfig, rCfg *p2pconf.ResourceManagerConfig, uniCfg *p2pconfig.UnicastConfig, connMgrConfig *netconf.ConnectionManagerConfig, @@ -469,8 +467,9 @@ func DefaultNodeBuilder( flowKey, sporkId, idProvider, + gossipCfg.GossipSubScoringRegistryConfig, rCfg, - rpcInspectorCfg, + gossipCfg, peerManagerCfg, disallowListCacheCfg, meshTracer, diff --git a/network/p2p/p2pconf/gossipsub.go b/network/p2p/p2pconf/gossipsub.go index 1bf50263e4c..c5d9a6b3bfd 100644 --- a/network/p2p/p2pconf/gossipsub.go +++ b/network/p2p/p2pconf/gossipsub.go @@ -62,8 +62,36 @@ type GossipSubConfig struct { // GossipSubTracerConfig is the configuration for the gossipsub tracer. GossipSub tracer is used to trace the local mesh events and peer scores. GossipSubTracerConfig `mapstructure:",squash"` + // GossipSubScoringRegistryConfig is the configuration for the GossipSub score registry. + GossipSubScoringRegistryConfig `mapstructure:",squash"` + // PeerScoring is whether to enable GossipSub peer scoring. PeerScoring bool `mapstructure:"gossipsub-peer-scoring-enabled"` + + SubscriptionProviderConfig SubscriptionProviderParameters `mapstructure:",squash"` +} + +type SubscriptionProviderParameters struct { + // SubscriptionUpdateInterval is the interval for updating the list of topics the node have subscribed to; as well as the list of all + // peers subscribed to each of those topics. This is used to penalize peers that have an invalid subscription based on their role. + SubscriptionUpdateInterval time.Duration `validate:"gt=0s" mapstructure:"gossipsub-subscription-provider-update-interval"` + + // CacheSize is the size of the cache that keeps the list of peers subscribed to each topic as the local node. + // This is the local view of the current node towards the subscription status of other nodes in the system. + // The cache must be large enough to accommodate the maximum number of nodes in the system, otherwise the view of the local node will be incomplete + // due to cache eviction. + CacheSize uint32 `validate:"gt=0" mapstructure:"gossipsub-subscription-provider-cache-size"` +} + +// GossipSubScoringRegistryConfig is the configuration for the GossipSub score registry. +type GossipSubScoringRegistryConfig struct { + // PenaltyDecaySlowdownThreshold defines the penalty level which the decay rate is reduced by `DecayRateReductionFactor` every time the penalty of a node falls below the threshold, thereby slowing down the decay process. + // This mechanism ensures that malicious nodes experience longer decay periods, while honest nodes benefit from quicker decay. + PenaltyDecaySlowdownThreshold float64 `validate:"lt=0" mapstructure:"gossipsub-app-specific-penalty-decay-slowdown-threshold"` + // DecayRateReductionFactor defines the value by which the decay rate is decreased every time the penalty is below the PenaltyDecaySlowdownThreshold. A reduced decay rate extends the time it takes for penalties to diminish. + DecayRateReductionFactor float64 `validate:"gt=0,lt=1" mapstructure:"gossipsub-app-specific-penalty-decay-rate-reduction-factor"` + // PenaltyDecayEvaluationPeriod defines the interval at which the decay for a spam record is okay to be adjusted. + PenaltyDecayEvaluationPeriod time.Duration `validate:"gt=0" mapstructure:"gossipsub-app-specific-penalty-decay-evaluation-period"` } // GossipSubTracerConfig is the config for the gossipsub tracer. GossipSub tracer is used to trace the local mesh events and peer scores. diff --git a/network/p2p/p2pconf/gossipsub_rpc_inspectors.go b/network/p2p/p2pconf/gossipsub_rpc_inspectors.go index 497df0bf724..3d3cea79b21 100644 --- a/network/p2p/p2pconf/gossipsub_rpc_inspectors.go +++ b/network/p2p/p2pconf/gossipsub_rpc_inspectors.go @@ -40,7 +40,7 @@ type IWantRPCInspectionConfig struct { // If the cache miss threshold is exceeded an invalid control message notification is disseminated and the sender will be penalized. CacheMissThreshold float64 `validate:"gt=0" mapstructure:"gossipsub-rpc-iwant-cache-miss-threshold"` // CacheMissCheckSize the iWants size at which message id cache misses will be checked. - CacheMissCheckSize int `validate:"gte=1000" mapstructure:"gossipsub-rpc-iwant-cache-miss-check-size"` + CacheMissCheckSize int `validate:"gt=0" mapstructure:"gossipsub-rpc-iwant-cache-miss-check-size"` // DuplicateMsgIDThreshold maximum allowed duplicate message IDs in a single iWant control message. // If the duplicate message threshold is exceeded an invalid control message notification is disseminated and the sender will be penalized. DuplicateMsgIDThreshold float64 `validate:"gt=0" mapstructure:"gossipsub-rpc-iwant-duplicate-message-id-threshold"` @@ -63,7 +63,7 @@ type ClusterPrefixedMessageConfig struct { // when the cluster ID's provider is set asynchronously. It also allows processing of some stale messages that may be sent by nodes // that fall behind in the protocol. After the amount of cluster prefixed control messages processed exceeds this threshold the node // will be pushed to the edge of the network mesh. - ClusterPrefixHardThreshold float64 `validate:"gt=0" mapstructure:"gossipsub-rpc-cluster-prefixed-hard-threshold"` + ClusterPrefixHardThreshold float64 `validate:"gte=0" mapstructure:"gossipsub-rpc-cluster-prefixed-hard-threshold"` // ClusterPrefixedControlMsgsReceivedCacheSize size of the cache used to track the amount of cluster prefixed topics received by peers. ClusterPrefixedControlMsgsReceivedCacheSize uint32 `validate:"gt=0" mapstructure:"gossipsub-cluster-prefix-tracker-cache-size"` // ClusterPrefixedControlMsgsReceivedCacheDecay decay val used for the geometric decay of cache counters used to keep track of cluster prefixed topics received by peers. diff --git a/network/p2p/p2plogging/internal/peerIdCache_test.go b/network/p2p/p2plogging/internal/peerIdCache_test.go index 08d32ebb44f..e4e799d9d62 100644 --- a/network/p2p/p2plogging/internal/peerIdCache_test.go +++ b/network/p2p/p2plogging/internal/peerIdCache_test.go @@ -5,7 +5,6 @@ import ( "github.com/stretchr/testify/assert" - "github.com/onflow/flow-go/network/internal/p2pfixtures" "github.com/onflow/flow-go/network/p2p/p2plogging/internal" "github.com/onflow/flow-go/utils/unittest" ) @@ -65,9 +64,9 @@ func TestPeerIdCache_EjectionScenarios(t *testing.T) { assert.Equal(t, 0, cache.Size()) // add peer IDs to fill the cache - pid1 := p2pfixtures.PeerIdFixture(t) - pid2 := p2pfixtures.PeerIdFixture(t) - pid3 := p2pfixtures.PeerIdFixture(t) + pid1 := unittest.PeerIdFixture(t) + pid2 := unittest.PeerIdFixture(t) + pid3 := unittest.PeerIdFixture(t) cache.PeerIdString(pid1) assert.Equal(t, 1, cache.Size()) @@ -83,7 +82,7 @@ func TestPeerIdCache_EjectionScenarios(t *testing.T) { assert.Equal(t, 3, cache.Size()) // add a new peer ID - pid4 := p2pfixtures.PeerIdFixture(t) + pid4 := unittest.PeerIdFixture(t) cache.PeerIdString(pid4) assert.Equal(t, 3, cache.Size()) diff --git a/network/p2p/p2pnode/gossipSubAdapter.go b/network/p2p/p2pnode/gossipSubAdapter.go index 59bd2f2d65a..f2d1296b588 100644 --- a/network/p2p/p2pnode/gossipSubAdapter.go +++ b/network/p2p/p2pnode/gossipSubAdapter.go @@ -39,7 +39,11 @@ type GossipSubAdapter struct { var _ p2p.PubSubAdapter = (*GossipSubAdapter)(nil) -func NewGossipSubAdapter(ctx context.Context, logger zerolog.Logger, h host.Host, cfg p2p.PubSubAdapterConfig, clusterChangeConsumer p2p.CollectionClusterChangesConsumer) (p2p.PubSubAdapter, error) { +func NewGossipSubAdapter(ctx context.Context, + logger zerolog.Logger, + h host.Host, + cfg p2p.PubSubAdapterConfig, + clusterChangeConsumer p2p.CollectionClusterChangesConsumer) (p2p.PubSubAdapter, error) { gossipSubConfig, ok := cfg.(*GossipSubAdapterConfig) if !ok { return nil, fmt.Errorf("invalid gossipsub config type: %T", cfg) @@ -68,44 +72,78 @@ func NewGossipSubAdapter(ctx context.Context, logger zerolog.Logger, h host.Host if scoreTracer := gossipSubConfig.ScoreTracer(); scoreTracer != nil { builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { ready() - a.logger.Debug().Str("component", "gossipsub_score_tracer").Msg("starting score tracer") + a.logger.Info().Msg("starting score tracer") scoreTracer.Start(ctx) - a.logger.Debug().Str("component", "gossipsub_score_tracer").Msg("score tracer started") + select { + case <-ctx.Done(): + a.logger.Warn().Msg("aborting score tracer startup due to context done") + case <-scoreTracer.Ready(): + a.logger.Info().Msg("score tracer is ready") + } + ready() + <-ctx.Done() + a.logger.Info().Msg("stopping score tracer") <-scoreTracer.Done() - a.logger.Debug().Str("component", "gossipsub_score_tracer").Msg("score tracer stopped") + a.logger.Info().Msg("score tracer stopped") }) a.peerScoreExposer = scoreTracer } if tracer := gossipSubConfig.PubSubTracer(); tracer != nil { builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { - ready() - a.logger.Debug().Str("component", "gossipsub_tracer").Msg("starting tracer") + a.logger.Info().Msg("starting pubsub tracer") tracer.Start(ctx) - a.logger.Debug().Str("component", "gossipsub_tracer").Msg("tracer started") + select { + case <-ctx.Done(): + a.logger.Warn().Msg("aborting pubsub tracer startup due to context done") + case <-tracer.Ready(): + a.logger.Info().Msg("pubsub tracer is ready") + } + ready() + <-ctx.Done() + a.logger.Info().Msg("stopping pubsub tracer") <-tracer.Done() - a.logger.Debug().Str("component", "gossipsub_tracer").Msg("tracer stopped") + a.logger.Info().Msg("pubsub tracer stopped") }) } if inspectorSuite := gossipSubConfig.InspectorSuiteComponent(); inspectorSuite != nil { builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { - a.logger.Debug().Str("component", "gossipsub_inspector_suite").Msg("starting inspector suite") + a.logger.Info().Msg("starting inspector suite") inspectorSuite.Start(ctx) - a.logger.Debug().Str("component", "gossipsub_inspector_suite").Msg("inspector suite started") - select { case <-ctx.Done(): - a.logger.Debug().Str("component", "gossipsub_inspector_suite").Msg("inspector suite context done") + a.logger.Warn().Msg("aborting inspector suite startup due to context done") case <-inspectorSuite.Ready(): - ready() - a.logger.Debug().Str("component", "gossipsub_inspector_suite").Msg("inspector suite ready") + a.logger.Info().Msg("inspector suite is ready") } + ready() + <-ctx.Done() + a.logger.Info().Msg("stopping inspector suite") <-inspectorSuite.Done() - a.logger.Debug().Str("component", "gossipsub_inspector_suite").Msg("inspector suite stopped") + a.logger.Info().Msg("inspector suite stopped") + }) + } + + if scoringComponent := gossipSubConfig.ScoringComponent(); scoringComponent != nil { + builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + a.logger.Info().Msg("starting gossipsub scoring component") + scoringComponent.Start(ctx) + select { + case <-ctx.Done(): + a.logger.Warn().Msg("aborting gossipsub scoring component startup due to context done") + case <-scoringComponent.Ready(): + a.logger.Info().Msg("gossipsub scoring component is ready") + } + ready() + + <-ctx.Done() + a.logger.Info().Msg("stopping gossipsub scoring component") + <-scoringComponent.Done() + a.logger.Info().Msg("gossipsub scoring component stopped") }) } diff --git a/network/p2p/p2pnode/gossipSubAdapterConfig.go b/network/p2p/p2pnode/gossipSubAdapterConfig.go index a2dbe59289f..f4069930612 100644 --- a/network/p2p/p2pnode/gossipSubAdapterConfig.go +++ b/network/p2p/p2pnode/gossipSubAdapterConfig.go @@ -116,6 +116,10 @@ func (g *GossipSubAdapterConfig) PubSubTracer() p2p.PubSubTracer { return g.pubsubTracer } +func (g *GossipSubAdapterConfig) ScoringComponent() component.Component { + return g.scoreOption +} + // InspectorSuiteComponent returns the component that manages the lifecycle of the inspector suite. // This is used to start and stop the inspector suite by the PubSubAdapter. // Args: diff --git a/network/p2p/pubsub.go b/network/p2p/pubsub.go index 1b45336bdfa..d0ceb33fe8c 100644 --- a/network/p2p/pubsub.go +++ b/network/p2p/pubsub.go @@ -128,6 +128,7 @@ type Topic interface { // ScoreOptionBuilder abstracts the configuration for the underlying pubsub score implementation. type ScoreOptionBuilder interface { + component.Component // BuildFlowPubSubScoreOption builds the pubsub score options as pubsub.Option for the Flow network. BuildFlowPubSubScoreOption() (*pubsub.PeerScoreParams, *pubsub.PeerScoreThresholds) // TopicScoreParams returns the topic score params for the given topic. diff --git a/network/p2p/scoring/decay_test.go b/network/p2p/scoring/decay_test.go index b75aec0ae47..281ed194f15 100644 --- a/network/p2p/scoring/decay_test.go +++ b/network/p2p/scoring/decay_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/assert" + "github.com/onflow/flow-go/config" "github.com/onflow/flow-go/network/p2p" "github.com/onflow/flow-go/network/p2p/scoring" ) @@ -137,6 +138,9 @@ func TestGeometricDecay(t *testing.T) { // 4. penalty is negative and below the skipDecayThreshold and lastUpdated is too recent. In this case, the penalty should not be decayed. // 5. penalty is negative and below the skipDecayThreshold and lastUpdated is too old. In this case, the penalty should be decayed. func TestDefaultDecayFunction(t *testing.T) { + flowConfig, err := config.DefaultConfig() + assert.NoError(t, err) + type args struct { record p2p.GossipSubSpamRecord lastUpdated time.Time @@ -181,8 +185,9 @@ func TestDefaultDecayFunction(t *testing.T) { }, want: want{ record: p2p.GossipSubSpamRecord{ - Penalty: 0, // penalty is set to 0 - Decay: 0.8, + Penalty: 0, // penalty is set to 0 + Decay: 0.8, + LastDecayAdjustment: time.Time{}, }, }, }, @@ -199,8 +204,9 @@ func TestDefaultDecayFunction(t *testing.T) { }, want: want{ record: p2p.GossipSubSpamRecord{ - Penalty: 0, // penalty is set to 0 - Decay: 0.8, + Penalty: 0, // penalty is set to 0 + Decay: 0.8, + LastDecayAdjustment: time.Time{}, }, }, }, @@ -239,15 +245,78 @@ func TestDefaultDecayFunction(t *testing.T) { }, }, }, + { + // 6. penalty is negative and below slowerDecayPenaltyThreshold record decay should be adjusted. The `LastDecayAdjustment` has not been updated since initialization. + name: "penalty is negative and below slowerDecayPenaltyThreshold record decay should be adjusted", + args: args{ + record: p2p.GossipSubSpamRecord{ + Penalty: -100, + Decay: 0.8, + }, + lastUpdated: time.Now(), + }, + want: want{ + record: p2p.GossipSubSpamRecord{ + Penalty: -100, + Decay: 0.81, + }, + }, + }, + { + // 7. penalty is negative and below slowerDecayPenaltyThreshold but record.LastDecayAdjustment is too recent. In this case the decay should not be adjusted. + name: "penalty is negative and below slowerDecayPenaltyThreshold record decay should not be adjusted", + args: args{ + record: p2p.GossipSubSpamRecord{ + Penalty: -100, + Decay: 0.9, + LastDecayAdjustment: time.Now().Add(10 * time.Second), + }, + lastUpdated: time.Now(), + }, + want: want{ + record: p2p.GossipSubSpamRecord{ + Penalty: -100, + Decay: 0.9, + }, + }, + }, + { + // 8. penalty is negative and below slowerDecayPenaltyThreshold; and LastDecayAdjustment time passed the decay adjust interval. record decay should be adjusted. + name: "penalty is negative and below slowerDecayPenaltyThreshold and LastDecayAdjustment time passed the decay adjust interval. Record decay should be adjusted", + args: args{ + record: p2p.GossipSubSpamRecord{ + Penalty: -100, + Decay: 0.8, + LastDecayAdjustment: time.Now().Add(-flowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig.PenaltyDecayEvaluationPeriod), + }, + lastUpdated: time.Now(), + }, + want: want{ + record: p2p.GossipSubSpamRecord{ + Penalty: -100, + Decay: 0.81, + }, + }, + }, } - - decayFunc := scoring.DefaultDecayFunction() + scoringRegistryConfig := flowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig + decayFunc := scoring.DefaultDecayFunction(scoringRegistryConfig.PenaltyDecaySlowdownThreshold, scoringRegistryConfig.DecayRateReductionFactor, scoringRegistryConfig.PenaltyDecayEvaluationPeriod) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got, err := decayFunc(tt.args.record, tt.args.lastUpdated) assert.NoError(t, err) - assert.Less(t, math.Abs(got.Penalty-tt.want.record.Penalty), 10e-3) - assert.Equal(t, got.Decay, tt.want.record.Decay) + tolerance := 0.01 // 1% tolerance + expectedPenalty := tt.want.record.Penalty + + // ensure expectedPenalty is not zero to avoid division by zero + if expectedPenalty != 0 { + normalizedDifference := math.Abs(got.Penalty-expectedPenalty) / math.Abs(expectedPenalty) + assert.Less(t, normalizedDifference, tolerance) + } else { + // handles the case where expectedPenalty is zero + assert.Less(t, math.Abs(got.Penalty), tolerance) + } + assert.Equal(t, tt.want.record.Decay, got.Decay) }) } } diff --git a/network/p2p/scoring/internal/subscriptionCache.go b/network/p2p/scoring/internal/subscriptionCache.go new file mode 100644 index 00000000000..95acafdd422 --- /dev/null +++ b/network/p2p/scoring/internal/subscriptionCache.go @@ -0,0 +1,176 @@ +package internal + +import ( + "errors" + "fmt" + + "github.com/libp2p/go-libp2p/core/peer" + "github.com/rs/zerolog" + "go.uber.org/atomic" + + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module" + herocache "github.com/onflow/flow-go/module/mempool/herocache/backdata" + "github.com/onflow/flow-go/module/mempool/herocache/backdata/heropool" + "github.com/onflow/flow-go/module/mempool/stdmap" +) + +var ErrTopicRecordNotFound = fmt.Errorf("topic record not found") + +// SubscriptionRecordCache manages the subscription records of peers in a network. +// It uses a currentCycle counter to track the update cycles of the cache, ensuring the relevance of subscription data. +type SubscriptionRecordCache struct { + c *stdmap.Backend + + // currentCycle is an atomic counter used to track the update cycles of the subscription cache. + // It plays a critical role in maintaining the cache's data relevance and coherence. + // Each increment of currentCycle represents a new update cycle, signifying the cache's transition to a new state + // where only the most recent and relevant subscriptions are maintained. This design choice ensures that the cache + // does not retain stale or outdated subscription information, thereby reflecting the dynamic nature of peer + // subscriptions in the network. It is incremented every time the subscription cache is updated, either with new + // topic subscriptions or other update operations. + // The currentCycle is incremented atomically and externally by calling the MoveToNextUpdateCycle() function. + // This is called by the module that uses the subscription provider cache signaling that whatever updates it has + // made to the cache so far can be considered out-of-date, and the new updates to the cache records should + // overwrite the old ones. + currentCycle atomic.Uint64 +} + +// NewSubscriptionRecordCache creates a new subscription cache with the given size limit. +// Args: +// - sizeLimit: the size limit of the cache. +// - logger: the logger to use for logging. +// - collector: the metrics collector to use for collecting metrics. +func NewSubscriptionRecordCache(sizeLimit uint32, + logger zerolog.Logger, + collector module.HeroCacheMetrics) *SubscriptionRecordCache { + backData := herocache.NewCache(sizeLimit, + herocache.DefaultOversizeFactor, + heropool.LRUEjection, + logger.With().Str("mempool", "subscription-records").Logger(), + collector) + + return &SubscriptionRecordCache{ + c: stdmap.NewBackend(stdmap.WithBackData(backData)), + currentCycle: *atomic.NewUint64(0), + } +} + +// GetSubscribedTopics returns the list of topics a peer is subscribed to. +// Returns: +// - []string: the list of topics the peer is subscribed to. +// - bool: true if there is a record for the peer, false otherwise. +func (s *SubscriptionRecordCache) GetSubscribedTopics(pid peer.ID) ([]string, bool) { + e, ok := s.c.ByID(flow.MakeID(pid)) + if !ok { + return nil, false + } + return e.(SubscriptionRecordEntity).Topics, true +} + +// MoveToNextUpdateCycle moves the subscription cache to the next update cycle. +// A new update cycle is started when the subscription cache is first created, and then every time the subscription cache +// is updated. The update cycle is used to keep track of the last time the subscription cache was updated. It is used to +// implement a notion of time in the subscription cache. +// When the update cycle is moved forward, it means that all the updates made to the subscription cache so far are +// considered out-of-date, and the new updates to the cache records should overwrite the old ones. +// The expected behavior is that the update cycle is moved forward by the module that uses the subscription provider once +// per each update on the "entire" cache (and not per each update on a single record). +// In other words, assume a cache with 3 records: A, B, and C. If the module updates record A, then record B, and then +// record C, the module should move the update cycle forward only once after updating record C, and then update record A +// B, and C again. If the module moves the update cycle forward after updating record A, then again after updating +// record B, and then again after updating record C, the cache will be in an inconsistent state. +// Returns: +// - uint64: the current update cycle. +func (s *SubscriptionRecordCache) MoveToNextUpdateCycle() uint64 { + s.currentCycle.Inc() + return s.currentCycle.Load() +} + +// AddTopicForPeer appends a topic to the list of topics a peer is subscribed to. If the peer is not subscribed to any +// topics yet, a new record is created. +// If the last update cycle is older than the current cycle, the list of topics for the peer is first cleared, and then +// the topic is added to the list. This is to ensure that the list of topics for a peer is always up to date. +// Args: +// - pid: the peer id of the peer. +// - topic: the topic to add. +// Returns: +// - []string: the list of topics the peer is subscribed to after the update. +// - error: an error if the update failed; any returned error is an irrecoverable error and indicates a bug or misconfiguration. +// Implementation must be thread-safe. +func (s *SubscriptionRecordCache) AddTopicForPeer(pid peer.ID, topic string) ([]string, error) { + // first, we try to optimistically adjust the record assuming that the record already exists. + entityId := flow.MakeID(pid) + topics, err := s.addTopicForPeer(entityId, topic) + + switch { + case errors.Is(err, ErrTopicRecordNotFound): + // if the record does not exist, we initialize the record and try to adjust it again. + // Note: there is an edge case where the record is initialized by another goroutine between the two calls. + // In this case, the init function is invoked twice, but it is not a problem because the underlying + // cache is thread-safe. Hence, we do not need to synchronize the two calls. In such cases, one of the + // two calls returns false, and the other call returns true. We do not care which call returns false, hence, + // we ignore the return value of the init function. + _ = s.c.Add(SubscriptionRecordEntity{ + entityId: entityId, + PeerID: pid, + Topics: make([]string, 0), + LastUpdatedCycle: s.currentCycle.Load(), + }) + // as the record is initialized, the adjust attempt should not return an error, and any returned error + // is an irrecoverable error and indicates a bug. + return s.addTopicForPeer(entityId, topic) + case err != nil: + // if the adjust function returns an unexpected error on the first attempt, we return the error directly. + return nil, err + default: + // if the adjust function returns no error, we return the updated list of topics. + return topics, nil + } +} + +func (s *SubscriptionRecordCache) addTopicForPeer(entityId flow.Identifier, topic string) ([]string, error) { + var rErr error + updatedEntity, adjusted := s.c.Adjust(entityId, func(entity flow.Entity) flow.Entity { + record, ok := entity.(SubscriptionRecordEntity) + if !ok { + // sanity check + // This should never happen, because the cache only contains SubscriptionRecordEntity entities. + panic(fmt.Sprintf("invalid entity type, expected SubscriptionRecordEntity type, got: %T", entity)) + } + + currentCycle := s.currentCycle.Load() + if record.LastUpdatedCycle > currentCycle { + // sanity check + // This should never happen, because the update cycle must be moved forward before adding a topic. + panic(fmt.Sprintf("invalid last updated cycle, expected <= %d, got: %d", currentCycle, record.LastUpdatedCycle)) + } + if record.LastUpdatedCycle < currentCycle { + // This record was not updated in the current cycle, so we can wipe its topics list (topic list is only + // valid for the current cycle). + record.Topics = make([]string, 0) + } + // check if the topic already exists; if it does, we do not need to update the record. + for _, t := range record.Topics { + if t == topic { + // topic already exists + return record + } + } + record.LastUpdatedCycle = currentCycle + record.Topics = append(record.Topics, topic) + + // Return the adjusted record. + return record + }) + + if rErr != nil { + return nil, fmt.Errorf("failed to adjust record: %w", rErr) + } + + if !adjusted { + return nil, ErrTopicRecordNotFound + } + + return updatedEntity.(SubscriptionRecordEntity).Topics, nil +} diff --git a/network/p2p/scoring/internal/subscriptionCache_test.go b/network/p2p/scoring/internal/subscriptionCache_test.go new file mode 100644 index 00000000000..a333c18bdd8 --- /dev/null +++ b/network/p2p/scoring/internal/subscriptionCache_test.go @@ -0,0 +1,319 @@ +package internal_test + +import ( + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/network/p2p/scoring/internal" + "github.com/onflow/flow-go/utils/unittest" +) + +// TestNewSubscriptionRecordCache tests that NewSubscriptionRecordCache returns a valid cache. +func TestNewSubscriptionRecordCache(t *testing.T) { + sizeLimit := uint32(100) + + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + require.NotNil(t, cache, "cache should not be nil") + require.IsType(t, &internal.SubscriptionRecordCache{}, cache, "cache should be of type *SubscriptionRecordCache") +} + +// TestSubscriptionCache_GetSubscribedTopics tests the retrieval of subscribed topics for a peer. +func TestSubscriptionCache_GetSubscribedTopics(t *testing.T) { + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + // create a dummy peer ID + peerID := unittest.PeerIdFixture(t) + + // case when the peer has a subscription + topics := []string{"topic1", "topic2"} + updatedTopics, err := cache.AddTopicForPeer(peerID, topics[0]) + require.NoError(t, err, "adding topic 1 should not produce an error") + require.Equal(t, topics[:1], updatedTopics, "updated topics should match the added topic") + updatedTopics, err = cache.AddTopicForPeer(peerID, topics[1]) + require.NoError(t, err, "adding topic 2 should not produce an error") + require.Equal(t, topics, updatedTopics, "updated topics should match the added topic") + + retrievedTopics, found := cache.GetSubscribedTopics(peerID) + require.True(t, found, "peer should be found") + require.ElementsMatch(t, topics, retrievedTopics, "retrieved topics should match the added topics") + + // case when the peer does not have a subscription + nonExistentPeerID := unittest.PeerIdFixture(t) + retrievedTopics, found = cache.GetSubscribedTopics(nonExistentPeerID) + require.False(t, found, "non-existent peer should not be found") + require.Nil(t, retrievedTopics, "retrieved topics for non-existent peer should be nil") +} + +// TestSubscriptionCache_MoveToNextUpdateCycle tests the increment of update cycles in SubscriptionRecordCache. +// The first increment should set the cycle to 1, and the second increment should set the cycle to 2. +func TestSubscriptionCache_MoveToNextUpdateCycle(t *testing.T) { + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + // initial cycle should be 0, so first increment sets it to 1 + firstCycle := cache.MoveToNextUpdateCycle() + require.Equal(t, uint64(1), firstCycle, "first cycle should be 1 after first increment") + + // increment cycle again and verify it's now 2 + secondCycle := cache.MoveToNextUpdateCycle() + require.Equal(t, uint64(2), secondCycle, "second cycle should be 2 after second increment") +} + +// TestSubscriptionCache_TestAddTopicForPeer tests adding a topic for a peer. +func TestSubscriptionCache_TestAddTopicForPeer(t *testing.T) { + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + // case when adding a topic to an existing peer + existingPeerID := unittest.PeerIdFixture(t) + firstTopic := "topic1" + secondTopic := "topic2" + + // add first topic to the existing peer + _, err := cache.AddTopicForPeer(existingPeerID, firstTopic) + require.NoError(t, err, "adding first topic to existing peer should not produce an error") + + // add second topic to the same peer + updatedTopics, err := cache.AddTopicForPeer(existingPeerID, secondTopic) + require.NoError(t, err, "adding second topic to existing peer should not produce an error") + require.ElementsMatch(t, []string{firstTopic, secondTopic}, updatedTopics, "updated topics should match the added topics") + + // case when adding a topic to a new peer + newPeerID := unittest.PeerIdFixture(t) + newTopic := "newTopic" + + // add a topic to the new peer + updatedTopics, err = cache.AddTopicForPeer(newPeerID, newTopic) + require.NoError(t, err, "adding topic to new peer should not produce an error") + require.Equal(t, []string{newTopic}, updatedTopics, "updated topics for new peer should match the added topic") + + // sanity check that the topics for existing peer are still the same + retrievedTopics, found := cache.GetSubscribedTopics(existingPeerID) + require.True(t, found, "existing peer should be found") + require.ElementsMatch(t, []string{firstTopic, secondTopic}, retrievedTopics, "retrieved topics should match the added topics") +} + +// TestSubscriptionCache_DuplicateTopics tests adding a duplicate topic for a peer. The duplicate topic should not be added. +func TestSubscriptionCache_DuplicateTopics(t *testing.T) { + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + peerID := unittest.PeerIdFixture(t) + topic := "topic1" + + // add first topic to the existing peer + _, err := cache.AddTopicForPeer(peerID, topic) + require.NoError(t, err, "adding first topic to existing peer should not produce an error") + + // add second topic to the same peer + updatedTopics, err := cache.AddTopicForPeer(peerID, topic) + require.NoError(t, err, "adding duplicate topic to existing peer should not produce an error") + require.Equal(t, []string{topic}, updatedTopics, "duplicate topic should not be added") +} + +// TestSubscriptionCache_MoveUpdateCycle tests that (1) within one update cycle, "AddTopicForPeer" calls append the topics to the list of +// subscribed topics for peer, (2) as long as there is no "AddTopicForPeer" call, moving to the next update cycle +// does not change the subscribed topics for a peer, and (3) calling "AddTopicForPeer" after moving to the next update +// cycle clears the subscribed topics for a peer and adds the new topic. +func TestSubscriptionCache_MoveUpdateCycle(t *testing.T) { + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + peerID := unittest.PeerIdFixture(t) + topic1 := "topic1" + topic2 := "topic2" + topic3 := "topic3" + topic4 := "topic4" + + // adds topic1, topic2, and topic3 to the peer + topics, err := cache.AddTopicForPeer(peerID, topic1) + require.NoError(t, err, "adding first topic to existing peer should not produce an error") + require.Equal(t, []string{topic1}, topics, "updated topics should match the added topic") + topics, err = cache.AddTopicForPeer(peerID, topic2) + require.NoError(t, err, "adding second topic to existing peer should not produce an error") + require.Equal(t, []string{topic1, topic2}, topics, "updated topics should match the added topics") + topics, err = cache.AddTopicForPeer(peerID, topic3) + require.NoError(t, err, "adding third topic to existing peer should not produce an error") + require.Equal(t, []string{topic1, topic2, topic3}, topics, "updated topics should match the added topics") + + // move to next update cycle + cache.MoveToNextUpdateCycle() + topics, found := cache.GetSubscribedTopics(peerID) + require.True(t, found, "existing peer should be found") + require.ElementsMatch(t, []string{topic1, topic2, topic3}, topics, "retrieved topics should match the added topics") + + // add topic4 to the peer; since we moved to the next update cycle, the topics for the peer should be cleared + // and topic4 should be the only topic for the peer + topics, err = cache.AddTopicForPeer(peerID, topic4) + require.NoError(t, err, "adding fourth topic to existing peer should not produce an error") + require.Equal(t, []string{topic4}, topics, "updated topics should match the added topic") + + // move to next update cycle + cache.MoveToNextUpdateCycle() + + // since we did not add any topic to the peer, the topics for the peer should be the same as before + topics, found = cache.GetSubscribedTopics(peerID) + require.True(t, found, "existing peer should be found") + require.ElementsMatch(t, []string{topic4}, topics, "retrieved topics should match the added topics") +} + +// TestSubscriptionCache_MoveUpdateCycleWithDifferentPeers tests that moving to the next update cycle does not affect the subscribed +// topics for other peers. +func TestSubscriptionCache_MoveUpdateCycleWithDifferentPeers(t *testing.T) { + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + peer1 := unittest.PeerIdFixture(t) + peer2 := unittest.PeerIdFixture(t) + topic1 := "topic1" + topic2 := "topic2" + + // add topic1 to peer1 + topics, err := cache.AddTopicForPeer(peer1, topic1) + require.NoError(t, err, "adding first topic to peer1 should not produce an error") + require.Equal(t, []string{topic1}, topics, "updated topics should match the added topic") + + // add topic2 to peer2 + topics, err = cache.AddTopicForPeer(peer2, topic2) + require.NoError(t, err, "adding first topic to peer2 should not produce an error") + require.Equal(t, []string{topic2}, topics, "updated topics should match the added topic") + + // move to next update cycle + cache.MoveToNextUpdateCycle() + + // since we did not add any topic to the peers, the topics for the peers should be the same as before + topics, found := cache.GetSubscribedTopics(peer1) + require.True(t, found, "peer1 should be found") + require.ElementsMatch(t, []string{topic1}, topics, "retrieved topics should match the added topics") + + topics, found = cache.GetSubscribedTopics(peer2) + require.True(t, found, "peer2 should be found") + require.ElementsMatch(t, []string{topic2}, topics, "retrieved topics should match the added topics") + + // now add topic2 to peer1; it should overwrite the previous topics for peer1, but not affect the topics for peer2 + topics, err = cache.AddTopicForPeer(peer1, topic2) + require.NoError(t, err, "adding second topic to peer1 should not produce an error") + require.Equal(t, []string{topic2}, topics, "updated topics should match the added topic") + + topics, found = cache.GetSubscribedTopics(peer2) + require.True(t, found, "peer2 should be found") + require.ElementsMatch(t, []string{topic2}, topics, "retrieved topics should match the added topics") +} + +// TestSubscriptionCache_ConcurrentUpdate tests subscription cache update in a concurrent environment. +func TestSubscriptionCache_ConcurrentUpdate(t *testing.T) { + unittest.SkipUnless(t, unittest.TEST_TODO, "this test requires atomic AdjustOrGet method to be implemented for backend") + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + peerIds := unittest.PeerIdFixtures(t, 100) + topics := []string{"topic1", "topic2", "topic3"} + + allUpdatesDone := sync.WaitGroup{} + for _, pid := range peerIds { + for _, topic := range topics { + pid := pid + topic := topic + allUpdatesDone.Add(1) + go func() { + defer allUpdatesDone.Done() + _, err := cache.AddTopicForPeer(pid, topic) + require.NoError(t, err, "adding topic to peer should not produce an error") + }() + } + } + + unittest.RequireReturnsBefore(t, allUpdatesDone.Wait, 1*time.Second, "all updates did not finish in time") + + // verify that all peers have all topics; concurrently + allTopicsVerified := sync.WaitGroup{} + for _, pid := range peerIds { + pid := pid + allTopicsVerified.Add(1) + go func() { + defer allTopicsVerified.Done() + topics, found := cache.GetSubscribedTopics(pid) + require.True(t, found, "peer should be found") + require.ElementsMatch(t, topics, topics, "retrieved topics should match the added topics") + }() + } + + unittest.RequireReturnsBefore(t, allTopicsVerified.Wait, 1*time.Second, "all topics were not verified in time") +} + +// TestSubscriptionCache_TestSizeLimit tests that the cache evicts the least recently used peer when the cache size limit is reached. +func TestSubscriptionCache_TestSizeLimit(t *testing.T) { + sizeLimit := uint32(100) + cache := internal.NewSubscriptionRecordCache( + sizeLimit, + unittest.Logger(), + metrics.NewSubscriptionRecordCacheMetricsFactory(metrics.NewNoopHeroCacheMetricsFactory())) + + peerIds := unittest.PeerIdFixtures(t, 100) + topics := []string{"topic1", "topic2", "topic3"} + + // add topics to peers + for _, pid := range peerIds { + for _, topic := range topics { + _, err := cache.AddTopicForPeer(pid, topic) + require.NoError(t, err, "adding topic to peer should not produce an error") + } + } + + // verify that all peers have all topics + for _, pid := range peerIds { + topics, found := cache.GetSubscribedTopics(pid) + require.True(t, found, "peer should be found") + require.ElementsMatch(t, topics, topics, "retrieved topics should match the added topics") + } + + // add one more peer and verify that the first peer is evicted + newPeerID := unittest.PeerIdFixture(t) + _, err := cache.AddTopicForPeer(newPeerID, topics[0]) + require.NoError(t, err, "adding topic to peer should not produce an error") + + _, found := cache.GetSubscribedTopics(peerIds[0]) + require.False(t, found, "peer should not be found") + + // verify that all other peers still have all topics + for _, pid := range peerIds[1:] { + topics, found := cache.GetSubscribedTopics(pid) + require.True(t, found, "peer should be found") + require.ElementsMatch(t, topics, topics, "retrieved topics should match the added topics") + } + + // verify that the new peer has the topic + topics, found = cache.GetSubscribedTopics(newPeerID) + require.True(t, found, "peer should be found") + require.ElementsMatch(t, topics, topics, "retrieved topics should match the added topics") +} diff --git a/network/p2p/scoring/internal/subscriptionRecord.go b/network/p2p/scoring/internal/subscriptionRecord.go new file mode 100644 index 00000000000..2ac6946c25b --- /dev/null +++ b/network/p2p/scoring/internal/subscriptionRecord.go @@ -0,0 +1,38 @@ +package internal + +import ( + "github.com/libp2p/go-libp2p/core/peer" + + "github.com/onflow/flow-go/model/flow" +) + +// SubscriptionRecordEntity is an entity that represents a the list of topics a peer is subscribed to. +// It is internally used by the SubscriptionRecordCache to store the subscription records in the cache. +type SubscriptionRecordEntity struct { + // entityId is the key of the entity in the cache. It is the hash of the peer id. + // It is intentionally encoded as part of the struct to avoid recomputing it. + entityId flow.Identifier + + // PeerID is the peer id of the peer that is the owner of the subscription. + PeerID peer.ID + + // Topics is the list of topics the peer is subscribed to. + Topics []string + + // LastUpdatedCycle is the last cycle counter value that this record was updated. + // This is used to clean up old records' topics upon update. + LastUpdatedCycle uint64 +} + +var _ flow.Entity = (*SubscriptionRecordEntity)(nil) + +// ID returns the entity id of the subscription record, which is the hash of the peer id. +func (s SubscriptionRecordEntity) ID() flow.Identifier { + return s.entityId +} + +// Checksum returns the entity id of the subscription record, which is the hash of the peer id. +// It is of no use in the cache, but it is implemented to satisfy the flow.Entity interface. +func (s SubscriptionRecordEntity) Checksum() flow.Identifier { + return s.ID() +} diff --git a/network/p2p/scoring/registry.go b/network/p2p/scoring/registry.go index f30ef63646a..0b4ac5c707e 100644 --- a/network/p2p/scoring/registry.go +++ b/network/p2p/scoring/registry.go @@ -2,6 +2,7 @@ package scoring import ( "fmt" + "math" "time" "github.com/libp2p/go-libp2p/core/peer" @@ -9,6 +10,8 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/network/p2p" netcache "github.com/onflow/flow-go/network/p2p/cache" p2pmsg "github.com/onflow/flow-go/network/p2p/message" @@ -17,11 +20,10 @@ import ( ) const ( - // skipDecayThreshold is the threshold for which when the negative penalty is above this value, the decay function will not be called. - // instead, the penalty will be set to 0. This is to prevent the penalty from keeping a small negative value for a long time. - skipDecayThreshold = -0.1 - // defaultDecay is the default decay value for the application specific penalty. - // this value is used when no custom decay value is provided, and decays the penalty by 1% every second. + // MinimumSpamPenaltyDecayFactor is minimum speed at which the spam penalty value of a peer is decayed. + // Spam record will be initialized with a decay value between .5 , .7 and this value will then be decayed up to .99 on consecutive misbehavior's, + // The maximum decay value decays the penalty by 1% every second. The decay is applied geometrically, i.e., `newPenalty = oldPenalty * decay`, hence, the higher decay value + // indicates a lower decay speed, i.e., it takes more heartbeat intervals to decay a penalty back to zero when the decay value is high. // assume: // penalty = -100 (the maximum application specific penalty is -100) // skipDecayThreshold = -0.1 @@ -39,7 +41,14 @@ const ( // n > log( 0.001 ) / log( 0.99 ) // n > -3 / log( 0.99 ) // n > 458.22 - defaultDecay = 0.99 // default decay value for the application specific penalty. + MinimumSpamPenaltyDecayFactor = 0.99 + // MaximumSpamPenaltyDecayFactor represents the maximum rate at which the spam penalty value of a peer decays. Decay speeds increase + // during sustained malicious activity, leading to a slower recovery of the app-specific score for the penalized node. Conversely, + // decay speeds decrease, allowing faster recoveries, when nodes exhibit fleeting misbehavior. + MaximumSpamPenaltyDecayFactor = 0.8 + // skipDecayThreshold is the threshold for which when the negative penalty is above this value, the decay function will not be called. + // instead, the penalty will be set to 0. This is to prevent the penalty from keeping a small negative value for a long time. + skipDecayThreshold = -0.1 // graftMisbehaviourPenalty is the penalty applied to the application specific penalty when a peer conducts a graft misbehaviour. graftMisbehaviourPenalty = -10 // pruneMisbehaviourPenalty is the penalty applied to the application specific penalty when a peer conducts a prune misbehaviour. @@ -48,27 +57,36 @@ const ( iHaveMisbehaviourPenalty = -10 // iWantMisbehaviourPenalty is the penalty applied to the application specific penalty when a peer conducts a iWant misbehaviour. iWantMisbehaviourPenalty = -10 + // clusterPrefixedPenaltyReductionFactor factor used to reduce the penalty for control message misbehaviours on cluster prefixed topics. This allows a more lenient punishment for nodes + // that fall behind and may need to request old data. + clusterPrefixedPenaltyReductionFactor = .5 // rpcPublishMessageMisbehaviourPenalty is the penalty applied to the application specific penalty when a peer conducts a RpcPublishMessageMisbehaviourPenalty misbehaviour. rpcPublishMessageMisbehaviourPenalty = -10 ) +type SpamRecordInitFunc func() p2p.GossipSubSpamRecord + // GossipSubCtrlMsgPenaltyValue is the penalty value for each control message type. type GossipSubCtrlMsgPenaltyValue struct { - Graft float64 // penalty value for an individual graft message misbehaviour. - Prune float64 // penalty value for an individual prune message misbehaviour. - IHave float64 // penalty value for an individual iHave message misbehaviour. - IWant float64 // penalty value for an individual iWant message misbehaviour. - RpcPublishMessage float64 // penalty value for an individual RpcPublishMessage message misbehaviour. + Graft float64 // penalty value for an individual graft message misbehaviour. + Prune float64 // penalty value for an individual prune message misbehaviour. + IHave float64 // penalty value for an individual iHave message misbehaviour. + IWant float64 // penalty value for an individual iWant message misbehaviour. + // ClusterPrefixedPenaltyReductionFactor factor used to reduce the penalty for control message misbehaviours on cluster prefixed topics. This is allows a more lenient punishment for nodes + // that fall behind and may need to request old data. + ClusterPrefixedPenaltyReductionFactor float64 + RpcPublishMessage float64 // penalty value for an individual RpcPublishMessage message misbehaviour. } // DefaultGossipSubCtrlMsgPenaltyValue returns the default penalty value for each control message type. func DefaultGossipSubCtrlMsgPenaltyValue() GossipSubCtrlMsgPenaltyValue { return GossipSubCtrlMsgPenaltyValue{ - Graft: graftMisbehaviourPenalty, - Prune: pruneMisbehaviourPenalty, - IHave: iHaveMisbehaviourPenalty, - IWant: iWantMisbehaviourPenalty, - RpcPublishMessage: rpcPublishMessageMisbehaviourPenalty, + Graft: graftMisbehaviourPenalty, + Prune: pruneMisbehaviourPenalty, + IHave: iHaveMisbehaviourPenalty, + IWant: iWantMisbehaviourPenalty, + ClusterPrefixedPenaltyReductionFactor: clusterPrefixedPenaltyReductionFactor, + RpcPublishMessage: rpcPublishMessageMisbehaviourPenalty, } } @@ -80,13 +98,14 @@ func DefaultGossipSubCtrlMsgPenaltyValue() GossipSubCtrlMsgPenaltyValue { // Similar to the GossipSub score, the application specific score is meant to be private to the local peer, and is not // shared with other peers in the network. type GossipSubAppSpecificScoreRegistry struct { + component.Component logger zerolog.Logger idProvider module.IdentityProvider // spamScoreCache currently only holds the control message misbehaviour penalty (spam related penalty). spamScoreCache p2p.GossipSubSpamRecordCache penalty GossipSubCtrlMsgPenaltyValue // initial application specific penalty record, used to initialize the penalty cache entry. - init func() p2p.GossipSubSpamRecord + init SpamRecordInitFunc validator p2p.SubscriptionValidator } @@ -108,7 +127,7 @@ type GossipSubAppSpecificScoreRegistryConfig struct { // Init is a factory function that returns a new GossipSubSpamRecord. It is used to initialize the spam record of // a peer when the peer is first observed by the local peer. - Init func() p2p.GossipSubSpamRecord + Init SpamRecordInitFunc // CacheFactory is a factory function that returns a new GossipSubSpamRecordCache. It is used to initialize the spamScoreCache. // The cache is used to store the application specific penalty of peers. @@ -118,7 +137,7 @@ type GossipSubAppSpecificScoreRegistryConfig struct { // NewGossipSubAppSpecificScoreRegistry returns a new GossipSubAppSpecificScoreRegistry. // Args: // -// config: the configuration for the registry. +// config: the config for the registry. // // Returns: // @@ -133,6 +152,26 @@ func NewGossipSubAppSpecificScoreRegistry(config *GossipSubAppSpecificScoreRegis idProvider: config.IdProvider, } + builder := component.NewComponentManagerBuilder() + builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + reg.logger.Info().Msg("starting subscription validator") + reg.validator.Start(ctx) + select { + case <-ctx.Done(): + reg.logger.Warn().Msg("aborting subscription validator startup, context cancelled") + case <-reg.validator.Ready(): + reg.logger.Info().Msg("subscription validator started") + ready() + reg.logger.Info().Msg("subscription validator is ready") + } + + <-ctx.Done() + reg.logger.Info().Msg("stopping subscription validator") + <-reg.validator.Done() + reg.logger.Info().Msg("subscription validator stopped") + }) + reg.Component = builder.Build() + return reg } @@ -143,7 +182,7 @@ func (r *GossipSubAppSpecificScoreRegistry) AppSpecificScoreFunc() func(peer.ID) return func(pid peer.ID) float64 { appSpecificScore := float64(0) - lg := r.logger.With().Str("peer_id", p2plogging.PeerId(pid)).Logger() + lg := r.logger.With().Str("remote_peer_id", p2plogging.PeerId(pid)).Logger() // (1) spam penalty: the penalty is applied to the application specific penalty when a peer conducts a spamming misbehaviour. spamRecord, err, spamRecordExists := r.spamScoreCache.Get(pid) if err != nil { @@ -185,7 +224,7 @@ func (r *GossipSubAppSpecificScoreRegistry) AppSpecificScoreFunc() func(peer.ID) appSpecificScore += stakingScore } - lg.Trace(). + lg.Debug(). Float64("total_app_specific_score", appSpecificScore). Msg("application specific penalty computed") @@ -252,28 +291,38 @@ func (r *GossipSubAppSpecificScoreRegistry) OnInvalidControlMessageNotification( // try initializing the application specific penalty for the peer if it is not yet initialized. // this is done to avoid the case where the peer is not yet cached and the application specific penalty is not yet initialized. - // initialization is successful only if the peer is not yet cached. - initialized := r.spamScoreCache.Add(notification.PeerID, r.init()) + // initialization is successful only if the peer is not yet cached. If any error is occurred during initialization we log a fatal error + initRecord := r.init() + initialized := r.spamScoreCache.Add(notification.PeerID, initRecord) if initialized { lg.Trace().Str("peer_id", p2plogging.PeerId(notification.PeerID)).Msg("application specific penalty initialized for peer") } record, err := r.spamScoreCache.Update(notification.PeerID, func(record p2p.GossipSubSpamRecord) p2p.GossipSubSpamRecord { + penalty := 0.0 switch notification.MsgType { case p2pmsg.CtrlMsgGraft: - record.Penalty += r.penalty.Graft + penalty += r.penalty.Graft case p2pmsg.CtrlMsgPrune: - record.Penalty += r.penalty.Prune + penalty += r.penalty.Prune case p2pmsg.CtrlMsgIHave: - record.Penalty += r.penalty.IHave + penalty += r.penalty.IHave case p2pmsg.CtrlMsgIWant: - record.Penalty += r.penalty.IWant + penalty += r.penalty.IWant case p2pmsg.RpcPublishMessage: - record.Penalty += r.penalty.RpcPublishMessage + penalty += r.penalty.RpcPublishMessage default: // the error is considered fatal as it means that we have an unsupported misbehaviour type, we should crash the node to prevent routing attack vulnerability. lg.Fatal().Str("misbehavior_type", notification.MsgType.String()).Msg("unknown misbehaviour type") } + + // reduce penalty for cluster prefixed topics allowing nodes that are potentially behind to catch up + if notification.TopicType == p2p.CtrlMsgTopicTypeClusterPrefixed { + penalty *= r.penalty.ClusterPrefixedPenaltyReductionFactor + } + + record.Penalty += penalty + return record }) if err != nil { @@ -289,7 +338,7 @@ func (r *GossipSubAppSpecificScoreRegistry) OnInvalidControlMessageNotification( // DefaultDecayFunction is the default decay function that is used to decay the application specific penalty of a peer. // It is used if no decay function is provided in the configuration. // It decays the application specific penalty of a peer if it is negative. -func DefaultDecayFunction() netcache.PreprocessorFunc { +func DefaultDecayFunction(slowerDecayPenaltyThreshold, decayRateDecrement float64, decayAdjustInterval time.Duration) netcache.PreprocessorFunc { return func(record p2p.GossipSubSpamRecord, lastUpdated time.Time) (p2p.GossipSubSpamRecord, error) { if record.Penalty >= 0 { // no need to decay the penalty if it is positive, the reason is currently the app specific penalty @@ -301,6 +350,8 @@ func DefaultDecayFunction() netcache.PreprocessorFunc { if record.Penalty > skipDecayThreshold { // penalty is negative but greater than the threshold, we set it to 0. record.Penalty = 0 + record.Decay = MaximumSpamPenaltyDecayFactor + record.LastDecayAdjustment = time.Time{} return record, nil } @@ -310,6 +361,14 @@ func DefaultDecayFunction() netcache.PreprocessorFunc { return record, fmt.Errorf("could not decay application specific penalty: %w", err) } record.Penalty = penalty + + if record.Penalty <= slowerDecayPenaltyThreshold { + if time.Since(record.LastDecayAdjustment) > decayAdjustInterval || record.LastDecayAdjustment.IsZero() { + // reduces the decay speed flooring at MinimumSpamRecordDecaySpeed + record.Decay = math.Min(record.Decay+decayRateDecrement, MinimumSpamPenaltyDecayFactor) + record.LastDecayAdjustment = time.Now() + } + } return record, nil } } @@ -319,7 +378,8 @@ func DefaultDecayFunction() netcache.PreprocessorFunc { // - a gossipsub spam record with the default decay value and 0 penalty. func InitAppScoreRecordState() p2p.GossipSubSpamRecord { return p2p.GossipSubSpamRecord{ - Decay: defaultDecay, - Penalty: 0, + Decay: MaximumSpamPenaltyDecayFactor, + Penalty: 0, + LastDecayAdjustment: time.Now(), } } diff --git a/network/p2p/scoring/registry_test.go b/network/p2p/scoring/registry_test.go index e89196601fc..92d93942afa 100644 --- a/network/p2p/scoring/registry_test.go +++ b/network/p2p/scoring/registry_test.go @@ -3,6 +3,7 @@ package scoring_test import ( "fmt" "math" + "sync" "testing" "time" @@ -11,12 +12,14 @@ import ( testifymock "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/onflow/flow-go/config" "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/module/mock" "github.com/onflow/flow-go/network/p2p" netcache "github.com/onflow/flow-go/network/p2p/cache" p2pmsg "github.com/onflow/flow-go/network/p2p/message" mockp2p "github.com/onflow/flow-go/network/p2p/mock" + "github.com/onflow/flow-go/network/p2p/p2pconf" "github.com/onflow/flow-go/network/p2p/scoring" "github.com/onflow/flow-go/utils/unittest" ) @@ -25,7 +28,7 @@ import ( // app specific reward. This is the default reward for a staked peer that has valid subscriptions and has not been // penalized. func TestNoPenaltyRecord(t *testing.T) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, spamRecords := newGossipSubAppSpecificScoreRegistry( t, withStakedIdentity(peerID), @@ -66,7 +69,7 @@ func TestPeerWithSpamRecord(t *testing.T) { } func testPeerWithSpamRecord(t *testing.T, messageType p2pmsg.ControlMessageType, expectedPenalty float64) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, spamRecords := newGossipSubAppSpecificScoreRegistry( t, withStakedIdentity(peerID), @@ -120,7 +123,7 @@ func TestSpamRecord_With_UnknownIdentity(t *testing.T) { // testSpamRecordWithUnknownIdentity tests the app specific penalty computation of the node when there is a spam record for the peer id and // the peer id has an unknown identity. func testSpamRecordWithUnknownIdentity(t *testing.T, messageType p2pmsg.ControlMessageType, expectedPenalty float64) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, spamRecords := newGossipSubAppSpecificScoreRegistry( t, withUnknownIdentity(peerID), @@ -145,7 +148,6 @@ func testSpamRecordWithUnknownIdentity(t *testing.T, messageType p2pmsg.ControlM assert.NoError(t, err) assert.Less(t, math.Abs(expectedPenalty-record.Penalty), 10e-3) // penalty should be updated to -10, we account for decay. assert.Equal(t, scoring.InitAppScoreRecordState().Decay, record.Decay) // decay should be initialized to the initial state. - // the peer has spam record as well as an unknown identity. Hence, the app specific score should be the spam penalty // and the staking penalty. score = reg.AppSpecificScoreFunc()(peerID) @@ -173,7 +175,7 @@ func TestSpamRecord_With_SubscriptionPenalty(t *testing.T) { // testSpamRecordWithUnknownIdentity tests the app specific penalty computation of the node when there is a spam record for the peer id and // the peer id has an invalid subscription as well. func testSpamRecordWithSubscriptionPenalty(t *testing.T, messageType p2pmsg.ControlMessageType, expectedPenalty float64) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, spamRecords := newGossipSubAppSpecificScoreRegistry( t, withStakedIdentity(peerID), @@ -198,7 +200,6 @@ func testSpamRecordWithSubscriptionPenalty(t *testing.T, messageType p2pmsg.Cont assert.NoError(t, err) assert.Less(t, math.Abs(expectedPenalty-record.Penalty), 10e-3) assert.Equal(t, scoring.InitAppScoreRecordState().Decay, record.Decay) // decay should be initialized to the initial state. - // the peer has spam record as well as an unknown identity. Hence, the app specific score should be the spam penalty // and the staking penalty. score = reg.AppSpecificScoreFunc()(peerID) @@ -207,7 +208,7 @@ func testSpamRecordWithSubscriptionPenalty(t *testing.T, messageType p2pmsg.Cont // TestSpamPenaltyDecaysInCache tests that the spam penalty records decay over time in the cache. func TestSpamPenaltyDecaysInCache(t *testing.T) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, _ := newGossipSubAppSpecificScoreRegistry(t, withStakedIdentity(peerID), withValidSubscriptions(peerID)) @@ -260,7 +261,8 @@ func TestSpamPenaltyDecaysInCache(t *testing.T) { penaltyValueFixtures().RpcPublishMessage // the lower bound is the sum of the penalties with decay assuming the decay is applied 4 times to the sum of the penalties. // in reality, the decay is applied 4 times to the first penalty, then 3 times to the second penalty, and so on. - scoreLowerBound := scoreUpperBound * math.Pow(scoring.InitAppScoreRecordState().Decay, 4) + r := scoring.InitAppScoreRecordState() + scoreLowerBound := scoreUpperBound * math.Pow(r.Decay, 4) // with decay, the penalty should be between the upper and lower bounds. assert.Greater(t, score, scoreUpperBound) @@ -270,7 +272,7 @@ func TestSpamPenaltyDecaysInCache(t *testing.T) { // TestSpamPenaltyDecayToZero tests that the spam penalty decays to zero over time, and when the spam penalty of // a peer is set back to zero, its app specific penalty is also reset to the initial state. func TestSpamPenaltyDecayToZero(t *testing.T) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, spamRecords := newGossipSubAppSpecificScoreRegistry( t, withStakedIdentity(peerID), @@ -316,7 +318,7 @@ func TestSpamPenaltyDecayToZero(t *testing.T) { // TestPersistingUnknownIdentityPenalty tests that even though the spam penalty is decayed to zero, the unknown identity penalty // is persisted. This is because the unknown identity penalty is not decayed. func TestPersistingUnknownIdentityPenalty(t *testing.T) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, spamRecords := newGossipSubAppSpecificScoreRegistry( t, withUnknownIdentity(peerID), // the peer id has an unknown identity. @@ -373,7 +375,7 @@ func TestPersistingUnknownIdentityPenalty(t *testing.T) { // TestPersistingInvalidSubscriptionPenalty tests that even though the spam penalty is decayed to zero, the invalid subscription penalty // is persisted. This is because the invalid subscription penalty is not decayed. func TestPersistingInvalidSubscriptionPenalty(t *testing.T) { - peerID := peer.ID("peer-1") + peerID := unittest.PeerIdFixture(t) reg, spamRecords := newGossipSubAppSpecificScoreRegistry( t, withStakedIdentity(peerID), @@ -422,6 +424,159 @@ func TestPersistingInvalidSubscriptionPenalty(t *testing.T) { assert.Equal(t, 0.0, record.Penalty) // penalty should be zero. } +// TestSpamRecordDecayAdjustment ensures that spam record decay is increased each time a peers score reaches the scoring.IncreaseDecayThreshold eventually +// sustained misbehavior will result in the spam record decay reaching the minimum decay speed .99, and the decay speed is reset to the max decay speed .8. +func TestSpamRecordDecayAdjustment(t *testing.T) { + flowConfig, err := config.DefaultConfig() + require.NoError(t, err) + scoringRegistryConfig := flowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig + // increase configured DecayRateReductionFactor so that the decay time is increased faster + scoringRegistryConfig.DecayRateReductionFactor = .1 + scoringRegistryConfig.PenaltyDecayEvaluationPeriod = time.Second + + peer1 := unittest.PeerIdFixture(t) + peer2 := unittest.PeerIdFixture(t) + reg, spamRecords := newScoringRegistry( + t, + scoringRegistryConfig, + withStakedIdentity(peer1), + withValidSubscriptions(peer1), + withStakedIdentity(peer2), + withValidSubscriptions(peer2)) + + // initially, the spamRecords should not have the peer ids. + assert.False(t, spamRecords.Has(peer1)) + assert.False(t, spamRecords.Has(peer2)) + // since the both peers do not have a spam record, their app specific score should be the max app specific reward, which + // is the default reward for a staked peer that has valid subscriptions. + assert.Equal(t, scoring.MaxAppSpecificReward, reg.AppSpecificScoreFunc()(peer1)) + assert.Equal(t, scoring.MaxAppSpecificReward, reg.AppSpecificScoreFunc()(peer2)) + + // simulate sustained malicious activity from peer1, eventually the decay speed + // for a spam record should be reduced to the MinimumSpamPenaltyDecayFactor + prevDecay := scoring.MaximumSpamPenaltyDecayFactor + tolerance := 0.1 + require.Eventually(t, func() bool { + reg.OnInvalidControlMessageNotification(&p2p.InvCtrlMsgNotif{ + PeerID: peer1, + MsgType: p2pmsg.CtrlMsgPrune, + }) + record, err, ok := spamRecords.Get(peer1) + require.NoError(t, err) + require.True(t, ok) + assert.Less(t, math.Abs(prevDecay-record.Decay), tolerance) + prevDecay = record.Decay + return record.Decay == scoring.MinimumSpamPenaltyDecayFactor + }, 5*time.Second, 500*time.Millisecond) + + // initialize a spam record for peer2 + reg.OnInvalidControlMessageNotification(&p2p.InvCtrlMsgNotif{ + PeerID: peer2, + MsgType: p2pmsg.CtrlMsgPrune, + }) + // reduce penalty and increase Decay to scoring.MinimumSpamPenaltyDecayFactor + record, err := spamRecords.Update(peer2, func(record p2p.GossipSubSpamRecord) p2p.GossipSubSpamRecord { + record.Penalty = -.1 + record.Decay = scoring.MinimumSpamPenaltyDecayFactor + return record + }) + require.NoError(t, err) + require.True(t, record.Decay == scoring.MinimumSpamPenaltyDecayFactor) + require.True(t, record.Penalty == -.1) + // simulate sustained good behavior from peer 2, each time the spam record is read from the cache + // using Get method the record penalty will be decayed until it is eventually reset to + // 0 at this point the decay speed for the record should be reset to MaximumSpamPenaltyDecayFactor + // eventually after penalty reaches the skipDecaThreshold the record decay will be reset to scoring.MaximumSpamPenaltyDecayFactor + require.Eventually(t, func() bool { + record, err, ok := spamRecords.Get(peer2) + require.NoError(t, err) + require.True(t, ok) + return record.Decay == scoring.MaximumSpamPenaltyDecayFactor && + record.Penalty == 0 && + record.LastDecayAdjustment.IsZero() + }, 5*time.Second, time.Second) + + // ensure decay can be reduced again after recovery for peerID 2 + require.Eventually(t, func() bool { + reg.OnInvalidControlMessageNotification(&p2p.InvCtrlMsgNotif{ + PeerID: peer2, + MsgType: p2pmsg.CtrlMsgPrune, + }) + record, err, ok := spamRecords.Get(peer1) + require.NoError(t, err) + require.True(t, ok) + return record.Decay == scoring.MinimumSpamPenaltyDecayFactor + }, 5*time.Second, 500*time.Millisecond) +} + +// TestPeerSpamPenaltyClusterPrefixed evaluates the application-specific penalty calculation for a node when a spam record is present +// for cluster-prefixed topics. In the case of an invalid control message notification marked as cluster-prefixed, +// the application-specific penalty should be reduced by the default reduction factor. This test verifies the accurate computation +// of the application-specific score under these conditions. +func TestPeerSpamPenaltyClusterPrefixed(t *testing.T) { + ctlMsgTypes := p2pmsg.ControlMessageTypes() + peerIds := unittest.PeerIdFixtures(t, len(ctlMsgTypes)) + opts := make([]scoringRegistryParamsOpt, 0) + for _, peerID := range peerIds { + opts = append(opts, withStakedIdentity(peerID), withValidSubscriptions(peerID)) + } + reg, spamRecords := newGossipSubAppSpecificScoreRegistry(t, opts...) + + for _, peerID := range peerIds { + // initially, the spamRecords should not have the peer id. + assert.False(t, spamRecords.Has(peerID)) + // since the peer id does not have a spam record, the app specific score should be the max app specific reward, which + // is the default reward for a staked peer that has valid subscriptions. + score := reg.AppSpecificScoreFunc()(peerID) + assert.Equal(t, scoring.MaxAppSpecificReward, score) + } + + // Report consecutive misbehavior's for the specified peer ID. Two misbehavior's are reported concurrently: + // 1. With IsClusterPrefixed set to false, ensuring the penalty applied to the application-specific score is not reduced. + // 2. With IsClusterPrefixed set to true, reducing the penalty added to the overall app-specific score by the default reduction factor. + for i, ctlMsgType := range ctlMsgTypes { + peerID := peerIds[i] + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + reg.OnInvalidControlMessageNotification(&p2p.InvCtrlMsgNotif{ + PeerID: peerID, + MsgType: ctlMsgType, + TopicType: p2p.CtrlMsgNonClusterTopicType, + }) + }() + go func() { + defer wg.Done() + reg.OnInvalidControlMessageNotification(&p2p.InvCtrlMsgNotif{ + PeerID: peerID, + MsgType: ctlMsgType, + TopicType: p2p.CtrlMsgTopicTypeClusterPrefixed, + }) + }() + unittest.RequireReturnsBefore(t, wg.Wait, 100*time.Millisecond, "timed out waiting for goroutines to finish") + + // expected penalty should be penaltyValueFixtures().Graft * (1 + clusterReductionFactor) + expectedPenalty := penaltyValueFixture(ctlMsgType) * (1 + penaltyValueFixtures().ClusterPrefixedPenaltyReductionFactor) + + // the penalty should now be updated in the spamRecords + record, err, ok := spamRecords.Get(peerID) // get the record from the spamRecords. + assert.True(t, ok) + assert.NoError(t, err) + assert.Less(t, math.Abs(expectedPenalty-record.Penalty), 10e-3) + assert.Equal(t, scoring.InitAppScoreRecordState().Decay, record.Decay) + // this peer has a spam record, with no subscription penalty. Hence, the app specific score should only be the spam penalty, + // and the peer should be deprived of the default reward for its valid staked role. + score := reg.AppSpecificScoreFunc()(peerID) + tolerance := 10e-3 // 0.1% + if expectedPenalty == 0 { + assert.Less(t, math.Abs(expectedPenalty), tolerance) + } else { + assert.Less(t, math.Abs(expectedPenalty-score)/expectedPenalty, tolerance) + } + } +} + // withStakedIdentity returns a function that sets the identity provider to return an staked identity for the given peer id. // It is used for testing purposes, and causes the given peer id to benefit from the staked identity reward in GossipSub. func withStakedIdentity(peerId peer.ID) func(cfg *scoring.GossipSubAppSpecificScoreRegistryConfig) { @@ -454,16 +609,30 @@ func withInvalidSubscriptions(peer peer.ID) func(cfg *scoring.GossipSubAppSpecif } } -func withInitFunction(initFunction func() p2p.GossipSubSpamRecord) func(cfg *scoring.GossipSubAppSpecificScoreRegistryConfig) { +func withInitFunction(initFunction scoring.SpamRecordInitFunc) func(cfg *scoring.GossipSubAppSpecificScoreRegistryConfig) { return func(cfg *scoring.GossipSubAppSpecificScoreRegistryConfig) { cfg.Init = initFunction } } +type scoringRegistryParamsOpt func(*scoring.GossipSubAppSpecificScoreRegistryConfig) + // newGossipSubAppSpecificScoreRegistry returns a new instance of GossipSubAppSpecificScoreRegistry with default values // for the testing purposes. -func newGossipSubAppSpecificScoreRegistry(t *testing.T, opts ...func(*scoring.GossipSubAppSpecificScoreRegistryConfig)) (*scoring.GossipSubAppSpecificScoreRegistry, *netcache.GossipSubSpamRecordCache) { - cache := netcache.NewGossipSubSpamRecordCache(100, unittest.Logger(), metrics.NewNoopCollector(), scoring.DefaultDecayFunction()) +func newGossipSubAppSpecificScoreRegistry(t *testing.T, opts ...scoringRegistryParamsOpt) (*scoring.GossipSubAppSpecificScoreRegistry, *netcache.GossipSubSpamRecordCache) { + flowConfig, err := config.DefaultConfig() + require.NoError(t, err) + scoringRegistryConfig := flowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig + return newScoringRegistry(t, scoringRegistryConfig, opts...) +} + +func newScoringRegistry(t *testing.T, config p2pconf.GossipSubScoringRegistryConfig, opts ...scoringRegistryParamsOpt) (*scoring.GossipSubAppSpecificScoreRegistry, *netcache.GossipSubSpamRecordCache) { + cache := netcache.NewGossipSubSpamRecordCache( + 100, + unittest.Logger(), + metrics.NewNoopCollector(), + scoring.DefaultDecayFunction(config.PenaltyDecaySlowdownThreshold, config.DecayRateReductionFactor, config.PenaltyDecayEvaluationPeriod), + ) cfg := &scoring.GossipSubAppSpecificScoreRegistryConfig{ Logger: unittest.Logger(), Init: scoring.InitAppScoreRecordState, @@ -485,10 +654,30 @@ func newGossipSubAppSpecificScoreRegistry(t *testing.T, opts ...func(*scoring.Go // that the tests are not passing because of the default values. func penaltyValueFixtures() scoring.GossipSubCtrlMsgPenaltyValue { return scoring.GossipSubCtrlMsgPenaltyValue{ - Graft: -100, - Prune: -50, - IHave: -20, - IWant: -10, - RpcPublishMessage: -10, + Graft: -100, + Prune: -50, + IHave: -20, + IWant: -10, + ClusterPrefixedPenaltyReductionFactor: .5, + RpcPublishMessage: -10, + } +} + +// penaltyValueFixture returns the set penalty of the provided control message type returned from the fixture func penaltyValueFixtures. +func penaltyValueFixture(msgType p2pmsg.ControlMessageType) float64 { + penaltyValues := penaltyValueFixtures() + switch msgType { + case p2pmsg.CtrlMsgGraft: + return penaltyValues.Graft + case p2pmsg.CtrlMsgPrune: + return penaltyValues.Prune + case p2pmsg.CtrlMsgIHave: + return penaltyValues.IHave + case p2pmsg.CtrlMsgIWant: + return penaltyValues.IWant + case p2pmsg.RpcPublishMessage: + return penaltyValues.RpcPublishMessage + default: + return penaltyValues.ClusterPrefixedPenaltyReductionFactor } } diff --git a/network/p2p/scoring/score_option.go b/network/p2p/scoring/score_option.go index 0ae676005cb..f3955700479 100644 --- a/network/p2p/scoring/score_option.go +++ b/network/p2p/scoring/score_option.go @@ -9,10 +9,13 @@ import ( "github.com/rs/zerolog" "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/network/channels" "github.com/onflow/flow-go/network/p2p" netcache "github.com/onflow/flow-go/network/p2p/cache" + "github.com/onflow/flow-go/network/p2p/p2pconf" "github.com/onflow/flow-go/network/p2p/utils" "github.com/onflow/flow-go/utils/logging" ) @@ -292,7 +295,9 @@ const ( ) // ScoreOption is a functional option for configuring the peer scoring system. +// TODO: rename it to ScoreManager. type ScoreOption struct { + component.Component logger zerolog.Logger peerScoreParams *pubsub.PeerScoreParams @@ -378,24 +383,32 @@ func (c *ScoreOptionConfig) OverrideDecayInterval(interval time.Duration) { } // NewScoreOption creates a new penalty option with the given configuration. -func NewScoreOption(cfg *ScoreOptionConfig) *ScoreOption { +func NewScoreOption(scoreRegistryConfig p2pconf.GossipSubScoringRegistryConfig, scoreOptionConfig *ScoreOptionConfig, provider p2p.SubscriptionProvider) *ScoreOption { throttledSampler := logging.BurstSampler(MaxDebugLogs, time.Second) - logger := cfg.logger.With(). + logger := scoreOptionConfig.logger.With(). Str("module", "pubsub_score_option"). Logger(). Sample(zerolog.LevelSampler{ TraceSampler: throttledSampler, DebugSampler: throttledSampler, }) - validator := NewSubscriptionValidator() + validator := NewSubscriptionValidator(scoreOptionConfig.logger, provider) scoreRegistry := NewGossipSubAppSpecificScoreRegistry(&GossipSubAppSpecificScoreRegistryConfig{ Logger: logger, Penalty: DefaultGossipSubCtrlMsgPenaltyValue(), Validator: validator, Init: InitAppScoreRecordState, - IdProvider: cfg.provider, + IdProvider: scoreOptionConfig.provider, CacheFactory: func() p2p.GossipSubSpamRecordCache { - return netcache.NewGossipSubSpamRecordCache(cfg.cacheSize, cfg.logger, cfg.cacheMetrics, DefaultDecayFunction()) + return netcache.NewGossipSubSpamRecordCache( + scoreOptionConfig.cacheSize, + scoreOptionConfig.logger, + scoreOptionConfig.cacheMetrics, + DefaultDecayFunction( + scoreRegistryConfig.PenaltyDecaySlowdownThreshold, + scoreRegistryConfig.DecayRateReductionFactor, + scoreRegistryConfig.PenaltyDecayEvaluationPeriod, + )) }, }) s := &ScoreOption{ @@ -407,40 +420,55 @@ func NewScoreOption(cfg *ScoreOptionConfig) *ScoreOption { // set the app specific penalty function for the penalty option // if the app specific penalty function is not set, use the default one - if cfg.appScoreFunc != nil { - s.appScoreFunc = cfg.appScoreFunc + if scoreOptionConfig.appScoreFunc != nil { + s.appScoreFunc = scoreOptionConfig.appScoreFunc s.logger. Warn(). Str(logging.KeyNetworkingSecurity, "true"). Msg("app specific score function is overridden, should never happen in production") } - if cfg.decayInterval > 0 { + if scoreOptionConfig.decayInterval > 0 { // overrides the default decay interval if the decay interval is set. - s.peerScoreParams.DecayInterval = cfg.decayInterval + s.peerScoreParams.DecayInterval = scoreOptionConfig.decayInterval s.logger. Warn(). Str(logging.KeyNetworkingSecurity, "true"). - Dur("decay_interval_ms", cfg.decayInterval). + Dur("decay_interval_ms", scoreOptionConfig.decayInterval). Msg("decay interval is overridden, should never happen in production") } // registers the score registry as the consumer of the invalid control message notifications - if cfg.registerNotificationConsumerFunc != nil { - cfg.registerNotificationConsumerFunc(scoreRegistry) + if scoreOptionConfig.registerNotificationConsumerFunc != nil { + scoreOptionConfig.registerNotificationConsumerFunc(scoreRegistry) } s.peerScoreParams.AppSpecificScore = s.appScoreFunc // apply the topic penalty parameters if any. - for _, topicParams := range cfg.topicParams { + for _, topicParams := range scoreOptionConfig.topicParams { topicParams(s.peerScoreParams.Topics) } - return s -} -func (s *ScoreOption) SetSubscriptionProvider(provider *SubscriptionProvider) error { - return s.validator.RegisterSubscriptionProvider(provider) + s.Component = component.NewComponentManagerBuilder().AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + s.logger.Info().Msg("starting score registry") + scoreRegistry.Start(ctx) + select { + case <-ctx.Done(): + s.logger.Warn().Msg("stopping score registry; context done") + case <-scoreRegistry.Ready(): + s.logger.Info().Msg("score registry started") + ready() + s.logger.Info().Msg("score registry ready") + } + + <-ctx.Done() + s.logger.Info().Msg("stopping score registry") + <-scoreRegistry.Done() + s.logger.Info().Msg("score registry stopped") + }).Build() + + return s } func (s *ScoreOption) BuildFlowPubSubScoreOption() (*pubsub.PeerScoreParams, *pubsub.PeerScoreThresholds) { diff --git a/network/p2p/scoring/scoring_test.go b/network/p2p/scoring/scoring_test.go index c30736ae994..8e55e231195 100644 --- a/network/p2p/scoring/scoring_test.go +++ b/network/p2p/scoring/scoring_test.go @@ -46,10 +46,6 @@ func (m *mockInspectorSuite) AddInvalidControlMessageConsumer(consumer p2p.Gossi func (m *mockInspectorSuite) ActiveClustersChanged(_ flow.ChainIDList) { // no-op } -func (m *mockInspectorSuite) SetTopicOracle(_ func() []string) error { - // no-op - return nil -} // newMockInspectorSuite creates a new mockInspectorSuite. // Args: @@ -97,7 +93,8 @@ func TestInvalidCtrlMsgScoringIntegration(t *testing.T) { module.GossipSubMetrics, metrics.HeroCacheMetricsFactory, flownet.NetworkingType, - module.IdentityProvider) (p2p.GossipSubInspectorSuite, error) { + module.IdentityProvider, + func() p2p.TopicProvider) (p2p.GossipSubInspectorSuite, error) { // override the gossipsub rpc inspector suite factory to return the mock inspector suite return inspectorSuite1, nil } @@ -140,8 +137,9 @@ func TestInvalidCtrlMsgScoringIntegration(t *testing.T) { return unittest.ProposalFixture() }) - // now simulates node2 spamming node1 with invalid gossipsub control messages. - for i := 0; i < 30; i++ { + // simulates node2 spamming node1 with invalid gossipsub control messages until node2 gets dissallow listed. + // since the decay will start lower than .99 and will only be incremented by default .01, we need to spam a lot of messages so that the node gets disallow listed + for i := 0; i < 750; i++ { inspectorSuite1.consumer.OnInvalidControlMessageNotification(&p2p.InvCtrlMsgNotif{ PeerID: node2.ID(), MsgType: p2pmsg.ControlMessageTypes()[rand.Intn(len(p2pmsg.ControlMessageTypes()))], diff --git a/network/p2p/scoring/subscriptionCache.go b/network/p2p/scoring/subscriptionCache.go new file mode 100644 index 00000000000..8eae60bd385 --- /dev/null +++ b/network/p2p/scoring/subscriptionCache.go @@ -0,0 +1,35 @@ +package scoring + +import "github.com/libp2p/go-libp2p/core/peer" + +// SubscriptionCache implements an in-memory cache that keeps track of the topics a peer is subscribed to. +// The cache is modeled abstracted to be used in update cycles, i.e., every regular interval of time, the cache is updated for +// all peers. +type SubscriptionCache interface { + // GetSubscribedTopics returns the list of topics a peer is subscribed to. + // Returns: + // - []string: the list of topics the peer is subscribed to. + // - bool: true if there is a record for the peer, false otherwise. + GetSubscribedTopics(pid peer.ID) ([]string, bool) + + // MoveToNextUpdateCycle moves the subscription cache to the next update cycle. + // A new update cycle is started when the subscription cache is first created, and then every time the subscription cache + // is updated. The update cycle is used to keep track of the last time the subscription cache was updated. It is used to + // implement a notion of time in the subscription cache. + // Returns: + // - uint64: the current update cycle. + MoveToNextUpdateCycle() uint64 + + // AddTopicForPeer appends a topic to the list of topics a peer is subscribed to. If the peer is not subscribed to any + // topics yet, a new record is created. + // If the last update cycle is older than the current cycle, the list of topics for the peer is first cleared, and then + // the topic is added to the list. This is to ensure that the list of topics for a peer is always up to date. + // Args: + // - pid: the peer id of the peer. + // - topic: the topic to add. + // Returns: + // - []string: the list of topics the peer is subscribed to after the update. + // - error: an error if the update failed; any returned error is an irrecoverable error and indicates a bug or misconfiguration. + // Implementation must be thread-safe. + AddTopicForPeer(pid peer.ID, topic string) ([]string, error) +} diff --git a/network/p2p/scoring/subscription_provider.go b/network/p2p/scoring/subscription_provider.go index 23aea760de1..4f6918a81a0 100644 --- a/network/p2p/scoring/subscription_provider.go +++ b/network/p2p/scoring/subscription_provider.go @@ -1,123 +1,160 @@ package scoring import ( - "sync" + "fmt" + "time" + "github.com/go-playground/validator/v10" "github.com/libp2p/go-libp2p/core/peer" "github.com/rs/zerolog" "go.uber.org/atomic" + "github.com/onflow/flow-go/module" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/metrics" "github.com/onflow/flow-go/network/p2p" + "github.com/onflow/flow-go/network/p2p/p2pconf" + "github.com/onflow/flow-go/network/p2p/p2plogging" + "github.com/onflow/flow-go/network/p2p/scoring/internal" + "github.com/onflow/flow-go/utils/logging" ) // SubscriptionProvider provides a list of topics a peer is subscribed to. type SubscriptionProvider struct { - logger zerolog.Logger - tp p2p.TopicProvider + component.Component + logger zerolog.Logger + topicProviderOracle func() p2p.TopicProvider - // allTopics is a list of all topics in the pubsub network // TODO: we should add an expiry time to this cache and clean up the cache periodically // to avoid leakage of stale topics. - peersByTopic sync.Map // map[topic]peers - peersByTopicUpdating sync.Map // whether a goroutine is already updating the list of peers for a topic + cache SubscriptionCache + + // idProvider translates the peer ids to flow ids. + idProvider module.IdentityProvider // allTopics is a list of all topics in the pubsub network that this node is subscribed to. - allTopicsLock sync.RWMutex // protects allTopics - allTopics []string // list of all topics in the pubsub network that this node has subscribed to. - allTopicsUpdate atomic.Bool // whether a goroutine is already updating the list of topics. + allTopicsUpdate atomic.Bool // whether a goroutine is already updating the list of topics + allTopicsUpdateInterval time.Duration // the interval for updating the list of topics in the pubsub network that this node has subscribed to. } -func NewSubscriptionProvider(logger zerolog.Logger, tp p2p.TopicProvider) *SubscriptionProvider { - return &SubscriptionProvider{ - logger: logger.With().Str("module", "subscription_provider").Logger(), - tp: tp, - allTopics: make([]string, 0), - } +type SubscriptionProviderConfig struct { + Logger zerolog.Logger `validate:"required"` + TopicProviderOracle func() p2p.TopicProvider `validate:"required"` + IdProvider module.IdentityProvider `validate:"required"` + HeroCacheMetricsFactory metrics.HeroCacheMetricsFactory `validate:"required"` + Params *p2pconf.SubscriptionProviderParameters `validate:"required"` } -// GetSubscribedTopics returns all the subscriptions of a peer within the pubsub network. -// Note that the current node can only see peer subscriptions to topics that it has also subscribed to -// e.g., if current node has subscribed to topics A and B, and peer1 has subscribed to topics A, B, and C, -// then GetSubscribedTopics(peer1) will return A and B. Since this node has not subscribed to topic C, -// it will not be able to query for other peers subscribed to topic C. -func (s *SubscriptionProvider) GetSubscribedTopics(pid peer.ID) []string { - topics := s.getAllTopics() +var _ p2p.SubscriptionProvider = (*SubscriptionProvider)(nil) - // finds the topics that this peer is subscribed to. - subscriptions := make([]string, 0) - for _, topic := range topics { - peers := s.getPeersByTopic(topic) - for _, p := range peers { - if p == pid { - subscriptions = append(subscriptions, topic) - } - } +func NewSubscriptionProvider(cfg *SubscriptionProviderConfig) (*SubscriptionProvider, error) { + if err := validator.New().Struct(cfg); err != nil { + return nil, fmt.Errorf("invalid subscription provider config: %w", err) } - return subscriptions -} + cacheMetrics := metrics.NewSubscriptionRecordCacheMetricsFactory(cfg.HeroCacheMetricsFactory) + cache := internal.NewSubscriptionRecordCache(cfg.Params.CacheSize, cfg.Logger, cacheMetrics) -// getAllTopics returns all the topics in the pubsub network that this node (peer) has subscribed to. -// Note that this method always returns the cached version of the subscribed topics while querying the -// pubsub network for the list of topics in a goroutine. Hence, the first call to this method always returns an empty -// list. -func (s *SubscriptionProvider) getAllTopics() []string { - go func() { - // TODO: refactor this to a component manager worker once we have a startable libp2p node. - if updateInProgress := s.allTopicsUpdate.CompareAndSwap(false, true); updateInProgress { - // another goroutine is already updating the list of topics - return - } + p := &SubscriptionProvider{ + logger: cfg.Logger.With().Str("module", "subscription_provider").Logger(), + topicProviderOracle: cfg.TopicProviderOracle, + allTopicsUpdateInterval: cfg.Params.SubscriptionUpdateInterval, + idProvider: cfg.IdProvider, + cache: cache, + } + + builder := component.NewComponentManagerBuilder() + p.Component = builder.AddWorker( + func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + ready() + p.logger.Debug(). + Float64("update_interval_seconds", cfg.Params.SubscriptionUpdateInterval.Seconds()). + Msg("subscription provider started; starting update topics loop") + p.updateTopicsLoop(ctx) - allTopics := s.tp.GetTopics() - s.atomicUpdateAllTopics(allTopics) + <-ctx.Done() + p.logger.Debug().Msg("subscription provider stopped; stopping update topics loop") + }).Build() - // remove the update flag - s.allTopicsUpdate.Store(false) + return p, nil +} - s.logger.Trace().Msgf("all topics updated: %v", allTopics) - }() +func (s *SubscriptionProvider) updateTopicsLoop(ctx irrecoverable.SignalerContext) { + ticker := time.NewTicker(s.allTopicsUpdateInterval) + defer ticker.Stop() - s.allTopicsLock.RLock() - defer s.allTopicsLock.RUnlock() - return s.allTopics + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + if err := s.updateTopics(); err != nil { + ctx.Throw(fmt.Errorf("update loop failed: %w", err)) + return + } + } + } } -// getPeersByTopic returns all the peers subscribed to a topic. -// Note that this method always returns the cached version of the subscribed peers while querying the +// updateTopics returns all the topics in the pubsub network that this node (peer) has subscribed to. +// Note that this method always returns the cached version of the subscribed topics while querying the // pubsub network for the list of topics in a goroutine. Hence, the first call to this method always returns an empty // list. -// As this method is injected into GossipSub, it is vital that it never block the caller, otherwise it causes a -// deadlock on the GossipSub. -// Also note that, this peer itself should be subscribed to the topic, otherwise, it cannot find the list of peers -// subscribed to the topic in the pubsub network due to an inherent limitation of GossipSub. -func (s *SubscriptionProvider) getPeersByTopic(topic string) []peer.ID { - go func() { - // TODO: refactor this to a component manager worker once we have a startable libp2p node. - if _, updateInProgress := s.peersByTopicUpdating.LoadOrStore(topic, true); updateInProgress { - // another goroutine is already updating the list of peers for this topic - return - } +// Args: +// - ctx: the context of the caller. +// Returns: +// - error on failure to update the list of topics. The returned error is irrecoverable and indicates an exception. +func (s *SubscriptionProvider) updateTopics() error { + if updateInProgress := s.allTopicsUpdate.CompareAndSwap(false, true); updateInProgress { + // another goroutine is already updating the list of topics + s.logger.Trace().Msg("skipping topic update; another update is already in progress") + return nil + } - subscribedPeers := s.tp.ListPeers(topic) - s.peersByTopic.Store(topic, subscribedPeers) + // start of critical section; protected by updateInProgress atomic flag + allTopics := s.topicProviderOracle().GetTopics() + s.logger.Trace().Msgf("all topics updated: %v", allTopics) - // remove the update flag - s.peersByTopicUpdating.Delete(topic) + // increments the update cycle of the cache; so that the previous cache entries are invalidated upon a read or write. + s.cache.MoveToNextUpdateCycle() + for _, topic := range allTopics { + peers := s.topicProviderOracle().ListPeers(topic) - s.logger.Trace().Str("topic", topic).Msgf("peers by topic updated: %v", subscribedPeers) - }() + for _, p := range peers { + if _, authorized := s.idProvider.ByPeerID(p); !authorized { + // peer is not authorized (staked); hence it does not have a valid role in the network; and + // we skip the topic update for this peer (also avoiding sybil attacks on the cache). + s.logger.Debug(). + Str("remote_peer_id", p2plogging.PeerId(p)). + Bool(logging.KeyNetworkingSecurity, true). + Msg("skipping topic update for unauthorized peer") + continue + } - peerId, ok := s.peersByTopic.Load(topic) - if !ok { - return make([]peer.ID, 0) + updatedTopics, err := s.cache.AddTopicForPeer(p, topic) + if err != nil { + // this is an irrecoverable error; hence, we crash the node. + return fmt.Errorf("failed to update topics for peer %s: %w", p, err) + } + s.logger.Debug(). + Str("remote_peer_id", p2plogging.PeerId(p)). + Strs("updated_topics", updatedTopics). + Msg("updated topics for peer") + } } - return peerId.([]peer.ID) + + // remove the update flag; end of critical section + s.allTopicsUpdate.Store(false) + return nil } -// atomicUpdateAllTopics updates the list of all topics in the pubsub network that this node has subscribed to. -func (s *SubscriptionProvider) atomicUpdateAllTopics(allTopics []string) { - s.allTopicsLock.Lock() - s.allTopics = allTopics - s.allTopicsLock.Unlock() +// GetSubscribedTopics returns all the subscriptions of a peer within the pubsub network. +func (s *SubscriptionProvider) GetSubscribedTopics(pid peer.ID) []string { + topics, ok := s.cache.GetSubscribedTopics(pid) + if !ok { + s.logger.Trace().Str("peer_id", p2plogging.PeerId(pid)).Msg("no topics found for peer") + return nil + } + return topics } diff --git a/network/p2p/scoring/subscription_provider_test.go b/network/p2p/scoring/subscription_provider_test.go index 25d4be455c8..cb3b45ecbd1 100644 --- a/network/p2p/scoring/subscription_provider_test.go +++ b/network/p2p/scoring/subscription_provider_test.go @@ -1,13 +1,21 @@ package scoring_test import ( + "context" "testing" "time" "github.com/libp2p/go-libp2p/core/peer" "github.com/stretchr/testify/assert" + mockery "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" - "github.com/onflow/flow-go/network/internal/p2pfixtures" + "github.com/onflow/flow-go/config" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/irrecoverable" + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/module/mock" + "github.com/onflow/flow-go/network/p2p" mockp2p "github.com/onflow/flow-go/network/p2p/mock" "github.com/onflow/flow-go/network/p2p/scoring" "github.com/onflow/flow-go/utils/slices" @@ -18,20 +26,47 @@ import ( // list of topics a peer is subscribed to. func TestSubscriptionProvider_GetSubscribedTopics(t *testing.T) { tp := mockp2p.NewTopicProvider(t) - sp := scoring.NewSubscriptionProvider(unittest.Logger(), tp) + cfg, err := config.DefaultConfig() + require.NoError(t, err) + idProvider := mock.NewIdentityProvider(t) + + // set a low update interval to speed up the test + cfg.NetworkConfig.SubscriptionProviderConfig.SubscriptionUpdateInterval = 100 * time.Millisecond + + sp, err := scoring.NewSubscriptionProvider(&scoring.SubscriptionProviderConfig{ + Logger: unittest.Logger(), + TopicProviderOracle: func() p2p.TopicProvider { + return tp + }, + Params: &cfg.NetworkConfig.SubscriptionProviderConfig, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + IdProvider: idProvider, + }) + require.NoError(t, err) tp.On("GetTopics").Return([]string{"topic1", "topic2", "topic3"}).Maybe() - peer1 := p2pfixtures.PeerIdFixture(t) - peer2 := p2pfixtures.PeerIdFixture(t) - peer3 := p2pfixtures.PeerIdFixture(t) + peer1 := unittest.PeerIdFixture(t) + peer2 := unittest.PeerIdFixture(t) + peer3 := unittest.PeerIdFixture(t) + + idProvider.On("ByPeerID", mockery.Anything).Return(unittest.IdentityFixture(), true).Maybe() // mock peers 1 and 2 subscribed to topic 1 (along with other random peers) - tp.On("ListPeers", "topic1").Return(append([]peer.ID{peer1, peer2}, p2pfixtures.PeerIdsFixture(t, 10)...)) + tp.On("ListPeers", "topic1").Return(append([]peer.ID{peer1, peer2}, unittest.PeerIdFixtures(t, 10)...)) // mock peers 2 and 3 subscribed to topic 2 (along with other random peers) - tp.On("ListPeers", "topic2").Return(append([]peer.ID{peer2, peer3}, p2pfixtures.PeerIdsFixture(t, 10)...)) + tp.On("ListPeers", "topic2").Return(append([]peer.ID{peer2, peer3}, unittest.PeerIdFixtures(t, 10)...)) // mock peers 1 and 3 subscribed to topic 3 (along with other random peers) - tp.On("ListPeers", "topic3").Return(append([]peer.ID{peer1, peer3}, p2pfixtures.PeerIdsFixture(t, 10)...)) + tp.On("ListPeers", "topic3").Return(append([]peer.ID{peer1, peer3}, unittest.PeerIdFixtures(t, 10)...)) + + ctx, cancel := context.WithCancel(context.Background()) + defer func() { + cancel() + unittest.RequireCloseBefore(t, sp.Done(), 1*time.Second, "subscription provider did not stop in time") + }() + signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) + sp.Start(signalerCtx) + unittest.RequireCloseBefore(t, sp.Ready(), 1*time.Second, "subscription provider did not start in time") // As the calls to the TopicProvider are asynchronous, we need to wait for the goroutines to finish. assert.Eventually(t, func() bool { @@ -46,3 +81,76 @@ func TestSubscriptionProvider_GetSubscribedTopics(t *testing.T) { return slices.AreStringSlicesEqual([]string{"topic2", "topic3"}, sp.GetSubscribedTopics(peer3)) }, 1*time.Second, 100*time.Millisecond) } + +// TestSubscriptionProvider_GetSubscribedTopics_SkippingUnknownPeers tests that the SubscriptionProvider skips +// unknown peers when returning the list of topics a peer is subscribed to. In other words, if a peer is unknown, +// the SubscriptionProvider should not keep track of its subscriptions. +func TestSubscriptionProvider_GetSubscribedTopics_SkippingUnknownPeers(t *testing.T) { + tp := mockp2p.NewTopicProvider(t) + cfg, err := config.DefaultConfig() + require.NoError(t, err) + idProvider := mock.NewIdentityProvider(t) + + // set a low update interval to speed up the test + cfg.NetworkConfig.SubscriptionProviderConfig.SubscriptionUpdateInterval = 100 * time.Millisecond + + sp, err := scoring.NewSubscriptionProvider(&scoring.SubscriptionProviderConfig{ + Logger: unittest.Logger(), + TopicProviderOracle: func() p2p.TopicProvider { + return tp + }, + Params: &cfg.NetworkConfig.SubscriptionProviderConfig, + HeroCacheMetricsFactory: metrics.NewNoopHeroCacheMetricsFactory(), + IdProvider: idProvider, + }) + require.NoError(t, err) + + tp.On("GetTopics").Return([]string{"topic1", "topic2", "topic3"}).Maybe() + + peer1 := unittest.PeerIdFixture(t) + peer2 := unittest.PeerIdFixture(t) + peer3 := unittest.PeerIdFixture(t) + + // mock peers 1 and 2 as a known peer; peer 3 as an unknown peer + idProvider.On("ByPeerID", mockery.Anything). + Return(func(pid peer.ID) *flow.Identity { + if pid == peer1 || pid == peer2 { + return unittest.IdentityFixture() + } + return nil + }, func(pid peer.ID) bool { + if pid == peer1 || pid == peer2 { + return true + } + return false + }).Maybe() + + // mock peers 1 and 2 subscribed to topic 1 (along with other random peers) + tp.On("ListPeers", "topic1").Return(append([]peer.ID{peer1, peer2}, unittest.PeerIdFixtures(t, 10)...)) + // mock peers 2 and 3 subscribed to topic 2 (along with other random peers) + tp.On("ListPeers", "topic2").Return(append([]peer.ID{peer2, peer3}, unittest.PeerIdFixtures(t, 10)...)) + // mock peers 1 and 3 subscribed to topic 3 (along with other random peers) + tp.On("ListPeers", "topic3").Return(append([]peer.ID{peer1, peer3}, unittest.PeerIdFixtures(t, 10)...)) + + ctx, cancel := context.WithCancel(context.Background()) + defer func() { + cancel() + unittest.RequireCloseBefore(t, sp.Done(), 1*time.Second, "subscription provider did not stop in time") + }() + signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) + sp.Start(signalerCtx) + unittest.RequireCloseBefore(t, sp.Ready(), 1*time.Second, "subscription provider did not start in time") + + // As the calls to the TopicProvider are asynchronous, we need to wait for the goroutines to finish. + // peer 1 should be eventually subscribed to topic 1 and topic 3; while peer 3 should not have any subscriptions record since it is unknown + assert.Eventually(t, func() bool { + return slices.AreStringSlicesEqual([]string{"topic1", "topic3"}, sp.GetSubscribedTopics(peer1)) && + slices.AreStringSlicesEqual([]string{}, sp.GetSubscribedTopics(peer3)) + }, 1*time.Second, 100*time.Millisecond) + + // peer 2 should be eventually subscribed to topic 1 and topic 2; while peer 3 should not have any subscriptions record since it is unknown + assert.Eventually(t, func() bool { + return slices.AreStringSlicesEqual([]string{"topic1", "topic2"}, sp.GetSubscribedTopics(peer2)) && + slices.AreStringSlicesEqual([]string{}, sp.GetSubscribedTopics(peer3)) + }, 1*time.Second, 100*time.Millisecond) +} diff --git a/network/p2p/scoring/subscription_validator.go b/network/p2p/scoring/subscription_validator.go index fbffe27752a..8c3fc048168 100644 --- a/network/p2p/scoring/subscription_validator.go +++ b/network/p2p/scoring/subscription_validator.go @@ -1,47 +1,55 @@ package scoring import ( - "fmt" - "github.com/libp2p/go-libp2p/core/peer" + "github.com/rs/zerolog" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/component" + "github.com/onflow/flow-go/module/irrecoverable" "github.com/onflow/flow-go/network/p2p" + "github.com/onflow/flow-go/network/p2p/p2plogging" p2putils "github.com/onflow/flow-go/network/p2p/utils" ) // SubscriptionValidator validates that a peer is subscribed to topics that it is allowed to subscribe to. // It is used to penalize peers that subscribe to topics that they are not allowed to subscribe to in GossipSub. type SubscriptionValidator struct { + component.Component + logger zerolog.Logger subscriptionProvider p2p.SubscriptionProvider } -func NewSubscriptionValidator() *SubscriptionValidator { - return &SubscriptionValidator{} -} +func NewSubscriptionValidator(logger zerolog.Logger, provider p2p.SubscriptionProvider) *SubscriptionValidator { + v := &SubscriptionValidator{ + logger: logger.With().Str("component", "subscription_validator").Logger(), + subscriptionProvider: provider, + } -var _ p2p.SubscriptionValidator = (*SubscriptionValidator)(nil) + v.Component = component.NewComponentManagerBuilder(). + AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { + logger.Debug().Msg("starting subscription validator") + v.subscriptionProvider.Start(ctx) + select { + case <-ctx.Done(): + logger.Debug().Msg("subscription validator is stopping") + case <-v.subscriptionProvider.Ready(): + logger.Debug().Msg("subscription validator started") + ready() + logger.Debug().Msg("subscription validator is ready") + } -// RegisterSubscriptionProvider registers the subscription provider with the subscription validator. -// This follows a dependency injection pattern. -// Args: -// -// provider: the subscription provider -// -// Returns: -// -// error: if the subscription provider is nil, an error is returned. The error is irrecoverable, i.e., -// it indicates an illegal state in the execution of the code. We expect this error only when there is a bug in the code. -// Such errors should lead to a crash of the node. -func (v *SubscriptionValidator) RegisterSubscriptionProvider(provider p2p.SubscriptionProvider) error { - if v.subscriptionProvider != nil { - return fmt.Errorf("subscription provider already registered") - } - v.subscriptionProvider = provider + <-ctx.Done() + logger.Debug().Msg("subscription validator is stopping") + <-v.subscriptionProvider.Done() + logger.Debug().Msg("subscription validator stopped") + }).Build() - return nil + return v } +var _ p2p.SubscriptionValidator = (*SubscriptionValidator)(nil) + // CheckSubscribedToAllowedTopics checks if a peer is subscribed to topics that it is allowed to subscribe to. // Args: // @@ -53,7 +61,10 @@ func (v *SubscriptionValidator) RegisterSubscriptionProvider(provider p2p.Subscr // The error is benign, i.e., it does not indicate an illegal state in the execution of the code. We expect this error // when there are malicious peers in the network. But such errors should not lead to a crash of the node. func (v *SubscriptionValidator) CheckSubscribedToAllowedTopics(pid peer.ID, role flow.Role) error { + lg := v.logger.With().Str("remote_peer_id", p2plogging.PeerId(pid)).Logger() + topics := v.subscriptionProvider.GetSubscribedTopics(pid) + lg.Trace().Strs("topics", topics).Msg("checking subscription for remote peer id") for _, topic := range topics { if !p2putils.AllowedSubscription(role, topic) { @@ -61,5 +72,6 @@ func (v *SubscriptionValidator) CheckSubscribedToAllowedTopics(pid peer.ID, role } } + lg.Trace().Msg("subscription is valid") return nil } diff --git a/network/p2p/scoring/subscription_validator_test.go b/network/p2p/scoring/subscription_validator_test.go index 338d26d67c5..770f74cf146 100644 --- a/network/p2p/scoring/subscription_validator_test.go +++ b/network/p2p/scoring/subscription_validator_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/onflow/flow-go/config" "github.com/onflow/flow-go/network/message" "github.com/onflow/flow-go/network/p2p" p2ptest "github.com/onflow/flow-go/network/p2p/test" @@ -32,12 +33,10 @@ import ( // any topic, the subscription validator returns no error. func TestSubscriptionValidator_NoSubscribedTopic(t *testing.T) { sp := mockp2p.NewSubscriptionProvider(t) - - sv := scoring.NewSubscriptionValidator() - require.NoError(t, sv.RegisterSubscriptionProvider(sp)) + sv := scoring.NewSubscriptionValidator(unittest.Logger(), sp) // mocks peer 1 not subscribed to any topic. - peer1 := p2pfixtures.PeerIdFixture(t) + peer1 := unittest.PeerIdFixture(t) sp.On("GetSubscribedTopics", peer1).Return([]string{}) // as peer 1 has not subscribed to any topic, the subscription validator should return no error regardless of the @@ -51,11 +50,10 @@ func TestSubscriptionValidator_NoSubscribedTopic(t *testing.T) { // topic, the subscription validator returns an error. func TestSubscriptionValidator_UnknownChannel(t *testing.T) { sp := mockp2p.NewSubscriptionProvider(t) - sv := scoring.NewSubscriptionValidator() - require.NoError(t, sv.RegisterSubscriptionProvider(sp)) + sv := scoring.NewSubscriptionValidator(unittest.Logger(), sp) // mocks peer 1 not subscribed to an unknown topic. - peer1 := p2pfixtures.PeerIdFixture(t) + peer1 := unittest.PeerIdFixture(t) sp.On("GetSubscribedTopics", peer1).Return([]string{"unknown-topic-1", "unknown-topic-2"}) // as peer 1 has subscribed to unknown topics, the subscription validator should return an error @@ -71,11 +69,10 @@ func TestSubscriptionValidator_UnknownChannel(t *testing.T) { // topics based on its Flow protocol role, the subscription validator returns no error. func TestSubscriptionValidator_ValidSubscriptions(t *testing.T) { sp := mockp2p.NewSubscriptionProvider(t) - sv := scoring.NewSubscriptionValidator() - require.NoError(t, sv.RegisterSubscriptionProvider(sp)) + sv := scoring.NewSubscriptionValidator(unittest.Logger(), sp) for _, role := range flow.Roles() { - peerId := p2pfixtures.PeerIdFixture(t) + peerId := unittest.PeerIdFixture(t) // allowed channels for the role excluding the test channels. allowedChannels := channels.ChannelsByRole(role).ExcludePattern(regexp.MustCompile("^(test).*")) sporkID := unittest.IdentifierFixture() @@ -102,8 +99,7 @@ func TestSubscriptionValidator_ValidSubscriptions(t *testing.T) { // is no longer true. func TestSubscriptionValidator_SubscribeToAllTopics(t *testing.T) { sp := mockp2p.NewSubscriptionProvider(t) - sv := scoring.NewSubscriptionValidator() - require.NoError(t, sv.RegisterSubscriptionProvider(sp)) + sv := scoring.NewSubscriptionValidator(unittest.Logger(), sp) allChannels := channels.Channels().ExcludePattern(regexp.MustCompile("^(test).*")) sporkID := unittest.IdentifierFixture() @@ -113,7 +109,7 @@ func TestSubscriptionValidator_SubscribeToAllTopics(t *testing.T) { } for _, role := range flow.Roles() { - peerId := p2pfixtures.PeerIdFixture(t) + peerId := unittest.PeerIdFixture(t) sp.On("GetSubscribedTopics", peerId).Return(allTopics) err := sv.CheckSubscribedToAllowedTopics(peerId, role) require.Error(t, err, role) @@ -125,11 +121,10 @@ func TestSubscriptionValidator_SubscribeToAllTopics(t *testing.T) { // topics based on its Flow protocol role, the subscription validator returns an error. func TestSubscriptionValidator_InvalidSubscriptions(t *testing.T) { sp := mockp2p.NewSubscriptionProvider(t) - sv := scoring.NewSubscriptionValidator() - require.NoError(t, sv.RegisterSubscriptionProvider(sp)) + sv := scoring.NewSubscriptionValidator(unittest.Logger(), sp) for _, role := range flow.Roles() { - peerId := p2pfixtures.PeerIdFixture(t) + peerId := unittest.PeerIdFixture(t) unauthorizedChannels := channels.Channels(). // all channels ExcludeChannels(channels.ChannelsByRole(role)). // excluding the channels for the role ExcludePattern(regexp.MustCompile("^(test).*")) // excluding the test channels. @@ -172,6 +167,11 @@ func TestSubscriptionValidator_Integration(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) signalerCtx := irrecoverable.NewMockSignalerContext(t, ctx) + cfg, err := config.DefaultConfig() + require.NoError(t, err) + // set a low update interval to speed up the test + cfg.NetworkConfig.SubscriptionProviderConfig.SubscriptionUpdateInterval = 100 * time.Millisecond + sporkId := unittest.IdentifierFixture() idProvider := mock.NewIdentityProvider(t) @@ -179,6 +179,7 @@ func TestSubscriptionValidator_Integration(t *testing.T) { conNode, conId := p2ptest.NodeFixture(t, sporkId, t.Name(), idProvider, p2ptest.WithLogger(unittest.Logger()), + p2ptest.OverrideFlowConfig(cfg), p2ptest.EnablePeerScoringWithOverride(p2p.PeerScoringConfigNoOverride), p2ptest.WithRole(flow.RoleConsensus)) @@ -186,12 +187,14 @@ func TestSubscriptionValidator_Integration(t *testing.T) { verNode1, verId1 := p2ptest.NodeFixture(t, sporkId, t.Name(), idProvider, p2ptest.WithLogger(unittest.Logger()), + p2ptest.OverrideFlowConfig(cfg), p2ptest.EnablePeerScoringWithOverride(p2p.PeerScoringConfigNoOverride), p2ptest.WithRole(flow.RoleVerification)) verNode2, verId2 := p2ptest.NodeFixture(t, sporkId, t.Name(), idProvider, p2ptest.WithLogger(unittest.Logger()), + p2ptest.OverrideFlowConfig(cfg), p2ptest.EnablePeerScoringWithOverride(p2p.PeerScoringConfigNoOverride), p2ptest.WithRole(flow.RoleVerification)) diff --git a/network/p2p/scoring/utils_test.go b/network/p2p/scoring/utils_test.go index c3c7284c55d..3ddfdb09e97 100644 --- a/network/p2p/scoring/utils_test.go +++ b/network/p2p/scoring/utils_test.go @@ -7,14 +7,13 @@ import ( "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/mock" - "github.com/onflow/flow-go/network/internal/p2pfixtures" "github.com/onflow/flow-go/network/p2p/scoring" "github.com/onflow/flow-go/utils/unittest" ) // TestHasValidIdentity_Unknown tests that when a peer has an unknown identity, the HasValidIdentity returns InvalidPeerIDError func TestHasValidIdentity_Unknown(t *testing.T) { - peerId := p2pfixtures.PeerIdFixture(t) + peerId := unittest.PeerIdFixture(t) idProvider := mock.NewIdentityProvider(t) idProvider.On("ByPeerID", peerId).Return(nil, false) @@ -31,7 +30,7 @@ func TestHasValidIdentity_Ejected(t *testing.T) { ejectedIdentity := unittest.IdentityFixture() ejectedIdentity.EpochParticipationStatus = flow.EpochParticipationStatusEjected - peerId := p2pfixtures.PeerIdFixture(t) + peerId := unittest.PeerIdFixture(t) idProvider.On("ByPeerID", peerId).Return(ejectedIdentity, true) identity, err := scoring.HasValidFlowIdentity(idProvider, peerId) @@ -46,7 +45,7 @@ func TestHasValidIdentity_Valid(t *testing.T) { idProvider := mock.NewIdentityProvider(t) trueID := unittest.IdentityFixture() - peerId := p2pfixtures.PeerIdFixture(t) + peerId := unittest.PeerIdFixture(t) idProvider.On("ByPeerID", peerId).Return(trueID, true) identity, err := scoring.HasValidFlowIdentity(idProvider, peerId) diff --git a/network/p2p/stream.go b/network/p2p/stream.go index 7b73187b100..a012ef8926c 100644 --- a/network/p2p/stream.go +++ b/network/p2p/stream.go @@ -12,14 +12,8 @@ import ( // it can create libp2p streams with finer granularity. type StreamFactory interface { SetStreamHandler(protocol.ID, network.StreamHandler) - // Connect connects host to peer with peerAddrInfo. - // All errors returned from this function can be considered benign. We expect the following errors during normal operations: - // - ErrSecurityProtocolNegotiationFailed this indicates there was an issue upgrading the connection. - // - ErrGaterDisallowedConnection this indicates the connection was disallowed by the gater. - // - There may be other unexpected errors from libp2p but they should be considered benign. - Connect(context.Context, peer.AddrInfo) error // NewStream creates a new stream on the libp2p host. // Expected errors during normal operations: // - ErrProtocolNotSupported this indicates remote node is running on a different spork. - NewStream(context.Context, peer.ID, ...protocol.ID) (network.Stream, error) + NewStream(context.Context, peer.ID, protocol.ID) (network.Stream, error) } diff --git a/network/p2p/subscription.go b/network/p2p/subscription.go index 9d4a117d0bc..99212b566d1 100644 --- a/network/p2p/subscription.go +++ b/network/p2p/subscription.go @@ -7,10 +7,12 @@ import ( "github.com/libp2p/go-libp2p/core/peer" "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/module/component" ) // SubscriptionProvider provides a list of topics a peer is subscribed to. type SubscriptionProvider interface { + component.Component // GetSubscribedTopics returns all the subscriptions of a peer within the pubsub network. // Note that the current peer must be subscribed to the topic for it to the same topics in order // to query for other peers, e.g., if current peer has subscribed to topics A and B, and peer1 @@ -22,9 +24,7 @@ type SubscriptionProvider interface { // SubscriptionValidator validates the subscription of a peer to a topic. // It is used to ensure that a peer is only subscribed to topics that it is allowed to subscribe to. type SubscriptionValidator interface { - // RegisterSubscriptionProvider registers the subscription provider with the subscription validator. - // If there is a subscription provider already registered, it will be replaced by the new one. - RegisterSubscriptionProvider(provider SubscriptionProvider) error + component.Component // CheckSubscribedToAllowedTopics checks if a peer is subscribed to topics that it is allowed to subscribe to. // Args: // pid: the peer ID of the peer to check diff --git a/network/p2p/test/fixtures.go b/network/p2p/test/fixtures.go index 9bd32254454..1b6bce5eb7c 100644 --- a/network/p2p/test/fixtures.go +++ b/network/p2p/test/fixtures.go @@ -9,6 +9,7 @@ import ( "time" dht "github.com/libp2p/go-libp2p-kad-dht" + pb "github.com/libp2p/go-libp2p-pubsub/pb" "github.com/libp2p/go-libp2p/core/connmgr" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" @@ -17,7 +18,9 @@ import ( "github.com/libp2p/go-libp2p/core/routing" discoveryBackoff "github.com/libp2p/go-libp2p/p2p/discovery/backoff" "github.com/rs/zerolog" + mockery "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" "github.com/onflow/flow-go/config" "github.com/onflow/flow-go/crypto" @@ -32,6 +35,7 @@ import ( "github.com/onflow/flow-go/network/p2p" "github.com/onflow/flow-go/network/p2p/connection" p2pdht "github.com/onflow/flow-go/network/p2p/dht" + mockp2p "github.com/onflow/flow-go/network/p2p/mock" "github.com/onflow/flow-go/network/p2p/p2pbuilder" p2pconfig "github.com/onflow/flow-go/network/p2p/p2pbuilder/config" "github.com/onflow/flow-go/network/p2p/tracer" @@ -55,6 +59,11 @@ const ( // expected to be necessary. Any failure to start a node within this timeout is likely to be // caused by a bug in the code. libp2pNodeShutdownTimeout = 10 * time.Second + + // topicIDFixtureLen is the length of the topic ID fixture for testing. + topicIDFixtureLen = 10 + // messageIDFixtureLen is the length of the message ID fixture for testing. + messageIDFixtureLen = 10 ) // NetworkingKeyFixtures is a test helper that generates a ECDSA flow key pair. @@ -67,18 +76,19 @@ func NetworkingKeyFixtures(t *testing.T) crypto.PrivateKey { // NodeFixture is a test fixture that creates a single libp2p node with the given key, spork id, and options. // It returns the node and its identity. -func NodeFixture( - t *testing.T, sporkID flow.Identifier, dhtPrefix string, idProvider module.IdentityProvider, opts ...NodeFixtureParameterOption, -) (p2p.LibP2PNode, flow.Identity) { +func NodeFixture(t *testing.T, + sporkID flow.Identifier, + dhtPrefix string, + idProvider module.IdentityProvider, + opts ...NodeFixtureParameterOption) (p2p.LibP2PNode, flow.Identity) { defaultFlowConfig, err := config.DefaultConfig() require.NoError(t, err) - logger := unittest.Logger().Level(zerolog.WarnLevel) + logger := unittest.Logger() require.NotNil(t, idProvider) - connectionGater := NewConnectionGater( - idProvider, func(p peer.ID) error { - return nil - }) + connectionGater := NewConnectionGater(idProvider, func(p peer.ID) error { + return nil + }) require.NotNil(t, connectionGater) meshTracerCfg := &tracer.GossipSubMeshTracerConfig{ @@ -106,7 +116,7 @@ func NodeFixture( Metrics: metrics.NewNoopCollector(), }, ResourceManager: &network.NullResourceManager{}, - GossipSubPeerScoreTracerInterval: 0, // disabled by default + GossipSubPeerScoreTracerInterval: defaultFlowConfig.NetworkConfig.GossipSubConfig.ScoreTracerInterval, ConnGater: connectionGater, PeerManagerConfig: PeerManagerConfigFixture(), // disabled by default FlowConfig: defaultFlowConfig, @@ -126,21 +136,20 @@ func NodeFixture( logger = parameters.Logger.With().Hex("node_id", logging.ID(identity.NodeID)).Logger() - connManager, err := connection.NewConnManager( - logger, - parameters.MetricsCfg.Metrics, - &defaultFlowConfig.NetworkConfig.ConnectionManagerConfig) + connManager, err := connection.NewConnManager(logger, parameters.MetricsCfg.Metrics, ¶meters.FlowConfig.NetworkConfig.ConnectionManagerConfig) require.NoError(t, err) - builder := p2pbuilder.NewNodeBuilder(logger, + builder := p2pbuilder.NewNodeBuilder( + logger, parameters.MetricsCfg, parameters.NetworkingType, parameters.Address, parameters.Key, sporkID, parameters.IdProvider, - &defaultFlowConfig.NetworkConfig.ResourceManager, - ¶meters.FlowConfig.NetworkConfig.GossipSubRPCInspectorsConfig, + defaultFlowConfig.NetworkConfig.GossipSubConfig.GossipSubScoringRegistryConfig, + ¶meters.FlowConfig.NetworkConfig.ResourceManager, + ¶meters.FlowConfig.NetworkConfig.GossipSubConfig, parameters.PeerManagerConfig, &p2p.DisallowListCacheConfig{ MaxSize: uint32(1000), @@ -160,16 +169,14 @@ func NodeFixture( // Only access and execution nodes need to run DHT; // Access nodes and execution nodes need DHT to run a blob service. // Moreover, access nodes run a DHT to let un-staked (public) access nodes find each other on the public network. - builder.SetRoutingSystem( - func(ctx context.Context, host host.Host) (routing.Routing, error) { - return p2pdht.NewDHT( - ctx, - host, - protocol.ID(protocols.FlowDHTProtocolIDPrefix+sporkID.String()+"/"+dhtPrefix), - logger, - parameters.MetricsCfg.Metrics, - parameters.DhtOptions...) - }) + builder.SetRoutingSystem(func(ctx context.Context, host host.Host) (routing.Routing, error) { + return p2pdht.NewDHT(ctx, + host, + protocol.ID(protocols.FlowDHTProtocolIDPrefix+sporkID.String()+"/"+dhtPrefix), + logger, + parameters.MetricsCfg.Metrics, + parameters.DhtOptions...) + }) } if parameters.GossipSubRpcInspectorSuiteFactory != nil { @@ -432,15 +439,12 @@ func WithZeroJitterAndZeroBackoff(t *testing.T) func(*p2pconfig.PeerManagerConfi // NodesFixture is a test fixture that creates a number of libp2p nodes with the given callback function for stream handling. // It returns the nodes and their identities. -func NodesFixture( - t *testing.T, +func NodesFixture(t *testing.T, sporkID flow.Identifier, dhtPrefix string, count int, idProvider module.IdentityProvider, - opts ...NodeFixtureParameterOption) ( - []p2p.LibP2PNode, - flow.IdentityList) { + opts ...NodeFixtureParameterOption) ([]p2p.LibP2PNode, flow.IdentityList) { var nodes []p2p.LibP2PNode // creating nodes @@ -563,23 +567,22 @@ func TryConnectionAndEnsureConnected(t *testing.T, ctx context.Context, nodes [] // - tick: the tick duration // - timeout: the timeout duration func RequireConnectedEventually(t *testing.T, nodes []p2p.LibP2PNode, tick time.Duration, timeout time.Duration) { - require.Eventually( - t, func() bool { - for _, node := range nodes { - for _, other := range nodes { - if node == other { - continue - } - if node.Host().Network().Connectedness(other.ID()) != network.Connected { - return false - } - if len(node.Host().Network().ConnsToPeer(other.ID())) == 0 { - return false - } + require.Eventually(t, func() bool { + for _, node := range nodes { + for _, other := range nodes { + if node == other { + continue + } + if node.Host().Network().Connectedness(other.ID()) != network.Connected { + return false + } + if len(node.Host().Network().ConnsToPeer(other.ID())) == 0 { + return false } } - return true - }, timeout, tick) + } + return true + }, timeout, tick) } // RequireEventuallyNotConnected ensures eventually that the given groups of nodes are not connected to each other. @@ -589,26 +592,20 @@ func RequireConnectedEventually(t *testing.T, nodes []p2p.LibP2PNode, tick time. // - groupB: the second group of nodes // - tick: the tick duration // - timeout: the timeout duration -func RequireEventuallyNotConnected( - t *testing.T, - groupA []p2p.LibP2PNode, - groupB []p2p.LibP2PNode, - tick time.Duration, - timeout time.Duration) { - require.Eventually( - t, func() bool { - for _, node := range groupA { - for _, other := range groupB { - if node.Host().Network().Connectedness(other.ID()) == network.Connected { - return false - } - if len(node.Host().Network().ConnsToPeer(other.ID())) > 0 { - return false - } +func RequireEventuallyNotConnected(t *testing.T, groupA []p2p.LibP2PNode, groupB []p2p.LibP2PNode, tick time.Duration, timeout time.Duration) { + require.Eventually(t, func() bool { + for _, node := range groupA { + for _, other := range groupB { + if node.Host().Network().Connectedness(other.ID()) == network.Connected { + return false + } + if len(node.Host().Network().ConnsToPeer(other.ID())) > 0 { + return false } } - return true - }, timeout, tick) + } + return true + }, timeout, tick) } // EnsureStreamCreationInBothDirections ensure that between each pair of nodes in the given list, a stream is created in both directions. @@ -619,12 +616,11 @@ func EnsureStreamCreationInBothDirections(t *testing.T, ctx context.Context, nod continue } // stream creation should pass without error - err := this.OpenProtectedStream( - ctx, other.ID(), t.Name(), func(stream network.Stream) error { - // do nothing - require.NotNil(t, stream) - return nil - }) + err := this.OpenProtectedStream(ctx, other.ID(), t.Name(), func(stream network.Stream) error { + // do nothing + require.NotNil(t, stream) + return nil + }) require.NoError(t, err) } @@ -642,13 +638,7 @@ func EnsureStreamCreationInBothDirections(t *testing.T, ctx context.Context, nod // // Note-1: this function assumes a timeout of 5 seconds for each message to be received. // Note-2: TryConnectionAndEnsureConnected() must be called to connect all nodes before calling this function. -func EnsurePubsubMessageExchange( - t *testing.T, - ctx context.Context, - nodes []p2p.LibP2PNode, - topic channels.Topic, - count int, - messageFactory func() interface{}) { +func EnsurePubsubMessageExchange(t *testing.T, ctx context.Context, nodes []p2p.LibP2PNode, topic channels.Topic, count int, messageFactory func() interface{}) { subs := make([]p2p.Subscription, len(nodes)) for i, node := range nodes { ps, err := node.Subscribe(topic, validator.TopicValidator(unittest.Logger(), unittest.AllowAllPeerFilter())) @@ -692,16 +682,14 @@ func EnsurePubsubMessageExchange( // - topic: the topic to exchange messages on. // - count: the number of messages to exchange from `sender` to `receiver`. // - messageFactory: a function that creates a unique message to be published by the node. -func EnsurePubsubMessageExchangeFromNode( - t *testing.T, +func EnsurePubsubMessageExchangeFromNode(t *testing.T, ctx context.Context, sender p2p.LibP2PNode, receiverNode p2p.LibP2PNode, receiverIdentifier flow.Identifier, topic channels.Topic, count int, - messageFactory func() interface{}, -) { + messageFactory func() interface{}) { _, err := sender.Subscribe(topic, validator.TopicValidator(unittest.Logger(), unittest.AllowAllPeerFilter())) require.NoError(t, err) @@ -747,16 +735,14 @@ func EnsureNotConnectedBetweenGroups(t *testing.T, ctx context.Context, groupA [ // - topic: the topic to exchange messages on. // - count: the number of messages to exchange from each node. // - messageFactory: a function that creates a unique message to be published by the node. -func EnsureNoPubsubMessageExchange( - t *testing.T, +func EnsureNoPubsubMessageExchange(t *testing.T, ctx context.Context, from []p2p.LibP2PNode, to []p2p.LibP2PNode, toIdentifiers flow.IdentifierList, topic channels.Topic, count int, - messageFactory func() interface{}, -) { + messageFactory func() interface{}) { subs := make([]p2p.Subscription, len(to)) tv := validator.TopicValidator(unittest.Logger(), unittest.AllowAllPeerFilter()) var err error @@ -811,8 +797,7 @@ func EnsureNoPubsubMessageExchange( // - topic: pubsub topic- no message should be exchanged on this topic. // - count: number of messages to be exchanged- no message should be exchanged. // - messageFactory: function to create a unique message to be published by the node. -func EnsureNoPubsubExchangeBetweenGroups( - t *testing.T, +func EnsureNoPubsubExchangeBetweenGroups(t *testing.T, ctx context.Context, groupANodes []p2p.LibP2PNode, groupAIdentifiers flow.IdentifierList, @@ -820,8 +805,7 @@ func EnsureNoPubsubExchangeBetweenGroups( groupBIdentifiers flow.IdentifierList, topic channels.Topic, count int, - messageFactory func() interface{}, -) { + messageFactory func() interface{}) { // ensure no message exchange from group A to group B EnsureNoPubsubMessageExchange(t, ctx, groupANodes, groupBNodes, groupBIdentifiers, topic, count, messageFactory) // ensure no message exchange from group B to group A @@ -846,9 +830,202 @@ func PeerIdSliceFixture(t *testing.T, n int) peer.IDSlice { // NewConnectionGater creates a new connection gater for testing with given allow listing filter. func NewConnectionGater(idProvider module.IdentityProvider, allowListFilter p2p.PeerFilter) p2p.ConnectionGater { filters := []p2p.PeerFilter{allowListFilter} - return connection.NewConnGater( - unittest.Logger(), - idProvider, - connection.WithOnInterceptPeerDialFilters(filters), - connection.WithOnInterceptSecuredFilters(filters)) + return connection.NewConnGater(unittest.Logger(), idProvider, connection.WithOnInterceptPeerDialFilters(filters), connection.WithOnInterceptSecuredFilters(filters)) +} + +// MockInspectorNotificationDistributorReadyDoneAware mocks the Ready and Done methods of the distributor to return a channel that is already closed, +// so that the distributor is considered ready and done when the test needs. +func MockInspectorNotificationDistributorReadyDoneAware(d *mockp2p.GossipSubInspectorNotificationDistributor) { + d.On("Start", mockery.Anything).Return().Maybe() + d.On("Ready").Return(func() <-chan struct{} { + ch := make(chan struct{}) + close(ch) + return ch + }()).Maybe() + d.On("Done").Return(func() <-chan struct{} { + ch := make(chan struct{}) + close(ch) + return ch + }()).Maybe() +} + +// GossipSubRpcFixtures returns a slice of random message IDs for testing. +// Args: +// - t: *testing.T instance +// - count: number of message IDs to generate +// Returns: +// - []string: slice of message IDs. +// Note: evey other parameters that are not explicitly set are set to 10. This function suites applications that need to generate a large number of RPC messages with +// filled random data. For a better control over the generated data, use GossipSubRpcFixture. +func GossipSubRpcFixtures(t *testing.T, count int) []*pb.RPC { + c := 10 + rpcs := make([]*pb.RPC, 0) + for i := 0; i < count; i++ { + rpcs = append(rpcs, + GossipSubRpcFixture(t, + c, + WithPrune(c, GossipSubTopicIdFixture()), + WithGraft(c, GossipSubTopicIdFixture()), + WithIHave(c, c, GossipSubTopicIdFixture()), + WithIWant(c, c))) + } + return rpcs +} + +// GossipSubRpcFixture returns a random GossipSub RPC message. An RPC message is the GossipSub-level message that is exchanged between nodes. +// It contains individual messages, subscriptions, and control messages. +// Args: +// - t: *testing.T instance +// - msgCnt: number of messages to generate +// - opts: options to customize control messages (not having an option means no control message). +// Returns: +// - *pb.RPC: a random GossipSub RPC message +// Note: the message is not signed. +func GossipSubRpcFixture(t *testing.T, msgCnt int, opts ...GossipSubCtrlOption) *pb.RPC { + rand.Seed(uint64(time.Now().UnixNano())) + + // creates a random number of Subscriptions + numSubscriptions := 10 + topicIdSize := 10 + subscriptions := make([]*pb.RPC_SubOpts, numSubscriptions) + for i := 0; i < numSubscriptions; i++ { + subscribe := rand.Intn(2) == 1 + topicID := unittest.RandomStringFixture(t, topicIdSize) + subscriptions[i] = &pb.RPC_SubOpts{ + Subscribe: &subscribe, + Topicid: &topicID, + } + } + + // generates random messages + messages := make([]*pb.Message, msgCnt) + for i := 0; i < msgCnt; i++ { + messages[i] = GossipSubMessageFixture(t) + } + + // Create a Control Message + controlMessages := GossipSubCtrlFixture(opts...) + + // Create the RPC + rpc := &pb.RPC{ + Subscriptions: subscriptions, + Publish: messages, + Control: controlMessages, + } + + return rpc +} + +type GossipSubCtrlOption func(*pb.ControlMessage) + +// GossipSubCtrlFixture returns a ControlMessage with the given options. +func GossipSubCtrlFixture(opts ...GossipSubCtrlOption) *pb.ControlMessage { + msg := &pb.ControlMessage{} + for _, opt := range opts { + opt(msg) + } + return msg +} + +// WithIHave adds iHave control messages of the given size and number to the control message. +func WithIHave(msgCount, msgIDCount int, topicId string) GossipSubCtrlOption { + return func(msg *pb.ControlMessage) { + iHaves := make([]*pb.ControlIHave, msgCount) + for i := 0; i < msgCount; i++ { + iHaves[i] = &pb.ControlIHave{ + TopicID: &topicId, + MessageIDs: GossipSubMessageIdsFixture(msgIDCount), + } + } + msg.Ihave = iHaves + } +} + +// WithIWant adds iWant control messages of the given size and number to the control message. +// The message IDs are generated randomly. +// Args: +// +// msgCount: number of iWant messages to add. +// msgIdsPerIWant: number of message IDs to add to each iWant message. +// +// Returns: +// A GossipSubCtrlOption that adds iWant messages to the control message. +// Example: WithIWant(2, 3) will add 2 iWant messages, each with 3 message IDs. +func WithIWant(iWantCount int, msgIdsPerIWant int) GossipSubCtrlOption { + return func(msg *pb.ControlMessage) { + iWants := make([]*pb.ControlIWant, iWantCount) + for i := 0; i < iWantCount; i++ { + iWants[i] = &pb.ControlIWant{ + MessageIDs: GossipSubMessageIdsFixture(msgIdsPerIWant), + } + } + msg.Iwant = iWants + } +} + +// WithGraft adds GRAFT control messages with given topicID to the control message. +func WithGraft(msgCount int, topicId string) GossipSubCtrlOption { + return func(msg *pb.ControlMessage) { + grafts := make([]*pb.ControlGraft, msgCount) + for i := 0; i < msgCount; i++ { + grafts[i] = &pb.ControlGraft{ + TopicID: &topicId, + } + } + msg.Graft = grafts + } +} + +// WithPrune adds PRUNE control messages with given topicID to the control message. +func WithPrune(msgCount int, topicId string) GossipSubCtrlOption { + return func(msg *pb.ControlMessage) { + prunes := make([]*pb.ControlPrune, msgCount) + for i := 0; i < msgCount; i++ { + prunes[i] = &pb.ControlPrune{ + TopicID: &topicId, + } + } + msg.Prune = prunes + } +} + +// gossipSubMessageIdFixture returns a random gossipSub message ID. +func gossipSubMessageIdFixture() string { + // TODO: messageID length should be a parameter. + return unittest.GenerateRandomStringWithLen(messageIDFixtureLen) +} + +// GossipSubTopicIdFixture returns a random gossipSub topic ID. +func GossipSubTopicIdFixture() string { + // TODO: topicID length should be a parameter. + return unittest.GenerateRandomStringWithLen(topicIDFixtureLen) +} + +// GossipSubMessageIdsFixture returns a slice of random gossipSub message IDs of the given size. +func GossipSubMessageIdsFixture(count int) []string { + msgIds := make([]string, count) + for i := 0; i < count; i++ { + msgIds[i] = gossipSubMessageIdFixture() + } + return msgIds +} + +// GossipSubMessageFixture returns a random gossipSub message; this contains a single pubsub message that is exchanged between nodes. +// The message is generated randomly. +// Args: +// - t: *testing.T instance +// Returns: +// - *pb.Message: a random gossipSub message +// Note: the message is not signed. +func GossipSubMessageFixture(t *testing.T) *pb.Message { + byteSize := 100 + topic := unittest.RandomStringFixture(t, byteSize) + return &pb.Message{ + From: unittest.RandomBytes(byteSize), + Data: unittest.RandomBytes(byteSize), + Seqno: unittest.RandomBytes(byteSize), + Topic: &topic, + Signature: unittest.RandomBytes(byteSize), + Key: unittest.RandomBytes(byteSize), + } } diff --git a/network/p2p/unicast/README.MD b/network/p2p/unicast/README.MD index 8e094275dfd..47829c6c80f 100644 --- a/network/p2p/unicast/README.MD +++ b/network/p2p/unicast/README.MD @@ -19,15 +19,19 @@ functionalities, hence, it operates on the notion of the `peer` (rather than Flo than `flow.Identifier`. It is the responsibility of the caller to provide the correct `peer.ID` of the remote node. -If there is existing connection between the local node and the remote node, the manager will try to establish -a connection first, and then open a stream over the connection. The connection is assumed persistent, i.e., it -will be kept until certain events such as Flow node shutdown, restart, disallow-listing of either ends of the connection -by each other, etc. However, a stream is a one-time communication channel, i.e., it is assumed to be closed +The `UnicastManager` relies on the underlying libp2p node to establish the connection to the remote peer. Once the underlying +libp2p node receives a stream creation request from the `UnicastManager`, it will try to establish a connection to the remote peer if +there is no existing connection to the peer. Otherwise, it will pick and re-use the best existing connection to the remote peer. +Hence, the `UnicastManager` does not (and should not) care about the connection establishment, and rather relies on the underlying +libp2p node to establish the connection. The `UnicastManager` only cares about the stream creation, and will return an error +if the underlying libp2p node fails to establish a connection to the remote peer. + + +A stream is a one-time communication channel, i.e., it is assumed to be closed by the caller once the message is sent. The caller (i.e., the Flow node) does not necessarily re-use a stream, and the `Manager` creates one stream per request (i.e., `CreateStream` invocation), which is typically a single message. -However, we have certain safeguards in place to prevent nodes from establishing more than one connection to each other. -That is why the `Manager` establishes the connection only when there is no existing connection between the nodes, and otherwise -re-uses the existing connection. + +Note: the limit of number of streams and connections between nodes is set throught eh libp2p resource manager limits (see `config/default-config.yml`): Note: `pubsub` protocol also establishes connections between nodes to exchange gossip messages with each other. The connection type is the same between `pubsub` and `unicast` protocols, as they both consult the underlying LibP2P node to @@ -48,84 +52,65 @@ that the connection is persistent and will be kept open by the `PeerManager`. ## Backoff and Retry Attempts The flowchart below explains the abstract logic of the `UnicastManager` when it receives a `CreateStream` invocation. -One a happy path, the `UnicastManager` expects a connection to the remote peer exists and hence it can successfully open a stream to the peer. -However, there can be cases that the connection does not exist, the remote peer is not reliable for stream creation, or the remote peer acts -maliciously and does not respond to connection and stream creation requests. In order to distinguish between the cases that the remote peer +On the happy path, the `UnicastManager` successfully opens a stream to the peer. +However, there can be cases that the remote peer is not reliable for stream creation, or the remote peer acts +maliciously and does not respond stream creation requests. In order to distinguish between the cases that the remote peer is not reliable and the cases that the remote peer is malicious, the `UnicastManager` uses a backoff and retry mechanism. ![retry.png](retry.png) ### Addressing Unreliable Remote Peer -To address the unreliability of remote peer, upon an unsuccessful attempt to establish a connection or stream, the `UnicastManager` will wait for a certain amount of time before it tries to establish (i.e., the backoff mechanism), -and will retry a certain number of times before it gives up (i.e., the retry mechanism). The backoff and retry parameters are configurable through runtime flags. +To address the unreliability of remote peer, upon an unsuccessful attempt to establish a stream, the `UnicastManager` will wait for a certain +amount of time before it tries to establish (i.e., the backoff mechanism), and will retry a certain number of times before it gives up (i.e., the retry mechanism). +The backoff and retry parameters are configurable through runtime flags. If all backoff and retry attempts fail, the `UnicastManager` will return an error to the caller. The caller can then decide to retry the request or not. -By default, `UnicastManager` retries each connection (dialing) attempt as well as stream creation attempt 3 times. Also, the backoff intervals for dialing and stream creation are initialized to 1 second and progress -exponentially with a factor of 2, i.e., the `i-th` retry attempt is made after `t * 2^(i-1)`, where `t` is the backoff interval. The formulation is the same for dialing and -stream creation. For example, if the backoff interval is 1s, the first attempt is made right-away, the first (retry) attempt is made after 1s * 2^(1 - 1) = 1s, the third (retry) attempt is made +By default, `UnicastManager` retries each stream creation attempt 3 times. Also, the backoff intervals for dialing and stream creation are initialized to 1 second and progress +exponentially with a factor of 2, i.e., the `i-th` retry attempt is made after `t * 2^(i-1)`, where `t` is the backoff interval. +For example, if the backoff interval is 1s, the first attempt is made right-away, the first (retry) attempt is made after 1s * 2^(1 - 1) = 1s, the third (retry) attempt is made after `1s * 2^(2 - 1) = 2s`, and so on. These parameters are configured using the `config/default-config.yml` file: ```yaml # Unicast create stream retry delay is initial delay used in the exponential backoff for create stream retries unicast-create-stream-retry-delay: 1s - # The backoff delay used in the exponential backoff for consecutive failed unicast dial attempts to a remote peer. - unicast-dial-backoff-delay: 1s -``` - -#### Addressing Concurrent Stream Creation Attempts -There might be the case that multiple threads attempt to create a stream to the same remote peer concurrently, while there is no -existing connection to the remote peer. In such cases, the `UnicastManager` will let the first attempt to create a stream to the remote peer to proceed with dialing -while it will backoff the concurrent attempts for a certain amount of time. The backoff delay is configurable through the `config/default-config.yml` file: -```yaml - # The backoff delay used in the exponential backoff for backing off concurrent create stream attempts to the same remote peer - # when there is no available connections to that remote peer and a dial is in progress. - unicast-dial-in-progress-backoff-delay: 1s ``` -This is done for several reasons including: -- The resource manager of the remote peer may block the concurrent dial attempts if they exceed a certain threshold. -- As a convention in networking layer, we don't desire more than one connection to a remote peer, and there are hard reactive constraints in place. - However, as a soft proactive measure, we backoff concurrent dial attempts to the same remote peer to prevent multiple connections to the same peer. -- Dialing is a resource-intensive operation, and we don't want to waste resources on concurrent dial attempts to the same remote peer. ### Addressing Malicious Remote Peer The backoff and retry mechanism is used to address the cases that the remote peer is not reliable. -However, there can be cases that the remote peer is malicious and does not respond to connection and stream creation requests. -Such cases may cause the `UnicastManager` to wait for a long time before it gives up, resulting in a resource exhaustion and slow-down of the dialing node. -To mitigate such cases, the `UnicastManager` uses a retry budget for the stream creation and dialing. The retry budgets are initialized +However, there can be cases that the remote peer is malicious and does not respond to stream creation requests. +Such cases may cause the `UnicastManager` to wait for a long time before it gives up, resulting in a resource exhaustion and slow-down of the stream creation. +To mitigate such cases, the `UnicastManager` uses a retry budget for the stream creation. The retry budgets are initialized using the `config/default-config.yml` file: ```yaml # The maximum number of retry attempts for creating a unicast stream to a remote peer before giving up. If it is set to 3 for example, it means that if a peer fails to create # retry a unicast stream to a remote peer 3 times, the peer will give up and will not retry creating a unicast stream to that remote peer. # When it is set to zero it means that the peer will not retry creating a unicast stream to a remote peer if it fails. unicast-max-stream-creation-retry-attempt-times: 3 - # The maximum number of retry attempts for dialing a remote peer before giving up. If it is set to 3 for example, it means that if a peer fails to dial a remote peer 3 times, - # the peer will give up and will not retry dialing that remote peer. - unicast-max-dial-retry-attempt-times: 3 ``` -As shown in the above snippet, both retry budgets for dialing and stream creation are set to 3 by default for every remote peer. -Each time the `UnicastManager` is invoked on `CreateStream` to `pid` (`peer.ID`), it loads the retry budgets for `pid` from the dial config cache. -If no dial config record exists for `pid`, one is created with the default retry budgets. The `UnicastManager` then uses the retry budgets to decide -whether to retry the dialing or stream creation attempt or not. If the retry budget for dialing or stream creation is exhausted, the `UnicastManager` -will not retry the dialing or stream creation attempt, respectively, and returns an error to the caller. The caller can then decide to retry the request or not. +As shown in the above snippet, the stream creation is set to 3 by default for every remote peer. +Each time the `UnicastManager` is invoked on `CreateStream` to `pid` (`peer.ID`), it loads the retry budgets for `pid` from the unicast config cache. +If no unicast config record exists for `pid`, one is created with the default retry budgets. The `UnicastManager` then uses the retry budgets to decide +whether to retry the stream creation attempt or not. If the retry budget for stream creation is exhausted, the `UnicastManager` +will not retry the stream creation attempt, and returns an error to the caller. The caller can then decide to retry the request or not. +Note that even when the retry budget is exhausted, the `UnicastManager` will try the stream creation attempt once, though it will not retry the attempt if it fails. #### Penalizing Malicious Remote Peer -Each time the `UnicastManager` fails to dial or create a stream to a remote peer and exhausts the retry budget, it penalizes the remote peer as follows: -- If the `UnicastManager` exhausts the retry budget for dialing, it will decrement the dial retry budget as well as the stream creation retry budget for the remote peer. +Each time the `UnicastManager` fails to create a stream to a remote peer and exhausts the retry budget, it penalizes the remote peer as follows: - If the `UnicastManager` exhausts the retry budget for stream creation, it will decrement the stream creation retry budget for the remote peer. -- If the retry budget reaches zero, the `UnicastManager` will only attempt once to dial or create a stream to the remote peer, and will not retry the attempt, and rather return an error to the caller. -- When any of the budgets reaches zero, the `UnicastManager` will not decrement the budget anymore. +- If the retry budget reaches zero, the `UnicastManager` will only attempt once to create a stream to the remote peer, and will not retry the attempt, and rather return an error to the caller. +- When the budget reaches zero, the `UnicastManager` will not decrement the budget anymore. **Note:** `UnicastManager` is part of the networking layer of the Flow node, which is a lower-order component than the Flow protocol engines who call the `UnicastManager` to send messages to remote peers. Hence, the `UnicastManager` _must not_ outsmart -the Flow protocol engines on deciding whether to _dial or create stream_ in the first place. This means that `UnicastManager` will attempt -to dial and create stream even to peers with zero retry budgets. However, `UnicastManager` does not retry attempts for the peers with zero budgets, and rather +the Flow protocol engines on deciding whether to _create stream_ in the first place. This means that `UnicastManager` will attempt +to create stream even to peers with zero retry budgets. However, `UnicastManager` does not retry attempts for the peers with zero budgets, and rather returns an error immediately upon a failure. This is the responsibility of the Flow protocol engines to decide whether to send a message to a remote peer or not after a certain number of failures. #### Restoring Retry Budgets -The `UnicastManager` may reset the dial and stream creation budgets for a remote peers _from zero to the default values_ in the following cases: +The `UnicastManager` may reset the stream creation budget for a remote peers _from zero to the default values_ in the following cases: - **Restoring Stream Creation Retry Budget**: To restore the stream creation budget from zero to the default value, the `UnicastManager` keeps track of the _consecutive_ successful streams created to the remote peer. Everytime a stream is created successfully, the `UnicastManager` increments a counter for the remote peer. The counter is @@ -137,19 +122,4 @@ The `UnicastManager` may reset the dial and stream creation budgets for a remote # the unicast stream creation retry budget for that remote peer will be reset to the maximum default. unicast-stream-zero-retry-reset-threshold: 100 ``` - Reaching the threshold means that the remote peer is reliable enough to regain the default retry budget for stream creation. -- **Restoring Dial Retry Budget**: To restore the dial retry budget from zero to the default value, the `UnicastManager` keeps track of the last successful - dial time to the remote peer. Every failed dialing attempt will reset the last successful dial time to zero. If the time since the last successful dialing attempt - reaches a certain threshold, the `UnicastManager` will reset the dial budget for the remote peer to the default value. - The threshold is configurable through the `config/default-config.yml` file: - ```yaml - # The number of seconds that the local peer waits since the last successful dial to a remote peer before resetting the unicast dial retry budget from zero to the maximum default. - # If it is set to 3600s (1h) for example, it means that if it has passed at least one hour since the last successful dial, and the remote peer has a zero dial retry budget, - # the unicast dial retry budget for that remote peer will be reset to the maximum default. - unicast-dial-zero-retry-reset-threshold: 3600s - ``` - Reaching the threshold means that either the `UnicastManager` has not dialed the remote peer for a long time, and the peer - deserves a chance to regain its dial retry budget, or the remote peer maintains a persistent connection to the local peer, for a long time, and - deserves a chance to regain its dial retry budget. Note that the networking layer enforces a maximum number of _one_ connection to a remote peer, hence - the remote peer cannot have multiple connections to the local peer. Also, connection establishment is assumed a more resource-intensive operation than the stream creation, - hence, in contrast to the stream reliability that is measured by the number of consecutive successful streams, the dial reliability is measured by the time since the last successful dial. \ No newline at end of file + Reaching the threshold means that the remote peer is reliable enough to regain the default retry budget for stream creation. \ No newline at end of file diff --git a/network/p2p/unicast/cache/dialConfigCache_test.go b/network/p2p/unicast/cache/dialConfigCache_test.go deleted file mode 100644 index 1945070d0c5..00000000000 --- a/network/p2p/unicast/cache/dialConfigCache_test.go +++ /dev/null @@ -1,281 +0,0 @@ -package unicastcache_test - -import ( - "fmt" - "sync" - "testing" - "time" - - "github.com/libp2p/go-libp2p/core/peer" - "github.com/rs/zerolog" - "github.com/stretchr/testify/require" - - "github.com/onflow/flow-go/module/metrics" - "github.com/onflow/flow-go/network/p2p/unicast" - unicastcache "github.com/onflow/flow-go/network/p2p/unicast/cache" - "github.com/onflow/flow-go/utils/unittest" -) - -// TestNewDialConfigCache tests the creation of a new DialConfigCache. -// It asserts that the cache is created and its size is 0. -func TestNewDialConfigCache(t *testing.T) { - sizeLimit := uint32(100) - logger := zerolog.Nop() - collector := metrics.NewNoopCollector() - cache := unicastcache.NewDialConfigCache(sizeLimit, logger, collector, dialConfigFixture) - require.NotNil(t, cache) - require.Equalf(t, uint(0), cache.Size(), "cache size must be 0") -} - -// dialConfigFixture returns a dial config fixture. -// The dial config is initialized with the default values. -func dialConfigFixture() unicast.DialConfig { - return unicast.DialConfig{ - DialRetryAttemptBudget: 3, - StreamCreationRetryAttemptBudget: 3, - } -} - -// TestDialConfigCache_Adjust tests the Adjust method of the DialConfigCache. It asserts that the dial config is initialized, adjusted, -// and stored in the cache. -func TestDialConfigCache_Adjust_Init(t *testing.T) { - sizeLimit := uint32(100) - logger := zerolog.Nop() - collector := metrics.NewNoopCollector() - - dialFactoryCalled := 0 - dialConfigFactory := func() unicast.DialConfig { - require.Less(t, dialFactoryCalled, 2, "dial config factory must be called at most twice") - dialFactoryCalled++ - return dialConfigFixture() - } - adjustFuncIncrement := func(cfg unicast.DialConfig) (unicast.DialConfig, error) { - cfg.DialRetryAttemptBudget++ - return cfg, nil - } - - cache := unicastcache.NewDialConfigCache(sizeLimit, logger, collector, dialConfigFactory) - require.NotNil(t, cache) - require.Zerof(t, cache.Size(), "cache size must be 0") - - peerID1 := unittest.PeerIdFixture(t) - peerID2 := unittest.PeerIdFixture(t) - - // Initializing the dial config for peerID1 through GetOrInit. - // dial config for peerID1 does not exist in the cache, so it must be initialized when using GetOrInit. - cfg, err := cache.GetOrInit(peerID1) - require.NoError(t, err) - require.NotNil(t, cfg, "dial config must not be nil") - require.Equal(t, dialConfigFixture(), *cfg, "dial config must be initialized with the default values") - require.Equal(t, uint(1), cache.Size(), "cache size must be 1") - - // Initializing and adjusting the dial config for peerID2 through Adjust. - // dial config for peerID2 does not exist in the cache, so it must be initialized when using Adjust. - cfg, err = cache.Adjust(peerID2, adjustFuncIncrement) - require.NoError(t, err) - // adjusting a non-existing dial config must not initialize the config. - require.Equal(t, uint(2), cache.Size(), "cache size must be 2") - require.Equal(t, cfg.LastSuccessfulDial, dialConfigFixture().LastSuccessfulDial, "last successful dial must be 0") - require.Equal(t, cfg.DialRetryAttemptBudget, dialConfigFixture().DialRetryAttemptBudget+1, "dial backoff must be adjusted") - require.Equal(t, cfg.StreamCreationRetryAttemptBudget, dialConfigFixture().StreamCreationRetryAttemptBudget, "stream backoff must be 1") - - // Retrieving the dial config of peerID2 through GetOrInit. - // retrieve the dial config for peerID2 and assert than it is initialized with the default values; and the adjust function is applied. - cfg, err = cache.GetOrInit(peerID2) - require.NoError(t, err, "dial config must exist in the cache") - require.NotNil(t, cfg, "dial config must not be nil") - // retrieving an existing dial config must not change the cache size. - require.Equal(t, uint(2), cache.Size(), "cache size must be 2") - // config should be the same as the one returned by Adjust. - require.Equal(t, cfg.LastSuccessfulDial, dialConfigFixture().LastSuccessfulDial, "last successful dial must be 0") - require.Equal(t, cfg.DialRetryAttemptBudget, dialConfigFixture().DialRetryAttemptBudget+1, "dial backoff must be adjusted") - require.Equal(t, cfg.StreamCreationRetryAttemptBudget, dialConfigFixture().StreamCreationRetryAttemptBudget, "stream backoff must be 1") - - // Adjusting the dial config of peerID1 through Adjust. - // dial config for peerID1 already exists in the cache, so it must be adjusted when using Adjust. - cfg, err = cache.Adjust(peerID1, adjustFuncIncrement) - require.NoError(t, err) - // adjusting an existing dial config must not change the cache size. - require.Equal(t, uint(2), cache.Size(), "cache size must be 2") - require.Equal(t, cfg.LastSuccessfulDial, dialConfigFixture().LastSuccessfulDial, "last successful dial must be 0") - require.Equal(t, cfg.DialRetryAttemptBudget, dialConfigFixture().DialRetryAttemptBudget+1, "dial backoff must be adjusted") - require.Equal(t, cfg.StreamCreationRetryAttemptBudget, dialConfigFixture().StreamCreationRetryAttemptBudget, "stream backoff must be 1") - - // Recurring adjustment of the dial config of peerID1 through Adjust. - // dial config for peerID1 already exists in the cache, so it must be adjusted when using Adjust. - cfg, err = cache.Adjust(peerID1, adjustFuncIncrement) - require.NoError(t, err) - // adjusting an existing dial config must not change the cache size. - require.Equal(t, uint(2), cache.Size(), "cache size must be 2") - require.Equal(t, cfg.LastSuccessfulDial, dialConfigFixture().LastSuccessfulDial, "last successful dial must be 0") - require.Equal(t, cfg.DialRetryAttemptBudget, dialConfigFixture().DialRetryAttemptBudget+2, "dial backoff must be adjusted") - require.Equal(t, cfg.StreamCreationRetryAttemptBudget, dialConfigFixture().StreamCreationRetryAttemptBudget, "stream backoff must be 1") -} - -// TestDialConfigCache_Adjust tests the Adjust method of the DialConfigCache. It asserts that the dial config is adjusted, -// and stored in the cache as expected under concurrent adjustments. -func TestDialConfigCache_Concurrent_Adjust(t *testing.T) { - sizeLimit := uint32(100) - logger := zerolog.Nop() - collector := metrics.NewNoopCollector() - - cache := unicastcache.NewDialConfigCache(sizeLimit, logger, collector, func() unicast.DialConfig { - return unicast.DialConfig{} // empty dial config - }) - require.NotNil(t, cache) - require.Zerof(t, cache.Size(), "cache size must be 0") - - peerIds := make([]peer.ID, sizeLimit) - for i := 0; i < int(sizeLimit); i++ { - peerId := unittest.PeerIdFixture(t) - require.NotContainsf(t, peerIds, peerId, "peer id must be unique") - peerIds[i] = peerId - } - - wg := sync.WaitGroup{} - for i := 0; i < int(sizeLimit); i++ { - // adjusts the ith dial config for peerID i times, concurrently. - for j := 0; j < i+1; j++ { - wg.Add(1) - go func(peerId peer.ID) { - defer wg.Done() - _, err := cache.Adjust(peerId, func(cfg unicast.DialConfig) (unicast.DialConfig, error) { - cfg.DialRetryAttemptBudget++ - return cfg, nil - }) - require.NoError(t, err) - }(peerIds[i]) - } - } - - unittest.RequireReturnsBefore(t, wg.Wait, time.Second*1, "adjustments must be done on time") - - // assert that the cache size is equal to the size limit. - require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") - - // assert that the dial config for each peer is adjusted i times, concurrently. - for i := 0; i < int(sizeLimit); i++ { - wg.Add(1) - go func(j int) { - wg.Done() - - peerID := peerIds[j] - cfg, err := cache.GetOrInit(peerID) - require.NoError(t, err) - require.Equal(t, uint64(j+1), cfg.DialRetryAttemptBudget, fmt.Sprintf("peerId %s dial backoff must be adjusted %d times got: %d", peerID, j+1, cfg.DialRetryAttemptBudget)) - }(i) - } - - unittest.RequireReturnsBefore(t, wg.Wait, time.Second*1, "retrievals must be done on time") -} - -// TestConcurrent_Adjust_And_Get_Is_Safe tests that concurrent adjustments and retrievals are safe, and do not cause error even if they cause eviction. The test stress tests the cache -// with 2 * SizeLimit concurrent operations (SizeLimit times concurrent adjustments and SizeLimit times concurrent retrievals). -// It asserts that the cache size is equal to the size limit, and the dial config for each peer is adjusted and retrieved correctly. -func TestConcurrent_Adjust_And_Get_Is_Safe(t *testing.T) { - sizeLimit := uint32(100) - logger := zerolog.Nop() - collector := metrics.NewNoopCollector() - - cache := unicastcache.NewDialConfigCache(sizeLimit, logger, collector, dialConfigFixture) - require.NotNil(t, cache) - require.Zerof(t, cache.Size(), "cache size must be 0") - - wg := sync.WaitGroup{} - for i := 0; i < int(sizeLimit); i++ { - // concurrently adjusts the dial configs. - wg.Add(1) - go func() { - defer wg.Done() - peerId := unittest.PeerIdFixture(t) - dialTime := time.Now() - updatedConfig, err := cache.Adjust(peerId, func(cfg unicast.DialConfig) (unicast.DialConfig, error) { - cfg.DialRetryAttemptBudget = 1 // some random adjustment - cfg.LastSuccessfulDial = dialTime - cfg.StreamCreationRetryAttemptBudget = 2 // some random adjustment - cfg.ConsecutiveSuccessfulStream = 3 // some random adjustment - return cfg, nil - }) - require.NoError(t, err) // concurrent adjustment must not fail. - require.Equal(t, uint64(1), updatedConfig.DialRetryAttemptBudget) // adjustment must be successful - require.Equal(t, uint64(2), updatedConfig.StreamCreationRetryAttemptBudget) - require.Equal(t, uint64(3), updatedConfig.ConsecutiveSuccessfulStream) - require.Equal(t, dialTime, updatedConfig.LastSuccessfulDial) - }() - } - - // assert that the dial config for each peer is adjusted i times, concurrently. - for i := 0; i < int(sizeLimit); i++ { - wg.Add(1) - go func() { - wg.Done() - peerId := unittest.PeerIdFixture(t) - cfg, err := cache.GetOrInit(peerId) - require.NoError(t, err) // concurrent retrieval must not fail. - require.Equal(t, dialConfigFixture().DialRetryAttemptBudget, cfg.DialRetryAttemptBudget) // dial config must be initialized with the default values. - require.Equal(t, dialConfigFixture().StreamCreationRetryAttemptBudget, cfg.StreamCreationRetryAttemptBudget) - require.Equal(t, uint64(0), cfg.ConsecutiveSuccessfulStream) - require.True(t, cfg.LastSuccessfulDial.IsZero()) - }() - } - - unittest.RequireReturnsBefore(t, wg.Wait, time.Second*1, "all operations must be done on time") - - // cache was stress-tested with 2 * SizeLimit concurrent operations. Nevertheless, the cache size must be equal to the size limit due to LRU eviction. - require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") -} - -// TestDialConfigCache_LRU_Eviction tests that the cache evicts the least recently used dial config when the cache size reaches the size limit. -func TestDialConfigCache_LRU_Eviction(t *testing.T) { - sizeLimit := uint32(100) - logger := zerolog.Nop() - collector := metrics.NewNoopCollector() - - cache := unicastcache.NewDialConfigCache(sizeLimit, logger, collector, dialConfigFixture) - require.NotNil(t, cache) - require.Zerof(t, cache.Size(), "cache size must be 0") - - peerIds := make([]peer.ID, sizeLimit+1) - for i := 0; i < int(sizeLimit+1); i++ { - peerId := unittest.PeerIdFixture(t) - require.NotContainsf(t, peerIds, peerId, "peer id must be unique") - peerIds[i] = peerId - } - for i := 0; i < int(sizeLimit+1); i++ { - dialTime := time.Now() - updatedConfig, err := cache.Adjust(peerIds[i], func(cfg unicast.DialConfig) (unicast.DialConfig, error) { - cfg.DialRetryAttemptBudget = 1 // some random adjustment - cfg.StreamCreationRetryAttemptBudget = 2 // some random adjustment - cfg.ConsecutiveSuccessfulStream = 3 // some random adjustment - cfg.LastSuccessfulDial = dialTime - return cfg, nil - }) - require.NoError(t, err) // concurrent adjustment must not fail. - require.Equal(t, uint64(1), updatedConfig.DialRetryAttemptBudget) // adjustment must be successful - require.Equal(t, uint64(2), updatedConfig.StreamCreationRetryAttemptBudget) - require.Equal(t, uint64(3), updatedConfig.ConsecutiveSuccessfulStream) - require.Equal(t, dialTime, updatedConfig.LastSuccessfulDial) - } - - // except the first peer id, all other peer ids should stay intact in the cache. - for i := 1; i < int(sizeLimit+1); i++ { - cfg, err := cache.GetOrInit(peerIds[i]) - require.NoError(t, err) - require.Equal(t, uint64(1), cfg.DialRetryAttemptBudget) - require.Equal(t, uint64(2), cfg.StreamCreationRetryAttemptBudget) - require.Equal(t, uint64(3), cfg.ConsecutiveSuccessfulStream) - require.False(t, cfg.LastSuccessfulDial.IsZero()) - } - - require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") - - // querying the first peer id should return a fresh dial config, since it should be evicted due to LRU eviction, and the initiated with the default values. - cfg, err := cache.GetOrInit(peerIds[0]) - require.NoError(t, err) - require.Equal(t, dialConfigFixture().DialRetryAttemptBudget, cfg.DialRetryAttemptBudget) - require.Equal(t, dialConfigFixture().StreamCreationRetryAttemptBudget, cfg.StreamCreationRetryAttemptBudget) - require.Equal(t, uint64(0), cfg.ConsecutiveSuccessfulStream) - require.True(t, cfg.LastSuccessfulDial.IsZero()) - - require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") -} diff --git a/network/p2p/unicast/cache/dialConfigCache.go b/network/p2p/unicast/cache/unicastConfigCache.go similarity index 54% rename from network/p2p/unicast/cache/dialConfigCache.go rename to network/p2p/unicast/cache/unicastConfigCache.go index d0e88e43786..13c000110fe 100644 --- a/network/p2p/unicast/cache/dialConfigCache.go +++ b/network/p2p/unicast/cache/unicastConfigCache.go @@ -15,74 +15,74 @@ import ( "github.com/onflow/flow-go/network/p2p/unicast" ) -// ErrDialConfigNotFound is a benign error that indicates that the dial config does not exist in the cache. It is not a fatal error. -var ErrDialConfigNotFound = fmt.Errorf("dial config not found") +// ErrUnicastConfigNotFound is a benign error that indicates that the unicast config does not exist in the cache. It is not a fatal error. +var ErrUnicastConfigNotFound = fmt.Errorf("unicast config not found") -type DialConfigCache struct { +type UnicastConfigCache struct { // mutex is temporarily protect the edge case in HeroCache that optimistic adjustment causes the cache to be full. // TODO: remove this mutex after the HeroCache is fixed. mutex sync.RWMutex peerCache *stdmap.Backend - cfgFactory func() unicast.DialConfig // factory function that creates a new dial config. + cfgFactory func() unicast.Config // factory function that creates a new unicast config. } -var _ unicast.DialConfigCache = (*DialConfigCache)(nil) +var _ unicast.ConfigCache = (*UnicastConfigCache)(nil) -// NewDialConfigCache creates a new DialConfigCache. +// NewUnicastConfigCache creates a new UnicastConfigCache. // Args: -// - size: the maximum number of dial configs that the cache can hold. +// - size: the maximum number of unicast configs that the cache can hold. // - logger: the logger used by the cache. // - collector: the metrics collector used by the cache. -// - cfgFactory: a factory function that creates a new dial config. +// - cfgFactory: a factory function that creates a new unicast config. // Returns: -// - *DialConfigCache, the created cache. -// Note that the cache is supposed to keep the dial config for all types of nodes. Since the number of such nodes is -// expected to be small, size must be large enough to hold all the dial configs of the authorized nodes. +// - *UnicastConfigCache, the created cache. +// Note that the cache is supposed to keep the unicast config for all types of nodes. Since the number of such nodes is +// expected to be small, size must be large enough to hold all the unicast configs of the authorized nodes. // To avoid any crash-failure, the cache is configured to eject the least recently used configs when the cache is full. // Hence, we recommend setting the size to a large value to minimize the ejections. -func NewDialConfigCache( +func NewUnicastConfigCache( size uint32, logger zerolog.Logger, collector module.HeroCacheMetrics, - cfgFactory func() unicast.DialConfig, -) *DialConfigCache { - return &DialConfigCache{ + cfgFactory func() unicast.Config, +) *UnicastConfigCache { + return &UnicastConfigCache{ peerCache: stdmap.NewBackend(stdmap.WithBackData(herocache.NewCache(size, herocache.DefaultOversizeFactor, heropool.LRUEjection, - logger.With().Str("module", "dial-config-cache").Logger(), + logger.With().Str("module", "unicast-config-cache").Logger(), collector))), cfgFactory: cfgFactory, } } -// Adjust applies the given adjust function to the dial config of the given peer ID, and stores the adjusted config in the cache. +// Adjust applies the given adjust function to the unicast config of the given peer ID, and stores the adjusted config in the cache. // It returns an error if the adjustFunc returns an error. // Note that if the Adjust is called when the config does not exist, the config is initialized and the // adjust function is applied to the initialized config again. In this case, the adjust function should not return an error. // Args: -// - peerID: the peer id of the dial config. -// - adjustFunc: the function that adjusts the dial config. +// - peerID: the peer id of the unicast config. +// - adjustFunc: the function that adjusts the unicast config. // Returns: // - error any returned error should be considered as an irrecoverable error and indicates a bug. -func (d *DialConfigCache) Adjust(peerID peer.ID, adjustFunc unicast.DialConfigAdjustFunc) (*unicast.DialConfig, error) { +func (d *UnicastConfigCache) Adjust(peerID peer.ID, adjustFunc unicast.UnicastConfigAdjustFunc) (*unicast.Config, error) { d.mutex.Lock() // making optimistic adjustment atomic. defer d.mutex.Unlock() // first we translate the peer id to a flow id (taking peerIdHash := PeerIdToFlowId(peerID) - adjustedDialCfg, err := d.adjust(peerIdHash, adjustFunc) + adjustedUnicastCfg, err := d.adjust(peerIdHash, adjustFunc) if err != nil { - if err == ErrDialConfigNotFound { + if err == ErrUnicastConfigNotFound { // if the config does not exist, we initialize the config and try to adjust it again. // Note: there is an edge case where the config is initialized by another goroutine between the two calls. // In this case, the init function is invoked twice, but it is not a problem because the underlying // cache is thread-safe. Hence, we do not need to synchronize the two calls. In such cases, one of the // two calls returns false, and the other call returns true. We do not care which call returns false, hence, // we ignore the return value of the init function. - e := DialConfigEntity{ - PeerId: peerID, - DialConfig: d.cfgFactory(), + e := UnicastConfigEntity{ + PeerId: peerID, + Config: d.cfgFactory(), } _ = d.peerCache.Add(e) @@ -93,39 +93,39 @@ func (d *DialConfigCache) Adjust(peerID peer.ID, adjustFunc unicast.DialConfigAd } // if the adjust function returns an unexpected error on the first attempt, we return the error directly. // any returned error should be considered as an irrecoverable error and indicates a bug. - return nil, fmt.Errorf("failed to adjust dial config: %w", err) + return nil, fmt.Errorf("failed to adjust unicast config: %w", err) } // if the adjust function returns no error on the first attempt, we return the adjusted config. - return adjustedDialCfg, nil + return adjustedUnicastCfg, nil } -// adjust applies the given adjust function to the dial config of the given origin id. +// adjust applies the given adjust function to the unicast config of the given origin id. // It returns an error if the adjustFunc returns an error or if the config does not exist. // Args: -// - peerIDHash: the hash value of the peer id of the dial config (i.e., the ID of the dial config entity). -// - adjustFunc: the function that adjusts the dial config. +// - peerIDHash: the hash value of the peer id of the unicast config (i.e., the ID of the unicast config entity). +// - adjustFunc: the function that adjusts the unicast config. // Returns: -// - error if the adjustFunc returns an error or if the config does not exist (ErrDialConfigNotFound). Except the ErrDialConfigNotFound, +// - error if the adjustFunc returns an error or if the config does not exist (ErrUnicastConfigNotFound). Except the ErrUnicastConfigNotFound, // any other error should be treated as an irrecoverable error and indicates a bug. -func (d *DialConfigCache) adjust(peerIdHash flow.Identifier, adjustFunc unicast.DialConfigAdjustFunc) (*unicast.DialConfig, error) { +func (d *UnicastConfigCache) adjust(peerIdHash flow.Identifier, adjustFunc unicast.UnicastConfigAdjustFunc) (*unicast.Config, error) { var rErr error adjustedEntity, adjusted := d.peerCache.Adjust(peerIdHash, func(entity flow.Entity) flow.Entity { - cfgEntity, ok := entity.(DialConfigEntity) + cfgEntity, ok := entity.(UnicastConfigEntity) if !ok { // sanity check - // This should never happen, because the cache only contains DialConfigEntity entities. - panic(fmt.Sprintf("invalid entity type, expected DialConfigEntity type, got: %T", entity)) + // This should never happen, because the cache only contains UnicastConfigEntity entities. + panic(fmt.Sprintf("invalid entity type, expected UnicastConfigEntity type, got: %T", entity)) } - // adjust the dial config. - adjustedCfg, err := adjustFunc(cfgEntity.DialConfig) + // adjust the unicast config. + adjustedCfg, err := adjustFunc(cfgEntity.Config) if err != nil { rErr = fmt.Errorf("adjust function failed: %w", err) return entity // returns the original entity (reverse the adjustment). } // Return the adjusted config. - cfgEntity.DialConfig = adjustedCfg + cfgEntity.Config = adjustedCfg return cfgEntity }) @@ -134,65 +134,61 @@ func (d *DialConfigCache) adjust(peerIdHash flow.Identifier, adjustFunc unicast. } if !adjusted { - return nil, ErrDialConfigNotFound + return nil, ErrUnicastConfigNotFound } - return &unicast.DialConfig{ - DialRetryAttemptBudget: adjustedEntity.(DialConfigEntity).DialRetryAttemptBudget, - StreamCreationRetryAttemptBudget: adjustedEntity.(DialConfigEntity).StreamCreationRetryAttemptBudget, - LastSuccessfulDial: adjustedEntity.(DialConfigEntity).LastSuccessfulDial, - ConsecutiveSuccessfulStream: adjustedEntity.(DialConfigEntity).ConsecutiveSuccessfulStream, + return &unicast.Config{ + StreamCreationRetryAttemptBudget: adjustedEntity.(UnicastConfigEntity).StreamCreationRetryAttemptBudget, + ConsecutiveSuccessfulStream: adjustedEntity.(UnicastConfigEntity).ConsecutiveSuccessfulStream, }, nil } -// GetOrInit returns the dial config for the given peer id. If the config does not exist, it creates a new config +// GetOrInit returns the unicast config for the given peer id. If the config does not exist, it creates a new config // using the factory function and stores it in the cache. // Args: -// - peerID: the peer id of the dial config. +// - peerID: the peer id of the unicast config. // Returns: -// - *DialConfig, the dial config for the given peer id. +// - *Config, the unicast config for the given peer id. // - error if the factory function returns an error. Any error should be treated as an irrecoverable error and indicates a bug. -func (d *DialConfigCache) GetOrInit(peerID peer.ID) (*unicast.DialConfig, error) { +func (d *UnicastConfigCache) GetOrInit(peerID peer.ID) (*unicast.Config, error) { // first we translate the peer id to a flow id (taking flowPeerId := PeerIdToFlowId(peerID) cfg, ok := d.get(flowPeerId) if !ok { - _ = d.peerCache.Add(DialConfigEntity{ - PeerId: peerID, - DialConfig: d.cfgFactory(), + _ = d.peerCache.Add(UnicastConfigEntity{ + PeerId: peerID, + Config: d.cfgFactory(), }) cfg, ok = d.get(flowPeerId) if !ok { - return nil, fmt.Errorf("failed to initialize dial config for peer %s", peerID) + return nil, fmt.Errorf("failed to initialize unicast config for peer %s", peerID) } } return cfg, nil } -// Get returns the dial config of the given peer ID. -func (d *DialConfigCache) get(peerIDHash flow.Identifier) (*unicast.DialConfig, bool) { +// Get returns the unicast config of the given peer ID. +func (d *UnicastConfigCache) get(peerIDHash flow.Identifier) (*unicast.Config, bool) { entity, ok := d.peerCache.ByID(peerIDHash) if !ok { return nil, false } - cfg, ok := entity.(DialConfigEntity) + cfg, ok := entity.(UnicastConfigEntity) if !ok { // sanity check - // This should never happen, because the cache only contains DialConfigEntity entities. - panic(fmt.Sprintf("invalid entity type, expected DialConfigEntity type, got: %T", entity)) + // This should never happen, because the cache only contains UnicastConfigEntity entities. + panic(fmt.Sprintf("invalid entity type, expected UnicastConfigEntity type, got: %T", entity)) } // return a copy of the config (we do not want the caller to modify the config). - return &unicast.DialConfig{ - DialRetryAttemptBudget: cfg.DialRetryAttemptBudget, + return &unicast.Config{ StreamCreationRetryAttemptBudget: cfg.StreamCreationRetryAttemptBudget, - LastSuccessfulDial: cfg.LastSuccessfulDial, ConsecutiveSuccessfulStream: cfg.ConsecutiveSuccessfulStream, }, true } -// Size returns the number of dial configs in the cache. -func (d *DialConfigCache) Size() uint { +// Size returns the number of unicast configs in the cache. +func (d *UnicastConfigCache) Size() uint { return d.peerCache.Size() } diff --git a/network/p2p/unicast/cache/unicastConfigCache_test.go b/network/p2p/unicast/cache/unicastConfigCache_test.go new file mode 100644 index 00000000000..4d07c9980d2 --- /dev/null +++ b/network/p2p/unicast/cache/unicastConfigCache_test.go @@ -0,0 +1,260 @@ +package unicastcache_test + +import ( + "fmt" + "sync" + "testing" + "time" + + "github.com/libp2p/go-libp2p/core/peer" + "github.com/rs/zerolog" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/network/p2p/unicast" + unicastcache "github.com/onflow/flow-go/network/p2p/unicast/cache" + "github.com/onflow/flow-go/utils/unittest" +) + +// TestNewUnicastConfigCache tests the creation of a new UnicastConfigCache. +// It asserts that the cache is created and its size is 0. +func TestNewUnicastConfigCache(t *testing.T) { + sizeLimit := uint32(100) + logger := zerolog.Nop() + collector := metrics.NewNoopCollector() + cache := unicastcache.NewUnicastConfigCache(sizeLimit, logger, collector, unicastConfigFixture) + require.NotNil(t, cache) + require.Equalf(t, uint(0), cache.Size(), "cache size must be 0") +} + +// unicastConfigFixture returns a unicast config fixture. +// The unicast config is initialized with the default values. +func unicastConfigFixture() unicast.Config { + return unicast.Config{ + StreamCreationRetryAttemptBudget: 3, + } +} + +// TestUnicastConfigCache_Adjust tests the Adjust method of the UnicastConfigCache. It asserts that the unicast config is initialized, adjusted, +// and stored in the cache. +func TestUnicastConfigCache_Adjust_Init(t *testing.T) { + sizeLimit := uint32(100) + logger := zerolog.Nop() + collector := metrics.NewNoopCollector() + + unicastFactoryCalled := 0 + unicastConfigFactory := func() unicast.Config { + require.Less(t, unicastFactoryCalled, 2, "unicast config factory must be called at most twice") + unicastFactoryCalled++ + return unicastConfigFixture() + } + adjustFuncIncrement := func(cfg unicast.Config) (unicast.Config, error) { + cfg.StreamCreationRetryAttemptBudget++ + return cfg, nil + } + + cache := unicastcache.NewUnicastConfigCache(sizeLimit, logger, collector, unicastConfigFactory) + require.NotNil(t, cache) + require.Zerof(t, cache.Size(), "cache size must be 0") + + peerID1 := unittest.PeerIdFixture(t) + peerID2 := unittest.PeerIdFixture(t) + + // Initializing the unicast config for peerID1 through GetOrInit. + // unicast config for peerID1 does not exist in the cache, so it must be initialized when using GetOrInit. + cfg, err := cache.GetOrInit(peerID1) + require.NoError(t, err) + require.NotNil(t, cfg, "unicast config must not be nil") + require.Equal(t, unicastConfigFixture(), *cfg, "unicast config must be initialized with the default values") + require.Equal(t, uint(1), cache.Size(), "cache size must be 1") + + // Initializing and adjusting the unicast config for peerID2 through Adjust. + // unicast config for peerID2 does not exist in the cache, so it must be initialized when using Adjust. + cfg, err = cache.Adjust(peerID2, adjustFuncIncrement) + require.NoError(t, err) + // adjusting a non-existing unicast config must not initialize the config. + require.Equal(t, uint(2), cache.Size(), "cache size must be 2") + require.Equal(t, cfg.StreamCreationRetryAttemptBudget, unicastConfigFixture().StreamCreationRetryAttemptBudget+1, "stream backoff must be 2") + + // Retrieving the unicast config of peerID2 through GetOrInit. + // retrieve the unicast config for peerID2 and assert than it is initialized with the default values; and the adjust function is applied. + cfg, err = cache.GetOrInit(peerID2) + require.NoError(t, err, "unicast config must exist in the cache") + require.NotNil(t, cfg, "unicast config must not be nil") + // retrieving an existing unicast config must not change the cache size. + require.Equal(t, uint(2), cache.Size(), "cache size must be 2") + // config should be the same as the one returned by Adjust. + require.Equal(t, cfg.StreamCreationRetryAttemptBudget, unicastConfigFixture().StreamCreationRetryAttemptBudget+1, "stream backoff must be 2") + + // Adjusting the unicast config of peerID1 through Adjust. + // unicast config for peerID1 already exists in the cache, so it must be adjusted when using Adjust. + cfg, err = cache.Adjust(peerID1, adjustFuncIncrement) + require.NoError(t, err) + // adjusting an existing unicast config must not change the cache size. + require.Equal(t, uint(2), cache.Size(), "cache size must be 2") + require.Equal(t, cfg.StreamCreationRetryAttemptBudget, unicastConfigFixture().StreamCreationRetryAttemptBudget+1, "stream backoff must be 2") + + // Recurring adjustment of the unicast config of peerID1 through Adjust. + // unicast config for peerID1 already exists in the cache, so it must be adjusted when using Adjust. + cfg, err = cache.Adjust(peerID1, adjustFuncIncrement) + require.NoError(t, err) + // adjusting an existing unicast config must not change the cache size. + require.Equal(t, uint(2), cache.Size(), "cache size must be 2") + require.Equal(t, cfg.StreamCreationRetryAttemptBudget, unicastConfigFixture().StreamCreationRetryAttemptBudget+2, "stream backoff must be 3") +} + +// TestUnicastConfigCache_Adjust tests the Adjust method of the UnicastConfigCache. It asserts that the unicast config is adjusted, +// and stored in the cache as expected under concurrent adjustments. +func TestUnicastConfigCache_Concurrent_Adjust(t *testing.T) { + sizeLimit := uint32(100) + logger := zerolog.Nop() + collector := metrics.NewNoopCollector() + + cache := unicastcache.NewUnicastConfigCache(sizeLimit, logger, collector, func() unicast.Config { + return unicast.Config{} // empty unicast config + }) + require.NotNil(t, cache) + require.Zerof(t, cache.Size(), "cache size must be 0") + + peerIds := make([]peer.ID, sizeLimit) + for i := 0; i < int(sizeLimit); i++ { + peerId := unittest.PeerIdFixture(t) + require.NotContainsf(t, peerIds, peerId, "peer id must be unique") + peerIds[i] = peerId + } + + wg := sync.WaitGroup{} + for i := 0; i < int(sizeLimit); i++ { + // adjusts the ith unicast config for peerID i times, concurrently. + for j := 0; j < i+1; j++ { + wg.Add(1) + go func(peerId peer.ID) { + defer wg.Done() + _, err := cache.Adjust(peerId, func(cfg unicast.Config) (unicast.Config, error) { + cfg.StreamCreationRetryAttemptBudget++ + return cfg, nil + }) + require.NoError(t, err) + }(peerIds[i]) + } + } + + unittest.RequireReturnsBefore(t, wg.Wait, time.Second*1, "adjustments must be done on time") + + // assert that the cache size is equal to the size limit. + require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") + + // assert that the unicast config for each peer is adjusted i times, concurrently. + for i := 0; i < int(sizeLimit); i++ { + wg.Add(1) + go func(j int) { + wg.Done() + + peerID := peerIds[j] + cfg, err := cache.GetOrInit(peerID) + require.NoError(t, err) + require.Equal(t, + uint64(j+1), + cfg.StreamCreationRetryAttemptBudget, + fmt.Sprintf("peerId %s unicast backoff must be adjusted %d times got: %d", peerID, j+1, cfg.StreamCreationRetryAttemptBudget)) + }(i) + } + + unittest.RequireReturnsBefore(t, wg.Wait, time.Second*1, "retrievals must be done on time") +} + +// TestConcurrent_Adjust_And_Get_Is_Safe tests that concurrent adjustments and retrievals are safe, and do not cause error even if they cause eviction. The test stress tests the cache +// with 2 * SizeLimit concurrent operations (SizeLimit times concurrent adjustments and SizeLimit times concurrent retrievals). +// It asserts that the cache size is equal to the size limit, and the unicast config for each peer is adjusted and retrieved correctly. +func TestConcurrent_Adjust_And_Get_Is_Safe(t *testing.T) { + sizeLimit := uint32(100) + logger := zerolog.Nop() + collector := metrics.NewNoopCollector() + + cache := unicastcache.NewUnicastConfigCache(sizeLimit, logger, collector, unicastConfigFixture) + require.NotNil(t, cache) + require.Zerof(t, cache.Size(), "cache size must be 0") + + wg := sync.WaitGroup{} + for i := 0; i < int(sizeLimit); i++ { + // concurrently adjusts the unicast configs. + wg.Add(1) + go func() { + defer wg.Done() + peerId := unittest.PeerIdFixture(t) + updatedConfig, err := cache.Adjust(peerId, func(cfg unicast.Config) (unicast.Config, error) { + cfg.StreamCreationRetryAttemptBudget = 2 // some random adjustment + cfg.ConsecutiveSuccessfulStream = 3 // some random adjustment + return cfg, nil + }) + require.NoError(t, err) // concurrent adjustment must not fail. + require.Equal(t, uint64(2), updatedConfig.StreamCreationRetryAttemptBudget) + require.Equal(t, uint64(3), updatedConfig.ConsecutiveSuccessfulStream) + }() + } + + // assert that the unicast config for each peer is adjusted i times, concurrently. + for i := 0; i < int(sizeLimit); i++ { + wg.Add(1) + go func() { + wg.Done() + peerId := unittest.PeerIdFixture(t) + cfg, err := cache.GetOrInit(peerId) + require.NoError(t, err) // concurrent retrieval must not fail. + require.Equal(t, unicastConfigFixture().StreamCreationRetryAttemptBudget, cfg.StreamCreationRetryAttemptBudget) + require.Equal(t, uint64(0), cfg.ConsecutiveSuccessfulStream) + }() + } + + unittest.RequireReturnsBefore(t, wg.Wait, time.Second*1, "all operations must be done on time") + + // cache was stress-tested with 2 * SizeLimit concurrent operations. Nevertheless, the cache size must be equal to the size limit due to LRU eviction. + require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") +} + +// TestUnicastConfigCache_LRU_Eviction tests that the cache evicts the least recently used unicast config when the cache size reaches the size limit. +func TestUnicastConfigCache_LRU_Eviction(t *testing.T) { + sizeLimit := uint32(100) + logger := zerolog.Nop() + collector := metrics.NewNoopCollector() + + cache := unicastcache.NewUnicastConfigCache(sizeLimit, logger, collector, unicastConfigFixture) + require.NotNil(t, cache) + require.Zerof(t, cache.Size(), "cache size must be 0") + + peerIds := make([]peer.ID, sizeLimit+1) + for i := 0; i < int(sizeLimit+1); i++ { + peerId := unittest.PeerIdFixture(t) + require.NotContainsf(t, peerIds, peerId, "peer id must be unique") + peerIds[i] = peerId + } + for i := 0; i < int(sizeLimit+1); i++ { + updatedConfig, err := cache.Adjust(peerIds[i], func(cfg unicast.Config) (unicast.Config, error) { + cfg.StreamCreationRetryAttemptBudget = 2 // some random adjustment + cfg.ConsecutiveSuccessfulStream = 3 // some random adjustment + return cfg, nil + }) + require.NoError(t, err) // concurrent adjustment must not fail. + require.Equal(t, uint64(2), updatedConfig.StreamCreationRetryAttemptBudget) + require.Equal(t, uint64(3), updatedConfig.ConsecutiveSuccessfulStream) + } + + // except the first peer id, all other peer ids should stay intact in the cache. + for i := 1; i < int(sizeLimit+1); i++ { + cfg, err := cache.GetOrInit(peerIds[i]) + require.NoError(t, err) + require.Equal(t, uint64(2), cfg.StreamCreationRetryAttemptBudget) + require.Equal(t, uint64(3), cfg.ConsecutiveSuccessfulStream) + } + + require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") + + // querying the first peer id should return a fresh unicast config, + // since it should be evicted due to LRU eviction, and the initiated with the default values. + cfg, err := cache.GetOrInit(peerIds[0]) + require.NoError(t, err) + require.Equal(t, unicastConfigFixture().StreamCreationRetryAttemptBudget, cfg.StreamCreationRetryAttemptBudget) + require.Equal(t, uint64(0), cfg.ConsecutiveSuccessfulStream) + + require.Equal(t, uint(sizeLimit), cache.Size(), "cache size must be equal to the size limit") +} diff --git a/network/p2p/unicast/cache/dialConfigEntity.go b/network/p2p/unicast/cache/unicastConfigEntity.go similarity index 60% rename from network/p2p/unicast/cache/dialConfigEntity.go rename to network/p2p/unicast/cache/unicastConfigEntity.go index 71a9b6844c9..c1db31523fe 100644 --- a/network/p2p/unicast/cache/dialConfigEntity.go +++ b/network/p2p/unicast/cache/unicastConfigEntity.go @@ -7,18 +7,18 @@ import ( "github.com/onflow/flow-go/network/p2p/unicast" ) -// DialConfigEntity is a struct that represents a dial config entry for storing in the dial config cache. +// UnicastConfigEntity is a struct that represents a unicast config entry for storing in the unicast config cache. // It implements the flow.Entity interface. -type DialConfigEntity struct { - unicast.DialConfig - PeerId peer.ID // remote peer id; used as the "key" in the dial config cache. +type UnicastConfigEntity struct { + unicast.Config + PeerId peer.ID // remote peer id; used as the "key" in the unicast config cache. id flow.Identifier // cache the id for fast lookup (HeroCache). } -var _ flow.Entity = (*DialConfigEntity)(nil) +var _ flow.Entity = (*UnicastConfigEntity)(nil) -// ID returns the ID of the dial config entity; it is hash value of the peer id. -func (d DialConfigEntity) ID() flow.Identifier { +// ID returns the ID of the unicast config entity; it is hash value of the peer id. +func (d UnicastConfigEntity) ID() flow.Identifier { if d.id == flow.ZeroID { d.id = PeerIdToFlowId(d.PeerId) } @@ -26,7 +26,7 @@ func (d DialConfigEntity) ID() flow.Identifier { } // Checksum acts the same as ID. -func (d DialConfigEntity) Checksum() flow.Identifier { +func (d UnicastConfigEntity) Checksum() flow.Identifier { return d.ID() } diff --git a/network/p2p/unicast/cache/dialConfigEntity_test.go b/network/p2p/unicast/cache/unicastConfigEntity_test.go similarity index 64% rename from network/p2p/unicast/cache/dialConfigEntity_test.go rename to network/p2p/unicast/cache/unicastConfigEntity_test.go index 87da16248c4..d7bad635c04 100644 --- a/network/p2p/unicast/cache/dialConfigEntity_test.go +++ b/network/p2p/unicast/cache/unicastConfigEntity_test.go @@ -2,7 +2,6 @@ package unicastcache_test import ( "testing" - "time" "github.com/stretchr/testify/require" @@ -11,16 +10,14 @@ import ( "github.com/onflow/flow-go/utils/unittest" ) -// TestDialConfigEntity tests the DialConfigEntity struct and its methods. -func TestDialConfigEntity(t *testing.T) { +// TestUnicastConfigEntity tests the UnicastConfigEntity struct and its methods. +func TestUnicastConfigEntity(t *testing.T) { peerID := unittest.PeerIdFixture(t) - d := &unicastcache.DialConfigEntity{ + d := &unicastcache.UnicastConfigEntity{ PeerId: peerID, - DialConfig: unicast.DialConfig{ - DialRetryAttemptBudget: 10, + Config: unicast.Config{ StreamCreationRetryAttemptBudget: 20, - LastSuccessfulDial: time.Now(), ConsecutiveSuccessfulStream: 30, }, } @@ -39,20 +36,18 @@ func TestDialConfigEntity(t *testing.T) { ) t.Run("ID is only calculated from peer.ID", func(t *testing.T) { - d2 := &unicastcache.DialConfigEntity{ - PeerId: unittest.PeerIdFixture(t), - DialConfig: d.DialConfig, + d2 := &unicastcache.UnicastConfigEntity{ + PeerId: unittest.PeerIdFixture(t), + Config: d.Config, } require.NotEqual(t, d.ID(), d2.ID()) // different peer id, different id. - d3 := &unicastcache.DialConfigEntity{ + d3 := &unicastcache.UnicastConfigEntity{ PeerId: d.PeerId, - DialConfig: unicast.DialConfig{ - DialRetryAttemptBudget: 100, + Config: unicast.Config{ StreamCreationRetryAttemptBudget: 200, - LastSuccessfulDial: time.Now(), }, } - require.Equal(t, d.ID(), d3.ID()) // same peer id, same id, even though the dial config is different. + require.Equal(t, d.ID(), d3.ID()) // same peer id, same id, even though the unicast config is different. }) } diff --git a/network/p2p/unicast/dialConfig.go b/network/p2p/unicast/dialConfig.go index 7d53e829a29..e88c4fd7554 100644 --- a/network/p2p/unicast/dialConfig.go +++ b/network/p2p/unicast/dialConfig.go @@ -1,17 +1,13 @@ package unicast -import "time" - -// DialConfig is a struct that represents the dial config for a peer. -type DialConfig struct { - DialRetryAttemptBudget uint64 // number of times we have to try to dial the peer before we give up. - StreamCreationRetryAttemptBudget uint64 // number of times we have to try to open a stream to the peer before we give up. - LastSuccessfulDial time.Time // timestamp of the last successful dial to the peer. - ConsecutiveSuccessfulStream uint64 // consecutive number of successful streams to the peer since the last time stream creation failed. +// Config is a struct that represents the dial config for a peer. +type Config struct { + StreamCreationRetryAttemptBudget uint64 // number of times we have to try to open a stream to the peer before we give up. + ConsecutiveSuccessfulStream uint64 // consecutive number of successful streams to the peer since the last time stream creation failed. } -// DialConfigAdjustFunc is a function that is used to adjust the fields of a DialConfigEntity. +// UnicastConfigAdjustFunc is a function that is used to adjust the fields of a DialConfigEntity. // The function is called with the current config and should return the adjusted record. // Returned error indicates that the adjustment is not applied, and the config should not be updated. // In BFT setup, the returned error should be treated as a fatal error. -type DialConfigAdjustFunc func(DialConfig) (DialConfig, error) +type UnicastConfigAdjustFunc func(Config) (Config, error) diff --git a/network/p2p/unicast/dialConfigCache.go b/network/p2p/unicast/dialConfigCache.go index 879e2756d49..fc4c3199b5b 100644 --- a/network/p2p/unicast/dialConfigCache.go +++ b/network/p2p/unicast/dialConfigCache.go @@ -4,17 +4,17 @@ import ( "github.com/libp2p/go-libp2p/core/peer" ) -// DialConfigCache is a thread-safe cache for dial configs. It is used by the unicast service to store +// ConfigCache is a thread-safe cache for dial configs. It is used by the unicast service to store // the dial configs for peers. -type DialConfigCache interface { +type ConfigCache interface { // GetOrInit returns the dial config for the given peer id. If the config does not exist, it creates a new config // using the factory function and stores it in the cache. // Args: // - peerID: the peer id of the dial config. // Returns: - // - *DialConfig, the dial config for the given peer id. + // - *Config, the dial config for the given peer id. // - error if the factory function returns an error. Any error should be treated as an irrecoverable error and indicates a bug. - GetOrInit(peerID peer.ID) (*DialConfig, error) + GetOrInit(peerID peer.ID) (*Config, error) // Adjust adjusts the dial config for the given peer id using the given adjustFunc. // It returns an error if the adjustFunc returns an error. @@ -23,7 +23,7 @@ type DialConfigCache interface { // - adjustFunc: the function that adjusts the dial config. // Returns: // - error if the adjustFunc returns an error. Any error should be treated as an irrecoverable error and indicates a bug. - Adjust(peerID peer.ID, adjustFunc DialConfigAdjustFunc) (*DialConfig, error) + Adjust(peerID peer.ID, adjustFunc UnicastConfigAdjustFunc) (*Config, error) // Size returns the number of dial configs in the cache. Size() uint diff --git a/network/p2p/unicast/errors.go b/network/p2p/unicast/errors.go index d8abb2624f7..99bb8bdeaed 100644 --- a/network/p2p/unicast/errors.go +++ b/network/p2p/unicast/errors.go @@ -3,32 +3,8 @@ package unicast import ( "errors" "fmt" - - "github.com/libp2p/go-libp2p/core/peer" - - "github.com/onflow/flow-go/network/p2p/p2plogging" ) -// ErrDialInProgress indicates that the libp2p node is currently dialing the peer. -type ErrDialInProgress struct { - pid peer.ID -} - -func (e ErrDialInProgress) Error() string { - return fmt.Sprintf("dialing to peer %s already in progress", p2plogging.PeerId(e.pid)) -} - -// NewDialInProgressErr returns a new ErrDialInProgress. -func NewDialInProgressErr(pid peer.ID) ErrDialInProgress { - return ErrDialInProgress{pid: pid} -} - -// IsErrDialInProgress returns whether an error is ErrDialInProgress -func IsErrDialInProgress(err error) bool { - var e ErrDialInProgress - return errors.As(err, &e) -} - // ErrMaxRetries indicates retries completed with max retries without a successful attempt. type ErrMaxRetries struct { attempts uint64 diff --git a/network/p2p/unicast/manager.go b/network/p2p/unicast/manager.go index b5dea5de02b..16b4dce703b 100644 --- a/network/p2p/unicast/manager.go +++ b/network/p2p/unicast/manager.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "sync" "time" "github.com/go-playground/validator/v10" @@ -34,7 +33,7 @@ var ( _ p2p.UnicastManager = (*Manager)(nil) ) -type DialConfigCacheFactory func(configFactory func() DialConfig) DialConfigCache +type DialConfigCacheFactory func(configFactory func() Config) ConfigCache // Manager manages libp2p stream negotiation and creation, which is utilized for unicast dispatches. type Manager struct { @@ -43,8 +42,6 @@ type Manager struct { protocols []protocols.Protocol defaultHandler libp2pnet.StreamHandler sporkId flow.Identifier - connStatus p2p.PeerConnections - peerDialing sync.Map metrics module.UnicastManagerMetrics // createStreamBackoffDelay is the delay between each stream creation retry attempt. @@ -52,19 +49,9 @@ type Manager struct { // is the initial delay between each retry attempt. The delay is doubled after each retry attempt. createStreamBackoffDelay time.Duration - // dialInProgressBackoffDelay is the backoff delay for parallel attempts on dialing to the same peer. - // When the unicast manager is invoked to create stream to the same peer concurrently while there is - // already an ongoing dialing attempt to the same peer, the unicast manager will wait for this backoff delay - // and retry creating the stream after the backoff delay has elapsed. This is to prevent the unicast manager - // from creating too many parallel dialing attempts to the same peer. - dialInProgressBackoffDelay time.Duration - - // dialBackoffDelay is the backoff delay between retrying connection to the same peer. - dialBackoffDelay time.Duration - // dialConfigCache is a cache to store the dial config for each peer. // TODO: encapsulation can be further improved by wrapping the dialConfigCache together with the dial config adjustment logic into a single struct. - dialConfigCache DialConfigCache + dialConfigCache ConfigCache // streamZeroBackoffResetThreshold is the threshold that determines when to reset the stream creation backoff budget to the default value. // @@ -78,21 +65,6 @@ type Manager struct { // 100 stream creations are all successful. streamZeroBackoffResetThreshold uint64 - // dialZeroBackoffResetThreshold is the threshold that determines when to reset the dial backoff budget to the default value. - // - // For example the threshold of 1 hour means that if the dial backoff budget is decreased to 0, then it will be reset to default value - // when it has been 1 hour since the last successful dial. - // - // This is to prevent the backoff budget from being reset too frequently, as the backoff budget is used to gauge the reliability of the dialing a remote peer. - // When the dial backoff budget is reset to the default value, it means that the dialing is reliable enough to be trusted again. - // This parameter mandates when the dialing is reliable enough to be trusted again; i.e., when it has been 1 hour since the last successful dial. - // Note that the last dial attempt timestamp is reset to zero when the dial fails, so the value of for example 1 hour means that the dialing to the remote peer is reliable enough that the last - // successful dial attempt was 1 hour ago. - dialZeroBackoffResetThreshold time.Duration - - // maxDialAttemptTimes is the maximum number of attempts to be made to connect to a remote node to establish a unicast (1:1) connection before we give up. - maxDialAttemptTimes uint64 - // maxStreamCreationAttemptTimes is the maximum number of attempts to be made to create a stream to a remote node over a direct unicast (1:1) connection before we give up. maxStreamCreationAttemptTimes uint64 } @@ -111,33 +83,23 @@ func NewUnicastManager(cfg *ManagerConfig) (*Manager, error) { m := &Manager{ logger: cfg.Logger.With().Str("module", "unicast-manager").Logger(), - dialConfigCache: cfg.DialConfigCacheFactory(func() DialConfig { - return DialConfig{ + dialConfigCache: cfg.UnicastConfigCacheFactory(func() Config { + return Config{ StreamCreationRetryAttemptBudget: cfg.MaxStreamCreationRetryAttemptTimes, - DialRetryAttemptBudget: cfg.MaxDialRetryAttemptTimes, } }), streamFactory: cfg.StreamFactory, sporkId: cfg.SporkId, - connStatus: cfg.ConnStatus, - peerDialing: sync.Map{}, metrics: cfg.Metrics, createStreamBackoffDelay: cfg.CreateStreamBackoffDelay, - dialBackoffDelay: cfg.DialBackoffDelay, - dialInProgressBackoffDelay: cfg.DialInProgressBackoffDelay, streamZeroBackoffResetThreshold: cfg.StreamZeroRetryResetThreshold, - dialZeroBackoffResetThreshold: cfg.DialZeroRetryResetThreshold, maxStreamCreationAttemptTimes: cfg.MaxStreamCreationRetryAttemptTimes, - maxDialAttemptTimes: cfg.MaxDialRetryAttemptTimes, } m.logger.Info(). Hex("spork_id", logging.ID(cfg.SporkId)). Dur("create_stream_backoff_delay", cfg.CreateStreamBackoffDelay). - Dur("dial_backoff_delay", cfg.DialBackoffDelay). - Dur("dial_in_progress_backoff_delay", cfg.DialInProgressBackoffDelay). Uint64("stream_zero_backoff_reset_threshold", cfg.StreamZeroRetryResetThreshold). - Dur("dial_zero_backoff_reset_threshold", cfg.DialZeroRetryResetThreshold). Msg("unicast manager created") return m, nil @@ -207,7 +169,7 @@ func (m *Manager) CreateStream(ctx context.Context, peerID peer.ID) (libp2pnet.S Msg("dial config for the peer retrieved") for i := len(m.protocols) - 1; i >= 0; i-- { - s, err := m.tryCreateStream(ctx, peerID, m.protocols[i], dialCfg) + s, err := m.createStream(ctx, peerID, m.protocols[i], dialCfg) if err != nil { errs = multierror.Append(errs, err) continue @@ -217,8 +179,7 @@ func (m *Manager) CreateStream(ctx context.Context, peerID peer.ID) (libp2pnet.S return s, nil } - connected, connErr := m.connStatus.IsConnected(peerID) // we don't check connErr as it indicates that the peer is not connected. - updatedCfg, err := m.adjustUnsuccessfulStreamAttempt(peerID, connErr == nil && connected) + updatedCfg, err := m.adjustUnsuccessfulStreamAttempt(peerID) if err != nil { // TODO: technically, we better to return an error here, but the error must be irrecoverable, and we cannot // guarantee a clear distinction between recoverable and irrecoverable errors at the moment with CreateStream. @@ -235,64 +196,45 @@ func (m *Manager) CreateStream(ctx context.Context, peerID peer.ID) (libp2pnet.S Bool(logging.KeySuspicious, true). Str("peer_id", p2plogging.PeerId(peerID)). Str("dial_config", fmt.Sprintf("%+v", updatedCfg)). - Bool("is_connected", err == nil && connected). Msg("failed to create stream to peer id, dial config adjusted") return nil, fmt.Errorf("could not create stream on any available unicast protocol: %w", errs) } -// tryCreateStream will retry createStream with the configured exponential backoff delay and maxAttempts. -// During retries, each error encountered is aggregated in a multierror. If max attempts are made before a -// stream can be successfully the multierror will be returned. During stream creation when IsErrDialInProgress -// is encountered during retries this would indicate that no connection to the peer exists yet. -// In this case we will retry creating the stream with a backoff until a connection is established. -func (m *Manager) tryCreateStream(ctx context.Context, peerID peer.ID, protocol protocols.Protocol, dialCfg *DialConfig) (libp2pnet.Stream, error) { +// createStream attempts to establish a new stream with a peer using the specified protocol. It employs +// exponential backoff with a maximum number of attempts defined by dialCfg.StreamCreationRetryAttemptBudget. +// If the stream cannot be established after the maximum attempts, it returns a compiled multierror of all +// encountered errors. Errors related to in-progress dials trigger a retry until a connection is established +// or the attempt budget is exhausted. +// +// The function increments the Config's ConsecutiveSuccessfulStream count upon success. In the case of +// adjustment errors in Config, a fatal error is logged indicating an issue that requires attention. +// Metrics are collected to monitor the duration and number of attempts for stream creation. +// +// Arguments: +// - ctx: Context to control the lifecycle of the stream creation. +// - peerID: The ID of the peer with which the stream is to be established. +// - protocol: The specific protocol used for the stream. +// - dialCfg: Configuration parameters for dialing and stream creation, including retry logic. +// +// Returns: +// - libp2pnet.Stream: The successfully created stream, or nil if the stream creation fails. +// - error: An aggregated multierror of all encountered errors during stream creation, or nil if successful; any returned error is benign and can be retried. +func (m *Manager) createStream(ctx context.Context, peerID peer.ID, protocol protocols.Protocol, dialCfg *Config) (libp2pnet.Stream, error) { var err error var s libp2pnet.Stream - // backoff delay for dial in progress errors; this backoff delay only kicks in if there is no connection to the peer - // and there is already a dial in progress to the peer. - backoff := retry.NewExponential(m.dialInProgressBackoffDelay) - // https://github.com/sethvargo/go-retry#maxretries retries counter starts at zero and library will make last attempt - // when retries == maxAttempts causing 1 more func invocation than expected. - maxRetries := dialCfg.StreamCreationRetryAttemptBudget - backoff = retry.WithMaxRetries(maxRetries, backoff) - - attempts := 0 - // retryable func will attempt to create the stream and only retry if dialing the peer is in progress - f := func(context.Context) error { - attempts++ - s, err = m.rawStreamWithProtocol(ctx, protocol.ProtocolId(), peerID, dialCfg) - if err != nil { - if IsErrDialInProgress(err) { - m.logger.Warn(). - Err(err). - Str("peer_id", p2plogging.PeerId(peerID)). - Int("attempt", attempts). - Uint64("max_retries", maxRetries). - Msg("retrying create stream, dial to peer in progress") - return retry.RetryableError(err) - } - return err - } - - s, err = protocol.UpgradeRawStream(s) - if err != nil { - return fmt.Errorf("failed to upgrade raw stream: %w", err) - } - - return nil + s, err = m.createStreamWithRetry(ctx, peerID, protocol.ProtocolId(), dialCfg) + if err != nil { + return nil, fmt.Errorf("failed to create a stream to peer: %w", err) } - start := time.Now() - err = retry.Do(ctx, backoff, f) - duration := time.Since(start) + + s, err = protocol.UpgradeRawStream(s) if err != nil { - m.metrics.OnStreamCreationFailure(duration, attempts) - return nil, err + return nil, fmt.Errorf("failed to upgrade raw stream: %w", err) } - m.metrics.OnStreamCreated(duration, attempts) - updatedConfig, err := m.dialConfigCache.Adjust(peerID, func(config DialConfig) (DialConfig, error) { + updatedConfig, err := m.dialConfigCache.Adjust(peerID, func(config Config) (Config, error) { config.ConsecutiveSuccessfulStream++ // increase consecutive successful stream count. return config, nil }) @@ -314,120 +256,26 @@ func (m *Manager) tryCreateStream(ctx context.Context, peerID peer.ID, protocol return s, nil } -// rawStreamWithProtocol creates a stream raw libp2p stream on specified protocol. +// createStreamWithRetry attempts to create a new stream to the specified peer using the given protocolID. +// This function is streamlined for use-cases where retries are managed externally or +// not required at all. // -// Note: a raw stream must be upgraded by the given unicast protocol id. +// Expected errors: +// - If the context expires before stream creation, it returns a context-related error with the number of attempts. +// - If the protocol ID is not supported, no retries are attempted and the error is returned immediately. // -// It makes at most `maxAttempts` to create a stream with the peer. -// This was put in as a fix for #2416. PubSub and 1-1 communication compete with each other when trying to connect to -// remote nodes and once in a while NewStream returns an error 'both yamux endpoints are clients'. +// Metrics are collected to monitor the duration and attempts of the stream creation process. // -// Note that in case an existing TCP connection underneath to `peerID` exists, that connection is utilized for creating a new stream. -// The multiaddr.Multiaddr return value represents the addresses of `peerID` we dial while trying to create a stream to it, the -// multiaddr is only returned when a peer is initially dialed. -// Expected errors during normal operations: -// - ErrDialInProgress if no connection to the peer exists and there is already a dial in progress to the peer. If a dial to -// the peer is already in progress the caller needs to wait until it is completed, a peer should be dialed only once. +// Arguments: +// - ctx: Context to control the lifecycle of the stream creation. +// - peerID: The ID of the peer with which the stream is to be established. +// - protocolID: The identifier for the protocol used for the stream. +// - dialCfg: Configuration parameters for dialing, including the retry attempt budget. // -// Unexpected errors during normal operations: -// - network.ErrIllegalConnectionState indicates bug in libpp2p when checking IsConnected status of peer. -func (m *Manager) rawStreamWithProtocol(ctx context.Context, protocolID protocol.ID, peerID peer.ID, dialCfg *DialConfig) (libp2pnet.Stream, error) { - isConnected, err := m.connStatus.IsConnected(peerID) - if err != nil { - return nil, err - } - - // check connection status and attempt to dial the peer if dialing is not in progress - if !isConnected { - // return error if we can't start dialing - if _, inProgress := m.peerDialing.LoadOrStore(peerID, struct{}{}); inProgress { - return nil, NewDialInProgressErr(peerID) - } - defer m.peerDialing.Delete(peerID) - err := m.dialPeer(ctx, peerID, dialCfg) - if err != nil { - return nil, err - } - } - - // at this point dialing should have completed, we are already connected we can attempt to create the stream - s, err := m.rawStream(ctx, peerID, protocolID, dialCfg) - if err != nil { - return nil, err - } - - return s, nil -} - -// dialPeer dial peer with retries. -// Expected errors during normal operations: -// - ErrMaxRetries if retry attempts are exhausted -func (m *Manager) dialPeer(ctx context.Context, peerID peer.ID, dialCfg *DialConfig) error { - // aggregated retryable errors that occur during retries, errs will be returned - // if retry context times out or maxAttempts have been made before a successful retry occurs - var errs error - dialAttempts := 0 - backoff := retryBackoff(dialCfg.DialRetryAttemptBudget, m.dialBackoffDelay) - f := func(context.Context) error { - dialAttempts++ - select { - case <-ctx.Done(): - return fmt.Errorf("context done before stream could be created (retry attempt: %d, errors: %w)", dialAttempts, errs) - default: - } - err := m.streamFactory.Connect(ctx, peer.AddrInfo{ID: peerID}) - if err != nil { - // if the connection was rejected due to invalid node id or - // if the connection was rejected due to connection gating skip the re-attempt - // if there is no address for the peer skip the re-attempt - if stream.IsErrSecurityProtocolNegotiationFailed(err) || stream.IsErrGaterDisallowedConnection(err) || errors.Is(err, swarm.ErrNoAddresses) { - return multierror.Append(errs, err) - } - m.logger.Warn(). - Err(err). - Str("peer_id", p2plogging.PeerId(peerID)). - Int("attempt", dialAttempts). - Uint64("max_attempts", dialCfg.DialRetryAttemptBudget). - Msg("retrying peer dialing") - return retry.RetryableError(multierror.Append(errs, err)) - } - updatedConfig, err := m.dialConfigCache.Adjust(peerID, func(config DialConfig) (DialConfig, error) { - config.LastSuccessfulDial = time.Now() // update last successful dial time - return config, nil - }) - if err != nil { - // This is not a connection retryable error, this is a fatal error. - // TODO: technically, we better to return an error here, but the error must be irrecoverable, and we cannot - // guarantee a clear distinction between recoverable and irrecoverable errors at the moment with CreateStream. - // We have to revisit this once we studied the error handling paths in the unicast manager. - m.logger.Fatal(). - Err(err). - Bool(logging.KeyNetworkingSecurity, true). - Str("peer_id", p2plogging.PeerId(peerID)). - Msg("failed to adjust dial config for peer id") - } - m.logger.Info(). - Str("peer_id", p2plogging.PeerId(peerID)). - Str("updated_dial_config", fmt.Sprintf("%+v", updatedConfig)). - Msg("peer dialed successfully") - return nil - } - - start := time.Now() - err := retry.Do(ctx, backoff, f) - duration := time.Since(start) - if err != nil { - m.metrics.OnPeerDialFailure(duration, dialAttempts) - return retryFailedError(uint64(dialAttempts), dialCfg.DialRetryAttemptBudget, fmt.Errorf("failed to dial peer %s: %w", p2plogging.PeerId(peerID), err)) - } - m.metrics.OnPeerDialed(duration, dialAttempts) - return nil -} - -// rawStream creates a stream to peer with retries. -// Expected errors during normal operations: -// - ErrMaxRetries if retry attempts are exhausted -func (m *Manager) rawStream(ctx context.Context, peerID peer.ID, protocolID protocol.ID, dialCfg *DialConfig) (libp2pnet.Stream, error) { +// Returns: +// - libp2pnet.Stream: The successfully created stream, or nil if an error occurs. +// - error: An error encountered during the stream creation, or nil if the stream is successfully established. +func (m *Manager) createStreamWithRetry(ctx context.Context, peerID peer.ID, protocolID protocol.ID, dialCfg *Config) (libp2pnet.Stream, error) { // aggregated retryable errors that occur during retries, errs will be returned // if retry context times out or maxAttempts have been made before a successful retry occurs var errs error @@ -442,14 +290,14 @@ func (m *Manager) rawStream(ctx context.Context, peerID peer.ID, protocolID prot } var err error - // add libp2p context value NoDial to prevent the underlying host from dialingComplete the peer while creating the stream - // we've already ensured that a connection already exists. - ctx = libp2pnet.WithNoDial(ctx, "application ensured connection to peer exists") // creates stream using stream factory s, err = m.streamFactory.NewStream(ctx, peerID, protocolID) if err != nil { - // if the stream creation failed due to invalid protocol id, skip the re-attempt - if stream.IsErrProtocolNotSupported(err) { + // if the stream creation failed due to invalid protocol id or no address, skip the re-attempt + if stream.IsErrProtocolNotSupported(err) || + errors.Is(err, swarm.ErrNoAddresses) || + stream.IsErrSecurityProtocolNegotiationFailed(err) || + stream.IsErrGaterDisallowedConnection(err) { return err } return retry.RetryableError(multierror.Append(errs, err)) @@ -504,7 +352,7 @@ func retryFailedError(dialAttempts, maxAttempts uint64, err error) error { // Returns: // - dial config for the given peer id. // - error if the dial config cannot be retrieved or adjusted; any error is irrecoverable and indicates a fatal error. -func (m *Manager) getDialConfig(peerID peer.ID) (*DialConfig, error) { +func (m *Manager) getDialConfig(peerID peer.ID) (*Config, error) { dialCfg, err := m.dialConfigCache.GetOrInit(peerID) if err != nil { return nil, fmt.Errorf("failed to get or init dial config for peer id: %w", err) @@ -513,7 +361,7 @@ func (m *Manager) getDialConfig(peerID peer.ID) (*DialConfig, error) { if dialCfg.StreamCreationRetryAttemptBudget == uint64(0) && dialCfg.ConsecutiveSuccessfulStream >= m.streamZeroBackoffResetThreshold { // reset the stream creation backoff budget to the default value if the number of consecutive successful streams reaches the threshold, // as the stream creation is reliable enough to be trusted again. - dialCfg, err = m.dialConfigCache.Adjust(peerID, func(config DialConfig) (DialConfig, error) { + dialCfg, err = m.dialConfigCache.Adjust(peerID, func(config Config) (Config, error) { config.StreamCreationRetryAttemptBudget = m.maxStreamCreationAttemptTimes m.metrics.OnStreamCreationRetryBudgetUpdated(config.StreamCreationRetryAttemptBudget) m.metrics.OnStreamCreationRetryBudgetResetToDefault() @@ -523,21 +371,6 @@ func (m *Manager) getDialConfig(peerID peer.ID) (*DialConfig, error) { return nil, fmt.Errorf("failed to adjust dial config for peer id (resetting stream creation attempt budget): %w", err) } } - if dialCfg.DialRetryAttemptBudget == uint64(0) && - !dialCfg.LastSuccessfulDial.IsZero() && // if the last successful dial time is zero, it means that we have never successfully dialed to the peer, so we should not reset the dial backoff budget. - time.Since(dialCfg.LastSuccessfulDial) >= m.dialZeroBackoffResetThreshold { - // reset the dial backoff budget to the default value if the last successful dial was long enough ago, - // as the dialing is reliable enough to be trusted again. - dialCfg, err = m.dialConfigCache.Adjust(peerID, func(config DialConfig) (DialConfig, error) { - config.DialRetryAttemptBudget = m.maxDialAttemptTimes - m.metrics.OnDialRetryBudgetUpdated(config.DialRetryAttemptBudget) - m.metrics.OnDialRetryBudgetResetToDefault() - return config, nil - }) - if err != nil { - return nil, fmt.Errorf("failed to adjust dial config for peer id (resetting dial attempt budget): %w", err) - } - } return dialCfg, nil } @@ -551,28 +384,18 @@ func (m *Manager) getDialConfig(peerID peer.ID) (*DialConfig, error) { // - dial config for the given peer id. // - connected indicates whether there is a connection to the peer. // - error if the dial config cannot be adjusted; any error is irrecoverable and indicates a fatal error. -func (m *Manager) adjustUnsuccessfulStreamAttempt(peerID peer.ID, connected bool) (*DialConfig, error) { - updatedCfg, err := m.dialConfigCache.Adjust(peerID, func(config DialConfig) (DialConfig, error) { +func (m *Manager) adjustUnsuccessfulStreamAttempt(peerID peer.ID) (*Config, error) { + updatedCfg, err := m.dialConfigCache.Adjust(peerID, func(config Config) (Config, error) { // consecutive successful stream count is reset to 0 if we fail to create a stream or connection to the peer. config.ConsecutiveSuccessfulStream = 0 - if !connected { - // if no connections could be established to the peer, we will try to dial with a more strict dial config next time. - if config.DialRetryAttemptBudget > 0 { - config.DialRetryAttemptBudget-- - m.metrics.OnDialRetryBudgetUpdated(config.DialRetryAttemptBudget) - } - // last successful dial time is reset to 0 if we fail to create a stream to the peer. - config.LastSuccessfulDial = time.Time{} - - } else { - // there is a connection to the peer it means that the stream creation failed, hence we decrease the stream backoff budget - // to try to create a stream with a more strict dial config next time. - if config.StreamCreationRetryAttemptBudget > 0 { - config.StreamCreationRetryAttemptBudget-- - m.metrics.OnStreamCreationRetryBudgetUpdated(config.StreamCreationRetryAttemptBudget) - } + // there is a connection to the peer it means that the stream creation failed, hence we decrease the stream backoff budget + // to try to create a stream with a more strict dial config next time. + if config.StreamCreationRetryAttemptBudget > 0 { + config.StreamCreationRetryAttemptBudget-- + m.metrics.OnStreamCreationRetryBudgetUpdated(config.StreamCreationRetryAttemptBudget) } + return config, nil }) diff --git a/network/p2p/unicast/manager_config.go b/network/p2p/unicast/manager_config.go index 438e05daa75..eac00c76611 100644 --- a/network/p2p/unicast/manager_config.go +++ b/network/p2p/unicast/manager_config.go @@ -14,22 +14,11 @@ type ManagerConfig struct { Logger zerolog.Logger `validate:"required"` StreamFactory p2p.StreamFactory `validate:"required"` SporkId flow.Identifier `validate:"required"` - ConnStatus p2p.PeerConnections `validate:"required"` Metrics module.UnicastManagerMetrics `validate:"required"` // CreateStreamBackoffDelay is the backoff delay between retrying stream creations to the same peer. CreateStreamBackoffDelay time.Duration `validate:"gt=0"` - // DialInProgressBackoffDelay is the backoff delay for parallel attempts on dialing to the same peer. - // When the unicast manager is invoked to create stream to the same peer concurrently while there is - // already an ongoing dialing attempt to the same peer, the unicast manager will wait for this backoff delay - // and retry creating the stream after the backoff delay has elapsed. This is to prevent the unicast manager - // from creating too many parallel dialing attempts to the same peer. - DialInProgressBackoffDelay time.Duration `validate:"gt=0"` - - // DialBackoffDelay is the backoff delay between retrying connection to the same peer. - DialBackoffDelay time.Duration `validate:"gt=0"` - // StreamZeroRetryResetThreshold is the threshold that determines when to reset the stream creation retry budget to the default value. // // For example the default value of 100 means that if the stream creation retry budget is decreased to 0, then it will be reset to default value @@ -42,23 +31,9 @@ type ManagerConfig struct { // 100 stream creations are all successful. StreamZeroRetryResetThreshold uint64 `validate:"gt=0"` - // DialZeroRetryResetThreshold is the threshold that determines when to reset the dial retry budget to the default value. - // For example the threshold of 1 hour means that if the dial retry budget is decreased to 0, then it will be reset to default value - // when it has been 1 hour since the last successful dial. - // - // This is to prevent the retry budget from being reset too frequently, as the retry budget is used to gauge the reliability of the dialing a remote peer. - // When the dial retry budget is reset to the default value, it means that the dialing is reliable enough to be trusted again. - // This parameter mandates when the dialing is reliable enough to be trusted again; i.e., when it has been 1 hour since the last successful dial. - // Note that the last dial attempt timestamp is reset to zero when the dial fails, so the value of for example 1 hour means that the dialing to the remote peer is reliable enough that the last - // successful dial attempt was 1 hour ago. - DialZeroRetryResetThreshold time.Duration `validate:"gt=0"` - - // MaxDialRetryAttemptTimes is the maximum number of attempts to be made to connect to a remote node to establish a unicast (1:1) connection before we give up. - MaxDialRetryAttemptTimes uint64 `validate:"gt=0"` - // MaxStreamCreationRetryAttemptTimes is the maximum number of attempts to be made to create a stream to a remote node over a direct unicast (1:1) connection before we give up. MaxStreamCreationRetryAttemptTimes uint64 `validate:"gt=0"` - // DialConfigCacheFactory is a factory function to create a new dial config cache. - DialConfigCacheFactory DialConfigCacheFactory `validate:"required"` + // UnicastConfigCacheFactory is a factory function to create a new dial config cache. + UnicastConfigCacheFactory DialConfigCacheFactory `validate:"required"` } diff --git a/network/p2p/unicast/manager_test.go b/network/p2p/unicast/manager_test.go index ebaed0ccb71..32b35cd9dfc 100644 --- a/network/p2p/unicast/manager_test.go +++ b/network/p2p/unicast/manager_test.go @@ -4,10 +4,8 @@ import ( "context" "fmt" "testing" - "time" libp2pnet "github.com/libp2p/go-libp2p/core/network" - "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/p2p/net/swarm" "github.com/stretchr/testify/mock" @@ -15,7 +13,6 @@ import ( "github.com/onflow/flow-go/config" "github.com/onflow/flow-go/module/metrics" - mockmetrics "github.com/onflow/flow-go/module/mock" mockp2p "github.com/onflow/flow-go/network/p2p/mock" p2ptest "github.com/onflow/flow-go/network/p2p/test" "github.com/onflow/flow-go/network/p2p/unicast" @@ -24,20 +21,18 @@ import ( "github.com/onflow/flow-go/utils/unittest" ) -func unicastManagerFixture(t *testing.T) (*unicast.Manager, *mockp2p.StreamFactory, *mockp2p.PeerConnections, unicast.DialConfigCache) { +func unicastManagerFixture(t *testing.T) (*unicast.Manager, *mockp2p.StreamFactory, unicast.ConfigCache) { streamFactory := mockp2p.NewStreamFactory(t) streamFactory.On("SetStreamHandler", mock.AnythingOfType("protocol.ID"), mock.AnythingOfType("network.StreamHandler")).Return().Once() - connStatus := mockp2p.NewPeerConnections(t) cfg, err := config.DefaultConfig() require.NoError(t, err) - dialConfigCache := unicastcache.NewDialConfigCache(cfg.NetworkConfig.UnicastConfig.DialConfigCacheSize, + unicastConfigCache := unicastcache.NewUnicastConfigCache(cfg.NetworkConfig.UnicastConfig.ConfigCacheSize, unittest.Logger(), metrics.NewNoopCollector(), - func() unicast.DialConfig { - return unicast.DialConfig{ - DialRetryAttemptBudget: cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, + func() unicast.Config { + return unicast.Config{ StreamCreationRetryAttemptBudget: cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, } }) @@ -46,23 +41,18 @@ func unicastManagerFixture(t *testing.T) (*unicast.Manager, *mockp2p.StreamFacto Logger: unittest.Logger(), StreamFactory: streamFactory, SporkId: unittest.IdentifierFixture(), - ConnStatus: connStatus, CreateStreamBackoffDelay: cfg.NetworkConfig.UnicastConfig.CreateStreamBackoffDelay, Metrics: metrics.NewNoopCollector(), StreamZeroRetryResetThreshold: cfg.NetworkConfig.UnicastConfig.StreamZeroRetryResetThreshold, - DialZeroRetryResetThreshold: cfg.NetworkConfig.UnicastConfig.DialZeroRetryResetThreshold, MaxStreamCreationRetryAttemptTimes: cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - MaxDialRetryAttemptTimes: cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, - DialInProgressBackoffDelay: cfg.NetworkConfig.UnicastConfig.DialInProgressBackoffDelay, - DialBackoffDelay: cfg.NetworkConfig.UnicastConfig.DialBackoffDelay, - DialConfigCacheFactory: func(func() unicast.DialConfig) unicast.DialConfigCache { - return dialConfigCache + UnicastConfigCacheFactory: func(func() unicast.Config) unicast.ConfigCache { + return unicastConfigCache }, }) require.NoError(t, err) mgr.SetDefaultHandler(func(libp2pnet.Stream) {}) // no-op handler, we don't care about the handler for this test - return mgr, streamFactory, connStatus, dialConfigCache + return mgr, streamFactory, unicastConfigCache } // TestManagerConfigValidation tests the validation of the unicast manager config. @@ -75,22 +65,16 @@ func TestManagerConfigValidation(t *testing.T) { Logger: unittest.Logger(), StreamFactory: mockp2p.NewStreamFactory(t), SporkId: unittest.IdentifierFixture(), - ConnStatus: mockp2p.NewPeerConnections(t), CreateStreamBackoffDelay: cfg.NetworkConfig.UnicastConfig.CreateStreamBackoffDelay, Metrics: metrics.NewNoopCollector(), StreamZeroRetryResetThreshold: cfg.NetworkConfig.UnicastConfig.StreamZeroRetryResetThreshold, - DialZeroRetryResetThreshold: cfg.NetworkConfig.UnicastConfig.DialZeroRetryResetThreshold, MaxStreamCreationRetryAttemptTimes: cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - MaxDialRetryAttemptTimes: cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, - DialInProgressBackoffDelay: cfg.NetworkConfig.UnicastConfig.DialInProgressBackoffDelay, - DialBackoffDelay: cfg.NetworkConfig.UnicastConfig.DialBackoffDelay, - DialConfigCacheFactory: func(func() unicast.DialConfig) unicast.DialConfigCache { - return unicastcache.NewDialConfigCache(cfg.NetworkConfig.UnicastConfig.DialConfigCacheSize, + UnicastConfigCacheFactory: func(func() unicast.Config) unicast.ConfigCache { + return unicastcache.NewUnicastConfigCache(cfg.NetworkConfig.UnicastConfig.ConfigCacheSize, unittest.Logger(), metrics.NewNoopCollector(), - func() unicast.DialConfig { - return unicast.DialConfig{ - DialRetryAttemptBudget: cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, + func() unicast.Config { + return unicast.Config{ StreamCreationRetryAttemptBudget: cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, } }) @@ -118,22 +102,6 @@ func TestManagerConfigValidation(t *testing.T) { require.Nil(t, mgr) }) - t.Run("Invalid DialInProgressBackoffDelay", func(t *testing.T) { - cfg := validConfig - cfg.DialInProgressBackoffDelay = 0 - mgr, err := unicast.NewUnicastManager(&cfg) - require.Error(t, err) - require.Nil(t, mgr) - }) - - t.Run("Invalid DialBackoffDelay", func(t *testing.T) { - cfg := validConfig - cfg.DialBackoffDelay = 0 - mgr, err := unicast.NewUnicastManager(&cfg) - require.Error(t, err) - require.Nil(t, mgr) - }) - t.Run("Invalid StreamZeroRetryResetThreshold", func(t *testing.T) { cfg := validConfig cfg.StreamZeroRetryResetThreshold = 0 @@ -142,22 +110,6 @@ func TestManagerConfigValidation(t *testing.T) { require.Nil(t, mgr) }) - t.Run("Invalid DialZeroRetryResetThreshold", func(t *testing.T) { - cfg := validConfig - cfg.DialZeroRetryResetThreshold = 0 - mgr, err := unicast.NewUnicastManager(&cfg) - require.Error(t, err) - require.Nil(t, mgr) - }) - - t.Run("Invalid MaxDialRetryAttemptTimes", func(t *testing.T) { - cfg := validConfig - cfg.MaxDialRetryAttemptTimes = 0 - mgr, err := unicast.NewUnicastManager(&cfg) - require.Error(t, err) - require.Nil(t, mgr) - }) - t.Run("Invalid MaxStreamCreationRetryAttemptTimes", func(t *testing.T) { cfg := validConfig cfg.MaxStreamCreationRetryAttemptTimes = 0 @@ -166,9 +118,9 @@ func TestManagerConfigValidation(t *testing.T) { require.Nil(t, mgr) }) - t.Run("Invalid DialConfigCacheFactory", func(t *testing.T) { + t.Run("Invalid UnicastConfigCacheFactory", func(t *testing.T) { cfg := validConfig - cfg.DialConfigCacheFactory = nil + cfg.UnicastConfigCacheFactory = nil mgr, err := unicast.NewUnicastManager(&cfg) require.Error(t, err) require.Nil(t, mgr) @@ -182,14 +134,6 @@ func TestManagerConfigValidation(t *testing.T) { require.Nil(t, mgr) }) - t.Run("Missing ConnStatus", func(t *testing.T) { - cfg := validConfig - cfg.ConnStatus = nil - mgr, err := unicast.NewUnicastManager(&cfg) - require.Error(t, err) - require.Nil(t, mgr) - }) - t.Run("Missing Metrics", func(t *testing.T) { cfg := validConfig cfg.Metrics = nil @@ -199,140 +143,79 @@ func TestManagerConfigValidation(t *testing.T) { }) } -// TestUnicastManager_StreamFactory_ConnectionBackoff tests the backoff mechanism of the unicast manager for connection creation. -// It tests that when there is no connection, it tries to connect to the peer some number of times (unicastmodel.MaxDialAttemptTimes), before -// giving up. -func TestUnicastManager_Connection_ConnectionBackoff(t *testing.T) { - peerID := unittest.PeerIdFixture(t) - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) - - cfg, err := config.DefaultConfig() - require.NoError(t, err) - - connStatus.On("IsConnected", peerID).Return(false, nil) // not connected - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}). - Return(fmt.Errorf("some error")).Times(int(cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes + 1)) // connect - - _, err = dialConfigCache.Adjust(peerID, func(dialConfig unicast.DialConfig) (unicast.DialConfig, error) { - // assumes that there was a successful connection to the peer before (2 minutes ago), and now the connection is lost. - dialConfig.LastSuccessfulDial = time.Now().Add(2 * time.Minute) - return dialConfig, nil - }) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - s, err := mgr.CreateStream(ctx, peerID) - require.Error(t, err) - require.Nil(t, s) - - // The dial config must be updated with the backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) - require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes-1, dialCfg.DialRetryAttemptBudget) // dial backoff budget must be decremented by 1. - require.Equal(t, - cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must remain intact (no stream creation attempt yet). - // last successful dial is set back to zero, since although we have a successful dial in the past, the most recent dial failed. - require.True(t, dialCfg.LastSuccessfulDial.IsZero()) - require.Equal(t, uint64(0), dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be intact. -} - -// TestUnicastManager_StreamFactory_Connection_SuccessfulConnection_And_Stream tests that when there is no connection, and CreateStream is successful on the first attempt for connection and stream creation, -// it updates the last successful dial time and the consecutive successful stream counter. -func TestUnicastManager_Connection_SuccessfulConnection_And_Stream(t *testing.T) { +// TestUnicastManager_SuccessfulStream tests that when CreateStream is successful on the first attempt for stream creation, +// it updates the consecutive successful stream counter. +func TestUnicastManager_SuccessfulStream(t *testing.T) { peerID := unittest.PeerIdFixture(t) - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, configCache := unicastManagerFixture(t) cfg, err := config.DefaultConfig() require.NoError(t, err) - connStatus.On("IsConnected", peerID).Return(false, nil) // not connected - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}).Return(nil).Once() // connect on the first attempt. - // mocks that it attempts to create a stream once and succeeds. streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything).Return(&p2ptest.MockStream{}, nil).Once() ctx, cancel := context.WithCancel(context.Background()) defer cancel() - dialTime := time.Now() s, err := mgr.CreateStream(ctx, peerID) require.NoError(t, err) require.NotNil(t, s) - // The dial config must be updated with the backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) + // The unicast config must be updated with the backoff budget decremented. + unicastCfg, err := configCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) // dial backoff budget must be intact. require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must remain intact. - // last successful dial must be set AFTER the successful dial. - require.True(t, dialCfg.LastSuccessfulDial.After(dialTime)) - require.Equal(t, uint64(1), dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must incremented. + unicastCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must remain intact. + require.Equal(t, uint64(1), unicastCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must incremented. } -// TestUnicastManager_StreamFactory_Connection_SuccessfulConnection_StreamBackoff tests the backoff mechanism of the unicast manager for stream creation. -// It tests the situation that there is no connection when CreateStream is called. The connection is created successfully, but the stream creation fails. -// It tests that when there is a connection, but no stream, it tries to create a stream some number of times (unicastmodel.MaxStreamCreationAttemptTimes), before -// giving up. -// It also checks the consecutive successful stream counter is reset when the stream creation fails, and the last successful dial time is updated. -func TestUnicastManager_Connection_SuccessfulConnection_StreamBackoff(t *testing.T) { +// TestUnicastManager_StreamBackoff tests the backoff mechanism of the unicast manager for stream creation. +// It tests the situation that CreateStream is called but the stream creation fails. +// It tests that it tries to create a stream some number of times (unicastmodel.MaxStreamCreationAttemptTimes), before giving up. +// It also checks the consecutive successful stream counter is reset when the stream creation fails. +func TestUnicastManager_StreamBackoff(t *testing.T) { peerID := unittest.PeerIdFixture(t) - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, configCache := unicastManagerFixture(t) cfg, err := config.DefaultConfig() require.NoError(t, err) - isConnectedCalled := 0 - connStatus.On("IsConnected", peerID).Return(func(id peer.ID) bool { - if isConnectedCalled == 0 { - // we mock that the connection is not established on the first call, and is established on the second call and onwards. - isConnectedCalled++ - return false - } - return true - }, nil) - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}).Return(nil).Once() // connect on the first attempt. - streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything).Return(nil, fmt.Errorf("some error")). - Times(int(cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes + 1)) // mocks that it attempts to create a stream some number of times, before giving up. + // mocks that it attempts to create a stream some number of times, before giving up. + streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). + Return(nil, fmt.Errorf("some error")). + Times(int(cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes + 1)) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - dialTime := time.Now() s, err := mgr.CreateStream(ctx, peerID) require.Error(t, err) require.Nil(t, s) - // The dial config must be updated with the backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) + // The unicast config must be updated with the backoff budget decremented. + unicastCfg, err := configCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, - cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, - dialCfg.DialRetryAttemptBudget) // dial backoff budget must be intact, since the connection is successful. - require.Equal(t, - cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes-1, - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must be decremented by 1 since all budget is used up. - // last successful dial must be set AFTER the successful dial. - require.True(t, dialCfg.LastSuccessfulDial.After(dialTime)) - require.Equal(t, uint64(0), dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be reset to zero, since the stream creation failed. + // stream backoff budget must be decremented by 1 since all budget is used up. + require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes-1, unicastCfg.StreamCreationRetryAttemptBudget) + // consecutive successful stream must be reset to zero, since the stream creation failed. + require.Equal(t, uint64(0), unicastCfg.ConsecutiveSuccessfulStream) } // TestUnicastManager_StreamFactory_StreamBackoff tests the backoff mechanism of the unicast manager for stream creation. // It tests when there is a connection, but no stream, it tries to create a stream some number of times (unicastmodel.MaxStreamCreationAttemptTimes), before // giving up. func TestUnicastManager_StreamFactory_StreamBackoff(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) cfg, err := config.DefaultConfig() require.NoError(t, err) - connStatus.On("IsConnected", peerID).Return(true, nil) // connected. + // mocks that it attempts to create a stream some number of times, before giving up. streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). Return(nil, fmt.Errorf("some error")). - Times(int(cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes + 1)) // mocks that it attempts to create a stream some number of times, before giving up. + Times(int(cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes + 1)) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -340,22 +223,19 @@ func TestUnicastManager_StreamFactory_StreamBackoff(t *testing.T) { require.Error(t, err) require.Nil(t, s) - // The dial config must be updated with the stream backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) + // The unicast config must be updated with the stream backoff budget decremented. + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) // dial backoff budget must be intact. - require.Equal(t, - cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes-1, - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must be decremented by 1. - require.Equal(t, - uint64(0), - dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be zero as we have not created a successful stream yet. + // stream backoff budget must be decremented by 1. + require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes-1, unicastCfg.StreamCreationRetryAttemptBudget) + // consecutive successful stream must be zero as we have not created a successful stream yet. + require.Equal(t, uint64(0), unicastCfg.ConsecutiveSuccessfulStream) } -// TestUnicastManager_Stream_ConsecutiveStreamCreation_Increment tests that when there is a connection, and the stream creation is successful, -// it increments the consecutive successful stream counter in the dial config. +// TestUnicastManager_Stream_ConsecutiveStreamCreation_Increment tests that when stream creation is successful, +// it increments the consecutive successful stream counter in the unicast config. func TestUnicastManager_Stream_ConsecutiveStreamCreation_Increment(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) cfg, err := config.DefaultConfig() @@ -364,7 +244,6 @@ func TestUnicastManager_Stream_ConsecutiveStreamCreation_Increment(t *testing.T) // total times we successfully create a stream to the peer. totalSuccessAttempts := 10 - connStatus.On("IsConnected", peerID).Return(true, nil) // connected. // mocks that it attempts to create a stream 10 times, and each time it succeeds. streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything).Return(&p2ptest.MockStream{}, nil).Times(totalSuccessAttempts) @@ -376,38 +255,37 @@ func TestUnicastManager_Stream_ConsecutiveStreamCreation_Increment(t *testing.T) require.NoError(t, err) require.NotNil(t, s) - // The dial config must be updated with the stream backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) + // The unicast config must be updated with the stream backoff budget decremented. + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) // dial backoff budget must be intact. // stream backoff budget must be intact (all stream creation attempts are successful). - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, dialCfg.StreamCreationRetryAttemptBudget) - require.Equal(t, uint64(i+1), dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be incremented. + require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, unicastCfg.StreamCreationRetryAttemptBudget) + // consecutive successful stream must be incremented. + require.Equal(t, uint64(i+1), unicastCfg.ConsecutiveSuccessfulStream) } } -// TestUnicastManager_Stream_ConsecutiveStreamCreation_Reset tests that when there is a connection, and the stream creation fails, it resets -// the consecutive successful stream counter in the dial config. +// TestUnicastManager_Stream_ConsecutiveStreamCreation_Reset tests that when the stream creation fails, it resets +// the consecutive successful stream counter in the unicast config. func TestUnicastManager_Stream_ConsecutiveStreamCreation_Reset(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) - cfg, err := config.DefaultConfig() - require.NoError(t, err) - + // mocks that it attempts to create a stream once and fails. streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). Return(nil, fmt.Errorf("some error")). - Once() // mocks that it attempts to create a stream once and fails. - connStatus.On("IsConnected", peerID).Return(true, nil) // connected. + Once() - adjustedDialConfig, err := dialConfigCache.Adjust(peerID, func(dialConfig unicast.DialConfig) (unicast.DialConfig, error) { - dialConfig.ConsecutiveSuccessfulStream = 5 // sets the consecutive successful stream to 5 meaning that the last 5 stream creation attempts were successful. - dialConfig.StreamCreationRetryAttemptBudget = 0 // sets the stream back budget to 0 meaning that the stream backoff budget is exhausted. + adjustedUnicastConfig, err := unicastConfigCache.Adjust(peerID, func(unicastConfig unicast.Config) (unicast.Config, error) { + // sets the consecutive successful stream to 5 meaning that the last 5 stream creation attempts were successful. + unicastConfig.ConsecutiveSuccessfulStream = 5 + // sets the stream back budget to 0 meaning that the stream backoff budget is exhausted. + unicastConfig.StreamCreationRetryAttemptBudget = 0 - return dialConfig, nil + return unicastConfig, nil }) require.NoError(t, err) - require.Equal(t, uint64(5), adjustedDialConfig.ConsecutiveSuccessfulStream) + require.Equal(t, uint64(5), adjustedUnicastConfig.ConsecutiveSuccessfulStream) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -416,25 +294,25 @@ func TestUnicastManager_Stream_ConsecutiveStreamCreation_Reset(t *testing.T) { require.Error(t, err) require.Nil(t, s) - // The dial config must be updated with the stream backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) + // The unicast config must be updated with the stream backoff budget decremented. + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) // dial backoff budget must be intact. - require.Equal(t, - uint64(0), - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must be intact (we can't decrement it below 0). - require.Equal(t, uint64(0), dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be reset to 0. + + // stream backoff budget must be intact (we can't decrement it below 0). + require.Equal(t, uint64(0), unicastCfg.StreamCreationRetryAttemptBudget) + // consecutive successful stream must be reset to 0. + require.Equal(t, uint64(0), unicastCfg.ConsecutiveSuccessfulStream) } // TestUnicastManager_StreamFactory_ErrProtocolNotSupported tests that when there is a protocol not supported error, it does not retry creating a stream. func TestUnicastManager_StreamFactory_ErrProtocolNotSupported(t *testing.T) { - mgr, streamFactory, connStatus, _ := unicastManagerFixture(t) + mgr, streamFactory, _ := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) - connStatus.On("IsConnected", peerID).Return(true, nil) // connected + // mocks that upon creating a stream, it returns a protocol not supported error, the mock is set to once, meaning that it won't retry stream creation again. streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). - Return(nil, stream.NewProtocolNotSupportedErr(peerID, []protocol.ID{"protocol-1"}, fmt.Errorf("some error"))). - Once() // mocks that upon creating a stream, it returns a protocol not supported error, the mock is set to once, meaning that it won't retry stream creation again. + Return(nil, stream.NewProtocolNotSupportedErr(peerID, protocol.ID("protocol-1"), fmt.Errorf("some error"))). + Once() ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -443,20 +321,19 @@ func TestUnicastManager_StreamFactory_ErrProtocolNotSupported(t *testing.T) { require.Nil(t, s) } -// TestUnicastManager_StreamFactory_ErrNoAddresses tests that when dialing returns a no addresses error, it does not retry dialing again and returns an error immediately. +// TestUnicastManager_StreamFactory_ErrNoAddresses tests that when stream creation returns a no addresses error, +// it does not retry stream creation again and returns an error immediately. func TestUnicastManager_StreamFactory_ErrNoAddresses(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) cfg, err := config.DefaultConfig() require.NoError(t, err) peerID := unittest.PeerIdFixture(t) - // mocks that the connection is not established. - connStatus.On("IsConnected", peerID).Return(false, nil) - // mocks that dialing the peer returns a no addresses error, and the mock is set to once, meaning that it won't retry dialing again. - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}). - Return(fmt.Errorf("some error to ensure wrapping works fine: %w", swarm.ErrNoAddresses)). + // mocks that stream creation returns a no addresses error, and the mock is set to once, meaning that it won't retry stream creation again. + streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). + Return(nil, fmt.Errorf("some error to ensure wrapping works fine: %w", swarm.ErrNoAddresses)). Once() ctx, cancel := context.WithCancel(context.Background()) @@ -465,32 +342,27 @@ func TestUnicastManager_StreamFactory_ErrNoAddresses(t *testing.T) { require.Error(t, err) require.Nil(t, s) - dialCfg, err := dialConfigCache.GetOrInit(peerID) + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - // dial backoff budget must be decremented by 1 (although we didn't have a backoff attempt, the connection was unsuccessful). - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes-1, dialCfg.DialRetryAttemptBudget) - // stream backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, dialCfg.StreamCreationRetryAttemptBudget) - // last successful dial must be set to zero. - require.True(t, dialCfg.LastSuccessfulDial.IsZero()) + + // stream backoff budget must be reduced by 1 due to failed stream creation. + require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes-1, unicastCfg.StreamCreationRetryAttemptBudget) // consecutive successful stream must be set to zero. - require.Equal(t, uint64(0), dialCfg.ConsecutiveSuccessfulStream) + require.Equal(t, uint64(0), unicastCfg.ConsecutiveSuccessfulStream) } -// TestUnicastManager_Dial_ErrSecurityProtocolNegotiationFailed tests that when there is a security protocol negotiation error, it does not retry dialing. -func TestUnicastManager_Dial_ErrSecurityProtocolNegotiationFailed(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) +// TestUnicastManager_Stream_ErrSecurityProtocolNegotiationFailed tests that when there is a security protocol negotiation error, it does not retry stream creation. +func TestUnicastManager_Stream_ErrSecurityProtocolNegotiationFailed(t *testing.T) { + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) cfg, err := config.DefaultConfig() require.NoError(t, err) peerID := unittest.PeerIdFixture(t) - // mocks that the connection is not established. - connStatus.On("IsConnected", peerID).Return(false, nil) - // mocks that dialing the peer returns a security protocol negotiation error, and the mock is set to once, meaning that it won't retry dialing again. - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}). - Return(stream.NewSecurityProtocolNegotiationErr(peerID, fmt.Errorf("some error"))). + // mocks that stream creation returns a security protocol negotiation error, and the mock is set to once, meaning that it won't retry stream creation. + streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). + Return(nil, stream.NewSecurityProtocolNegotiationErr(peerID, fmt.Errorf("some error"))). Once() ctx, cancel := context.WithCancel(context.Background()) @@ -499,31 +371,25 @@ func TestUnicastManager_Dial_ErrSecurityProtocolNegotiationFailed(t *testing.T) require.Error(t, err) require.Nil(t, s) - dialCfg, err := dialConfigCache.GetOrInit(peerID) + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - // dial backoff budget must be decremented by 1 (although we didn't have a backoff attempt, the connection was unsuccessful). - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes-1, dialCfg.DialRetryAttemptBudget) - // stream backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, dialCfg.StreamCreationRetryAttemptBudget) - // last successful dial must be set to zero. - require.True(t, dialCfg.LastSuccessfulDial.IsZero()) + // stream retry budget must be decremented by 1 (since we didn't have a successful stream creation, the budget is decremented). + require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes-1, unicastCfg.StreamCreationRetryAttemptBudget) // consecutive successful stream must be set to zero. - require.Equal(t, uint64(0), dialCfg.ConsecutiveSuccessfulStream) + require.Equal(t, uint64(0), unicastCfg.ConsecutiveSuccessfulStream) } -// TestUnicastManager_Dial_ErrGaterDisallowedConnection tests that when there is a connection gater disallow listing error, it does not retry dialing. -func TestUnicastManager_Dial_ErrGaterDisallowedConnection(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) +// TestUnicastManager_StreamFactory_ErrGaterDisallowedConnection tests that when there is a connection-gater disallow listing error, it does not retry stream creation. +func TestUnicastManager_StreamFactory_ErrGaterDisallowedConnection(t *testing.T) { + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) - // mocks that the connection is not established. - connStatus.On("IsConnected", peerID).Return(false, nil) cfg, err := config.DefaultConfig() require.NoError(t, err) - // mocks that dialing the peer returns a security protocol negotiation error, and the mock is set to once, meaning that it won't retry dialing again. - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}). - Return(stream.NewGaterDisallowedConnectionErr(fmt.Errorf("some error"))). + // mocks that stream creation to the peer returns a connection gater disallow-listing, and the mock is set to once, meaning that it won't retry stream creation. + streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). + Return(nil, stream.NewGaterDisallowedConnectionErr(fmt.Errorf("some error"))). Once() ctx, cancel := context.WithCancel(context.Background()) @@ -532,85 +398,18 @@ func TestUnicastManager_Dial_ErrGaterDisallowedConnection(t *testing.T) { require.Error(t, err) require.Nil(t, s) - dialCfg, err := dialConfigCache.GetOrInit(peerID) + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - // dial backoff budget must be decremented by 1 (although we didn't have a backoff attempt, the connection was unsuccessful). - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes-1, dialCfg.DialRetryAttemptBudget) - // stream backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, dialCfg.StreamCreationRetryAttemptBudget) - // last successful dial must be set to zero. - require.True(t, dialCfg.LastSuccessfulDial.IsZero()) + // stream backoff budget must be reduced by 1 due to failed stream creation. + require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes-1, unicastCfg.StreamCreationRetryAttemptBudget) // consecutive successful stream must be set to zero. - require.Equal(t, uint64(0), dialCfg.ConsecutiveSuccessfulStream) + require.Equal(t, uint64(0), unicastCfg.ConsecutiveSuccessfulStream) } -// TestUnicastManager_Connection_BackoffBudgetDecremented tests that everytime the unicast manger gives up on creating a connection (after retrials), -// it decrements the backoff budget for the remote peer. -func TestUnicastManager_Connection_BackoffBudgetDecremented(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) - peerID := unittest.PeerIdFixture(t) - - cfg, err := config.DefaultConfig() - require.NoError(t, err) - - // totalAttempts is the total number of times that unicast manager calls Connect on the stream factory to dial the peer. - // Let's consider x = unicastmodel.MaxDialRetryAttemptTimes + 1. Then the test tries x times CreateStream. With dynamic backoffs, - // the first CreateStream call will try to Connect x times, the second CreateStream call will try to Connect x-1 times, - // and so on. So the total number of Connect calls is x + (x-1) + (x-2) + ... + 1 = x(x+1)/2. - // However, we also attempt one more time at the end of the test to CreateStream, when the backoff budget is 0. - maxDialRetryAttemptBudget := int(cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes) - attemptTimes := maxDialRetryAttemptBudget + 1 // 1 attempt + retry times - totalAttempts := attemptTimes * (attemptTimes + 1) / 2 - - connStatus.On("IsConnected", peerID).Return(false, nil) // not connected - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}). - Return(fmt.Errorf("some error")). - Times(int(totalAttempts)) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - for i := 0; i < maxDialRetryAttemptBudget; i++ { - s, err := mgr.CreateStream(ctx, peerID) - require.Error(t, err) - require.Nil(t, s) - - dialCfg, err := dialConfigCache.GetOrInit(peerID) - require.NoError(t, err) - - if i == maxDialRetryAttemptBudget-1 { - require.Equal(t, uint64(0), dialCfg.DialRetryAttemptBudget) - } else { - require.Equal(t, uint64(maxDialRetryAttemptBudget-i-1), dialCfg.DialRetryAttemptBudget) - } - - // The stream backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, dialCfg.StreamCreationRetryAttemptBudget) - } - // At this time the backoff budget for connection must be 0. - dialCfg, err := dialConfigCache.GetOrInit(peerID) - require.NoError(t, err) - - require.Equal(t, uint64(0), dialCfg.DialRetryAttemptBudget) - // The stream backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, dialCfg.StreamCreationRetryAttemptBudget) - - // After all the backoff budget is used up, it should stay at 0. - s, err := mgr.CreateStream(ctx, peerID) - require.Error(t, err) - require.Nil(t, s) - - dialCfg, err = dialConfigCache.GetOrInit(peerID) - require.NoError(t, err) - require.Equal(t, uint64(0), dialCfg.DialRetryAttemptBudget) - - // The stream backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, dialCfg.StreamCreationRetryAttemptBudget) -} - -// TestUnicastManager_Connection_BackoffBudgetDecremented tests that everytime the unicast manger gives up on creating a connection (after retrials), +// TestUnicastManager_Connection_BackoffBudgetDecremented tests that everytime the unicast manger gives up on creating a stream (after retrials), // it decrements the backoff budget for the remote peer. func TestUnicastManager_Stream_BackoffBudgetDecremented(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) cfg, err := config.DefaultConfig() @@ -625,7 +424,6 @@ func TestUnicastManager_Stream_BackoffBudgetDecremented(t *testing.T) { maxStreamAttempt := maxStreamRetryBudget + 1 // 1 attempt + retry times totalAttempts := maxStreamAttempt * (maxStreamAttempt + 1) / 2 - connStatus.On("IsConnected", peerID).Return(true, nil) // not connected streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything). Return(nil, fmt.Errorf("some error")). Times(int(totalAttempts)) @@ -637,57 +435,47 @@ func TestUnicastManager_Stream_BackoffBudgetDecremented(t *testing.T) { require.Error(t, err) require.Nil(t, s) - dialCfg, err := dialConfigCache.GetOrInit(peerID) + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) if i == int(maxStreamRetryBudget)-1 { - require.Equal(t, uint64(0), dialCfg.StreamCreationRetryAttemptBudget) + require.Equal(t, uint64(0), unicastCfg.StreamCreationRetryAttemptBudget) } else { - require.Equal(t, maxStreamRetryBudget-uint64(i)-1, dialCfg.StreamCreationRetryAttemptBudget) + require.Equal(t, maxStreamRetryBudget-uint64(i)-1, unicastCfg.StreamCreationRetryAttemptBudget) } - - // The dial backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) } // At this time the backoff budget for connection must be 0. - dialCfg, err := dialConfigCache.GetOrInit(peerID) + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - - require.Equal(t, uint64(0), dialCfg.StreamCreationRetryAttemptBudget) - // The dial backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) + require.Equal(t, uint64(0), unicastCfg.StreamCreationRetryAttemptBudget) // After all the backoff budget is used up, it should stay at 0. s, err := mgr.CreateStream(ctx, peerID) require.Error(t, err) require.Nil(t, s) - dialCfg, err = dialConfigCache.GetOrInit(peerID) + unicastCfg, err = unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, uint64(0), dialCfg.StreamCreationRetryAttemptBudget) - - // The dial backoff budget must remain intact, as we have not tried to create a stream yet. - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) + require.Equal(t, uint64(0), unicastCfg.StreamCreationRetryAttemptBudget) } -// TestUnicastManager_StreamFactory_Connection_SuccessfulConnection_And_Stream tests that when there is no connection, and CreateStream is successful on the first attempt for connection and stream creation, -// it updates the last successful dial time and the consecutive successful stream counter. +// TestUnicastManager_Stream_BackoffBudgetResetToDefault tests that when the stream retry attempt budget is zero, and the consecutive successful stream counter is above the reset threshold, +// it resets the stream retry attempt budget to the default value and increments the consecutive successful stream counter. func TestUnicastManager_Stream_BackoffBudgetResetToDefault(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) cfg, err := config.DefaultConfig() require.NoError(t, err) - connStatus.On("IsConnected", peerID).Return(true, nil) // there is a connection. // mocks that it attempts to create a stream once and succeeds. streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything).Return(&p2ptest.MockStream{}, nil).Once() - // update the dial config of the peer to have a zero stream backoff budget but a consecutive successful stream counter above the reset threshold. - adjustedCfg, err := dialConfigCache.Adjust(peerID, func(dialConfig unicast.DialConfig) (unicast.DialConfig, error) { - dialConfig.StreamCreationRetryAttemptBudget = 0 - dialConfig.ConsecutiveSuccessfulStream = cfg.NetworkConfig.UnicastConfig.StreamZeroRetryResetThreshold + 1 - return dialConfig, nil + // update the unicast config of the peer to have a zero stream backoff budget but a consecutive successful stream counter above the reset threshold. + adjustedCfg, err := unicastConfigCache.Adjust(peerID, func(unicastConfig unicast.Config) (unicast.Config, error) { + unicastConfig.StreamCreationRetryAttemptBudget = 0 + unicastConfig.ConsecutiveSuccessfulStream = cfg.NetworkConfig.UnicastConfig.StreamZeroRetryResetThreshold + 1 + return unicastConfig, nil }) require.NoError(t, err) require.Equal(t, uint64(0), adjustedCfg.StreamCreationRetryAttemptBudget) @@ -700,135 +488,30 @@ func TestUnicastManager_Stream_BackoffBudgetResetToDefault(t *testing.T) { require.NoError(t, err) require.NotNil(t, s) - // The dial config must be updated with the backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) // dial backoff budget must be intact. - require.Equal(t, - cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must reset to default. - require.True(t, dialCfg.LastSuccessfulDial.IsZero()) // last successful dial must be intact. + // stream backoff budget must reset to default. + require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, unicastCfg.StreamCreationRetryAttemptBudget) // consecutive successful stream must increment by 1 (it was threshold + 1 before). - require.Equal(t, cfg.NetworkConfig.UnicastConfig.StreamZeroRetryResetThreshold+1+1, dialCfg.ConsecutiveSuccessfulStream) + require.Equal(t, cfg.NetworkConfig.UnicastConfig.StreamZeroRetryResetThreshold+1+1, unicastCfg.ConsecutiveSuccessfulStream) } -// TestUnicastManager_StreamFactory_Connection_SuccessfulConnection_And_Stream tests that when there is no connection, and CreateStream is successful on the first attempt for connection and stream creation, -// it updates the last successful dial time and the consecutive successful stream counter. -func TestUnicastManager_Stream_BackoffConnectionBudgetResetToDefault(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) - peerID := unittest.PeerIdFixture(t) - - cfg, err := config.DefaultConfig() - require.NoError(t, err) - - connStatus.On("IsConnected", peerID).Return(false, nil) // there is no connection. - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}).Return(nil).Once() // connect on the first attempt. - // mocks that it attempts to create a stream once and succeeds. - streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything).Return(&p2ptest.MockStream{}, nil).Once() - - // update the dial config of the peer to have a zero dial backoff budget but it has not been long enough since the last successful dial. - adjustedCfg, err := dialConfigCache.Adjust(peerID, func(dialConfig unicast.DialConfig) (unicast.DialConfig, error) { - dialConfig.DialRetryAttemptBudget = 0 - dialConfig.LastSuccessfulDial = time.Now().Add(-cfg.NetworkConfig.UnicastConfig.DialZeroRetryResetThreshold) - return dialConfig, nil - }) - require.NoError(t, err) - require.Equal(t, uint64(0), adjustedCfg.DialRetryAttemptBudget) - require.True(t, - adjustedCfg.LastSuccessfulDial.Before(time.Now().Add(-cfg.NetworkConfig.UnicastConfig.DialZeroRetryResetThreshold))) // last successful dial must be within the threshold. - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dialTime := time.Now() - s, err := mgr.CreateStream(ctx, peerID) - require.NoError(t, err) - require.NotNil(t, s) - - // The dial config must be updated with the backoff budget decremented. - dialCfg, err := dialConfigCache.GetOrInit(peerID) - require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) // dial backoff budget must be reset to default. - require.Equal(t, - cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must be intact. - require.True(t, - dialCfg.LastSuccessfulDial.After(dialTime)) // last successful dial must be updated when the dial was successful. - require.Equal(t, - uint64(1), - dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be incremented by 1 (0 -> 1). -} - -// TestUnicastManager_Connection_NoBackoff_When_Budget_Is_Zero tests that when there is no connection, and the dial backoff budget is zero and last successful dial is not within the zero reset threshold -// the unicast manager does not backoff if the dial attempt fails. -func TestUnicastManager_Connection_NoBackoff_When_Budget_Is_Zero(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) - peerID := unittest.PeerIdFixture(t) - - cfg, err := config.DefaultConfig() - require.NoError(t, err) - - connStatus.On("IsConnected", peerID).Return(false, nil) // there is no connection. - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}).Return(fmt.Errorf("some error")).Once() // connection is tried only once and fails. - - // update the dial config of the peer to have a zero dial backoff, and the last successful dial is not within the threshold. - adjustedCfg, err := dialConfigCache.Adjust(peerID, func(dialConfig unicast.DialConfig) (unicast.DialConfig, error) { - dialConfig.DialRetryAttemptBudget = 0 // set the dial backoff budget to 0, meaning that the dial backoff budget is exhausted. - dialConfig.LastSuccessfulDial = time.Now().Add(-10 * time.Minute) // last successful dial is not within the threshold. - dialConfig.ConsecutiveSuccessfulStream = 2 // set the consecutive successful stream to 2, meaning that the last 2 stream creation attempts were successful. - return dialConfig, nil - }) - require.NoError(t, err) - require.Equal(t, uint64(0), adjustedCfg.DialRetryAttemptBudget) - require.False(t, - adjustedCfg.LastSuccessfulDial.Before(time.Now().Add(-cfg.NetworkConfig.UnicastConfig.DialZeroRetryResetThreshold))) // last successful dial must not be within the threshold. - require.Equal(t, uint64(2), adjustedCfg.ConsecutiveSuccessfulStream) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - s, err := mgr.CreateStream(ctx, peerID) - require.Error(t, err) - require.Nil(t, s) - - dialCfg, err := dialConfigCache.GetOrInit(peerID) - require.NoError(t, err) - require.Equal(t, uint64(0), dialCfg.DialRetryAttemptBudget) // dial backoff budget must remain at 0. - require.Equal(t, - cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must be intact. - require.True(t, - dialCfg.LastSuccessfulDial.IsZero()) // last successful dial must be set to zero. - require.Equal(t, - uint64(0), - dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be set to zero. -} - -// TestUnicastManager_Stream_NoBackoff_When_Budget_Is_Zero tests that when there is a connection, and the stream backoff budget is zero and the consecutive successful stream counter is not above the -// zero rest threshold, the unicast manager does not backoff if the dial attempt fails. +// TestUnicastManager_Stream_NoBackoff_When_Budget_Is_Zero tests that when the stream backoff budget is zero and the consecutive successful stream counter is not above the +// zero rest threshold, the unicast manager does not backoff if the stream creation attempt fails. func TestUnicastManager_Stream_NoBackoff_When_Budget_Is_Zero(t *testing.T) { - mgr, streamFactory, connStatus, dialConfigCache := unicastManagerFixture(t) + mgr, streamFactory, unicastConfigCache := unicastManagerFixture(t) peerID := unittest.PeerIdFixture(t) - cfg, err := config.DefaultConfig() - require.NoError(t, err) - - connStatus.On("IsConnected", peerID).Return(true, nil) // there is a connection. // mocks that it attempts to create a stream once and fails, and does not retry. streamFactory.On("NewStream", mock.Anything, peerID, mock.Anything).Return(nil, fmt.Errorf("some error")).Once() - // update the dial config of the peer to have a zero dial backoff, and the last successful dial is not within the threshold. - lastSuccessfulDial := time.Now().Add(-10 * time.Minute) - adjustedCfg, err := dialConfigCache.Adjust(peerID, func(dialConfig unicast.DialConfig) (unicast.DialConfig, error) { - dialConfig.LastSuccessfulDial = lastSuccessfulDial // last successful dial is not within the threshold. - dialConfig.ConsecutiveSuccessfulStream = 2 // set the consecutive successful stream to 2, which is below the reset threshold. - dialConfig.StreamCreationRetryAttemptBudget = 0 // set the stream backoff budget to 0, meaning that the stream backoff budget is exhausted. - return dialConfig, nil + adjustedCfg, err := unicastConfigCache.Adjust(peerID, func(unicastConfig unicast.Config) (unicast.Config, error) { + unicastConfig.ConsecutiveSuccessfulStream = 2 // set the consecutive successful stream to 2, which is below the reset threshold. + unicastConfig.StreamCreationRetryAttemptBudget = 0 // set the stream backoff budget to 0, meaning that the stream backoff budget is exhausted. + return unicastConfig, nil }) require.NoError(t, err) require.Equal(t, uint64(0), adjustedCfg.StreamCreationRetryAttemptBudget) - require.False(t, - adjustedCfg.LastSuccessfulDial.Before(time.Now().Add(-cfg.NetworkConfig.UnicastConfig.DialZeroRetryResetThreshold))) // last successful dial must not be within the threshold. require.Equal(t, uint64(2), adjustedCfg.ConsecutiveSuccessfulStream) ctx, cancel := context.WithCancel(context.Background()) @@ -838,112 +521,8 @@ func TestUnicastManager_Stream_NoBackoff_When_Budget_Is_Zero(t *testing.T) { require.Error(t, err) require.Nil(t, s) - dialCfg, err := dialConfigCache.GetOrInit(peerID) - require.NoError(t, err) - require.Equal(t, cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, dialCfg.DialRetryAttemptBudget) // dial backoff budget must remain intact. - require.Equal(t, uint64(0), dialCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must remain zero. - require.Equal(t, lastSuccessfulDial, dialCfg.LastSuccessfulDial) // last successful dial must be intact. - require.Equal(t, uint64(0), dialCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be set to zero. -} - -// TestUnicastManager_Dial_In_Progress_Backoff tests that when there is a dial in progress, the unicast manager back-offs concurrent CreateStream calls. -func TestUnicastManager_Dial_In_Progress_Backoff(t *testing.T) { - streamFactory := mockp2p.NewStreamFactory(t) - streamFactory.On("SetStreamHandler", mock.Anything, mock.Anything).Return().Once() - connStatus := mockp2p.NewPeerConnections(t) - - cfg, err := config.DefaultConfig() - require.NoError(t, err) - - dialConfigCache := unicastcache.NewDialConfigCache(cfg.NetworkConfig.UnicastConfig.DialConfigCacheSize, - unittest.Logger(), - metrics.NewNoopCollector(), - func() unicast.DialConfig { - return unicast.DialConfig{ - DialRetryAttemptBudget: cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, - StreamCreationRetryAttemptBudget: cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - } - }) - collector := mockmetrics.NewNetworkMetrics(t) - mgr, err := unicast.NewUnicastManager(&unicast.ManagerConfig{ - Logger: unittest.Logger(), - StreamFactory: streamFactory, - SporkId: unittest.IdentifierFixture(), - ConnStatus: connStatus, - CreateStreamBackoffDelay: 1 * time.Millisecond, // overrides the default backoff delay to 1 millisecond to speed up the test. - Metrics: collector, - StreamZeroRetryResetThreshold: cfg.NetworkConfig.UnicastConfig.StreamZeroRetryResetThreshold, - DialZeroRetryResetThreshold: cfg.NetworkConfig.UnicastConfig.DialZeroRetryResetThreshold, - MaxStreamCreationRetryAttemptTimes: cfg.NetworkConfig.UnicastConfig.MaxStreamCreationRetryAttemptTimes, - MaxDialRetryAttemptTimes: cfg.NetworkConfig.UnicastConfig.MaxDialRetryAttemptTimes, - DialInProgressBackoffDelay: 1 * time.Millisecond, // overrides the default backoff delay to 1 millisecond to speed up the test. - DialBackoffDelay: cfg.NetworkConfig.UnicastConfig.DialBackoffDelay, - DialConfigCacheFactory: func(func() unicast.DialConfig) unicast.DialConfigCache { - return dialConfigCache - }, - }) - require.NoError(t, err) - mgr.SetDefaultHandler(func(libp2pnet.Stream) {}) // no-op handler, we don't care about the handler for this test - - testSucceeds := make(chan struct{}) - - // indicates whether OnStreamCreationFailure called with 1 attempt (this happens when dial fails), as the dial budget is 0, - // hence dial attempt is not retried after the first attempt. - streamCreationCalledFor1 := false - // indicates whether OnStreamCreationFailure called with 4 attempts (this happens when stream creation fails due to all backoff budget - // exhausted when there is another dial in progress). The stream creation retry budget is 3, so it will be called 4 times (1 attempt + 3 retries). - streamCreationCalledFor4 := false - - blockingDial := make(chan struct{}) - collector.On("OnStreamCreationFailure", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { - attempts := args.Get(1).(int) - if attempts == 1 && !streamCreationCalledFor1 { // dial attempt is not retried after the first attempt. - streamCreationCalledFor1 = true - } else if attempts == 4 && !streamCreationCalledFor4 { // stream creation attempt is retried 3 times, and exhausts the budget. - close(blockingDial) // close the blocking dial to allow the dial to fail with an error. - streamCreationCalledFor4 = true - } else { - require.Fail(t, "unexpected attempt", "expected 1 or 4 (each once), got %d (maybe twice)", attempts) - } - if streamCreationCalledFor1 && streamCreationCalledFor4 { - close(testSucceeds) - } - }).Twice() - collector.On("OnPeerDialFailure", mock.Anything, mock.Anything).Once() - - peerID := unittest.PeerIdFixture(t) - adjustedCfg, err := dialConfigCache.Adjust(peerID, func(dialConfig unicast.DialConfig) (unicast.DialConfig, error) { - dialConfig.DialRetryAttemptBudget = 0 // set the dial backoff budget to 0, meaning that the dial backoff budget is exhausted. - dialConfig.StreamCreationRetryAttemptBudget = 3 // set the stream backoff budget to 3, meaning that the stream backoff budget is exhausted after 1 attempt + 3 retries. - return dialConfig, nil - }) + unicastCfg, err := unicastConfigCache.GetOrInit(peerID) require.NoError(t, err) - require.Equal(t, uint64(0), adjustedCfg.DialRetryAttemptBudget) - require.Equal(t, uint64(3), adjustedCfg.StreamCreationRetryAttemptBudget) - - connStatus.On("IsConnected", peerID).Return(false, nil) - streamFactory.On("Connect", mock.Anything, peer.AddrInfo{ID: peerID}). - Return(func(ctx context.Context, info peer.AddrInfo) error { - <-blockingDial // blocks the call to Connect until the test unblocks it, this is to simulate a dial in progress. - return fmt.Errorf("some error") // dial fails with an error when it is unblocked. - }). - Once() - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // create 2 streams concurrently, the first one will block the dial and the second one will fail after 1 + 3 backoff attempts (4 attempts). - go func() { - s, err := mgr.CreateStream(ctx, peerID) - require.Error(t, err) - require.Nil(t, s) - }() - - go func() { - s, err := mgr.CreateStream(ctx, peerID) - require.Error(t, err) - require.Nil(t, s) - }() - - unittest.RequireCloseBefore(t, testSucceeds, 1*time.Second, "test timed out") + require.Equal(t, uint64(0), unicastCfg.StreamCreationRetryAttemptBudget) // stream backoff budget must remain zero. + require.Equal(t, uint64(0), unicastCfg.ConsecutiveSuccessfulStream) // consecutive successful stream must be set to zero. } diff --git a/network/p2p/unicast/retry.png b/network/p2p/unicast/retry.png index 72aa18752a3..c86edb3ce5f 100644 Binary files a/network/p2p/unicast/retry.png and b/network/p2p/unicast/retry.png differ diff --git a/network/p2p/unicast/stream/errors.go b/network/p2p/unicast/stream/errors.go index 9c73294c52b..725b5e45247 100644 --- a/network/p2p/unicast/stream/errors.go +++ b/network/p2p/unicast/stream/errors.go @@ -20,31 +20,34 @@ func (e ErrSecurityProtocolNegotiationFailed) Error() string { return fmt.Errorf("failed to dial remote peer %s in stream factory invalid node ID: %w", p2plogging.PeerId(e.pid), e.err).Error() } -// NewSecurityProtocolNegotiationErr returns a new ErrSecurityProtocolNegotiationFailed. -func NewSecurityProtocolNegotiationErr(pid peer.ID, err error) ErrSecurityProtocolNegotiationFailed { - return ErrSecurityProtocolNegotiationFailed{pid: pid, err: err} -} - // IsErrSecurityProtocolNegotiationFailed returns whether an error is ErrSecurityProtocolNegotiationFailed. func IsErrSecurityProtocolNegotiationFailed(err error) bool { var e ErrSecurityProtocolNegotiationFailed return errors.As(err, &e) } +// NewSecurityProtocolNegotiationErr returns a new ErrSecurityProtocolNegotiationFailed. +func NewSecurityProtocolNegotiationErr(pid peer.ID, err error) ErrSecurityProtocolNegotiationFailed { + return ErrSecurityProtocolNegotiationFailed{pid: pid, err: err} +} + // ErrProtocolNotSupported indicates node is running on a different spork. type ErrProtocolNotSupported struct { - peerID peer.ID - protocolIDS []protocol.ID - err error + peerID peer.ID + protocolID protocol.ID + err error } func (e ErrProtocolNotSupported) Error() string { - return fmt.Errorf("failed to dial remote peer %s remote node is running on a different spork: %w, protocol attempted: %s", p2plogging.PeerId(e.peerID), e.err, e.protocolIDS).Error() + return fmt.Errorf("failed to dial remote peer %s remote node is running on a different spork: %w, protocol attempted: %s", + p2plogging.PeerId(e.peerID), + e.err, + e.protocolID).Error() } // NewProtocolNotSupportedErr returns a new ErrSecurityProtocolNegotiationFailed. -func NewProtocolNotSupportedErr(peerID peer.ID, protocolIDS []protocol.ID, err error) ErrProtocolNotSupported { - return ErrProtocolNotSupported{peerID: peerID, protocolIDS: protocolIDS, err: err} +func NewProtocolNotSupportedErr(peerID peer.ID, protocolID protocol.ID, err error) ErrProtocolNotSupported { + return ErrProtocolNotSupported{peerID: peerID, protocolID: protocolID, err: err} } // IsErrProtocolNotSupported returns whether an error is ErrProtocolNotSupported. diff --git a/network/p2p/unicast/stream/factory.go b/network/p2p/unicast/stream/factory.go index 6e3b1804b4a..8336836d3a7 100644 --- a/network/p2p/unicast/stream/factory.go +++ b/network/p2p/unicast/stream/factory.go @@ -3,6 +3,7 @@ package stream import ( "context" "errors" + "fmt" "strings" "github.com/libp2p/go-libp2p/core/host" @@ -33,44 +34,59 @@ func (l *LibP2PStreamFactory) SetStreamHandler(pid protocol.ID, handler network. l.host.SetStreamHandler(pid, handler) } -// Connect connects host to peer with peerAddrInfo. -// All errors returned from this function can be considered benign. We expect the following errors during normal operations: +// NewStream establishes a new stream with the given peer using the provided protocol.ID on the libp2p host. +// This function is a critical part of the network communication, facilitating the creation of a dedicated +// bidirectional channel (stream) between two nodes in the network. +// If there exists no connection between the two nodes, the function attempts to establish one before creating the stream. +// If there are multiple connections between the two nodes, the function selects the best one (based on libp2p internal criteria) to create the stream. +// +// Usage: +// The function is intended to be used when there is a need to initiate a direct communication stream with a peer. +// It is typically invoked in scenarios where a node wants to send a message or start a series of messages to another +// node using a specific protocol. The protocol ID is used to ensure that both nodes communicate over the same +// protocol, which defines the structure and semantics of the communication. +// +// Expected errors: +// During normal operation, the function may encounter specific expected errors, which are handled as follows: +// +// - ErrProtocolNotSupported: This error occurs when the remote node does not support the specified protocol ID, +// which may indicate that the remote node is running a different version of the software or a different spork. +// The error contains details about the peer ID and the unsupported protocol, and it is generated when the +// underlying error message indicates a protocol mismatch. This is a critical error as it signifies that the +// two nodes cannot communicate using the requested protocol, and it must be handled by either retrying with +// a different protocol ID or by performing some form of negotiation or fallback. +// // - ErrSecurityProtocolNegotiationFailed this indicates there was an issue upgrading the connection. +// // - ErrGaterDisallowedConnection this indicates the connection was disallowed by the gater. -// - There may be other unexpected errors from libp2p but they should be considered benign. -func (l *LibP2PStreamFactory) Connect(ctx context.Context, peerAddrInfo peer.AddrInfo) error { - // libp2p internally uses swarm dial - https://github.com/libp2p/go-libp2p-swarm/blob/master/swarm_dial.go - // to connect to a peer. Swarm dial adds a back off each time it fails connecting to a peer. While this is - // the desired behaviour for pub-sub (1-k style of communication) for 1-1 style we want to retry the connection - // immediately without backing off and fail-fast. - // Hence, explicitly cancel the dial back off (if any) and try connecting again - if swm, ok := l.host.Network().(*swarm.Swarm); ok { - swm.Backoff().Clear(peerAddrInfo.ID) - } - - err := l.host.Connect(ctx, peerAddrInfo) +// +// - Any other error returned by the libp2p host: This error indicates that the stream creation failed due to +// some unexpected error, which may be caused by a variety of reasons. This is NOT a critical error, and it +// can be handled by retrying the stream creation or by performing some other action. Crashing node upon this +// error is NOT recommended. +// +// Arguments: +// - ctx: A context.Context that governs the lifetime of the stream creation. It can be used to cancel the +// operation or to set deadlines. +// - p: The peer.ID of the target node with which the stream is to be established. +// - pid: The protocol.ID that specifies the communication protocol to be used for the stream. +// +// Returns: +// - network.Stream: The successfully created stream, ready for reading and writing, or nil if an error occurs. +// - error: An error encountered during stream creation, wrapped in a contextually appropriate error type when necessary, +// or nil if the operation is successful. +func (l *LibP2PStreamFactory) NewStream(ctx context.Context, p peer.ID, pid protocol.ID) (network.Stream, error) { + s, err := l.host.NewStream(ctx, p, pid) switch { case err == nil: - return nil + return s, nil + case strings.Contains(err.Error(), protocolNotSupportedStr): + return nil, NewProtocolNotSupportedErr(p, pid, err) case strings.Contains(err.Error(), protocolNegotiationFailedStr): - return NewSecurityProtocolNegotiationErr(peerAddrInfo.ID, err) + return nil, NewSecurityProtocolNegotiationErr(p, err) case errors.Is(err, swarm.ErrGaterDisallowedConnection): - return NewGaterDisallowedConnectionErr(err) + return nil, NewGaterDisallowedConnectionErr(err) default: - return err - } -} - -// NewStream creates a new stream on the libp2p host. -// Expected errors during normal operations: -// - ErrProtocolNotSupported this indicates remote node is running on a different spork. -func (l *LibP2PStreamFactory) NewStream(ctx context.Context, p peer.ID, pids ...protocol.ID) (network.Stream, error) { - s, err := l.host.NewStream(ctx, p, pids...) - if err != nil { - if strings.Contains(err.Error(), protocolNotSupportedStr) { - return nil, NewProtocolNotSupportedErr(p, pids, err) - } - return nil, err + return nil, fmt.Errorf("failed to create stream: %w", err) } - return s, err } diff --git a/network/validator/target_validator.go b/network/validator/target_validator.go index 5a9b1ab73f9..d02901b166e 100644 --- a/network/validator/target_validator.go +++ b/network/validator/target_validator.go @@ -35,7 +35,8 @@ func (tv *TargetValidator) Validate(msg network.IncomingMessageScope) bool { } } tv.log.Debug(). - Hex("target", logging.ID(tv.target)). + Hex("message_target_id", logging.ID(tv.target)). + Hex("local_node_id", logging.ID(tv.target)). Hex("event_id", msg.EventID()). Msg("message not intended for target") return false diff --git a/state/protocol/badger/snapshot_test.go b/state/protocol/badger/snapshot_test.go index b484beaff55..19df022ea82 100644 --- a/state/protocol/badger/snapshot_test.go +++ b/state/protocol/badger/snapshot_test.go @@ -946,7 +946,7 @@ func TestLatestSealedResult(t *testing.T) { expectedResult, expectedSeal, err := rootSnapshot.SealedResult() require.NoError(t, err) - assert.Equal(t, expectedResult, gotResult) + assert.Equal(t, expectedResult.ID(), gotResult.ID()) assert.Equal(t, expectedSeal, gotSeal) }) }) diff --git a/state/protocol/events.go b/state/protocol/events.go index e97c4f7c84c..c8dcf460159 100644 --- a/state/protocol/events.go +++ b/state/protocol/events.go @@ -56,15 +56,14 @@ type Consumer interface { // the current epoch. This is equivalent to the end of the epoch staking // phase for the current epoch. // - // Referencing the diagram below, the event is emitted when block c is incorporated. - // The block parameter is the first block of the epoch setup phase (block c). + // Referencing the diagram below, the event is emitted when block b is finalized. + // The block parameter is the first block of the epoch setup phase (block b). // // |<-- Epoch N ------------------------------------------------->| // |<-- StakingPhase -->|<-- SetupPhase -->|<-- CommittedPhase -->| // ^--- block A - this block's execution result contains an EpochSetup event - // ^--- block b - contains seal for block A - // ^--- block c - contains qc for block b, first block of Setup phase - // ^--- block d - finalizes block c, triggers EpochSetupPhaseStarted event + // ^--- block b - contains seal for block A, first block of Setup phase + // ^--- block c - finalizes block b, triggers EpochSetupPhaseStarted event // // NOTE: Only called once the phase transition has been finalized. EpochSetupPhaseStarted(currentEpochCounter uint64, first *flow.Header) @@ -73,16 +72,14 @@ type Consumer interface { // for the current epoch. This is equivalent to the end of the epoch setup // phase for the current epoch. // - // Referencing the diagram below, the event is emitted when block f is received. - // The block parameter is the first block of the epoch committed phase (block f). + // Referencing the diagram below, the event is emitted when block e is finalized. + // The block parameter is the first block of the epoch committed phase (block e). // // |<-- Epoch N ------------------------------------------------->| // |<-- StakingPhase -->|<-- SetupPhase -->|<-- CommittedPhase -->| // ^--- block D - this block's execution result contains an EpochCommit event - // ^--- block e - contains seal for block D - // ^--- block f - contains qc for block e, first block of Committed phase - // ^--- block g - finalizes block f, triggers EpochCommittedPhaseStarted event - /// + // ^--- block e - contains seal for block D, first block of Committed phase + // ^--- block f - finalizes block e, triggers EpochCommittedPhaseStarted event // // NOTE: Only called once the phase transition has been finalized. EpochCommittedPhaseStarted(currentEpochCounter uint64, first *flow.Header) diff --git a/state/protocol/validity_test.go b/state/protocol/validity_test.go index 79935aebcce..be17ab66306 100644 --- a/state/protocol/validity_test.go +++ b/state/protocol/validity_test.go @@ -50,7 +50,7 @@ func TestEpochSetupValidity(t *testing.T) { t.Run("short seed", func(t *testing.T) { _, result, _ := unittest.BootstrapFixture(participants) setup := result.ServiceEvents[0].Event.(*flow.EpochSetup) - setup.RandomSource = unittest.SeedFixture(crypto.SeedMinLenDKG - 1) + setup.RandomSource = unittest.SeedFixture(crypto.KeyGenSeedMinLen - 1) err := protocol.IsValidEpochSetup(setup, true) require.Error(t, err) diff --git a/storage/badger/all.go b/storage/badger/all.go index 9ee0aa24002..8d1a718044c 100644 --- a/storage/badger/all.go +++ b/storage/badger/all.go @@ -19,7 +19,8 @@ func InitAll(metrics module.CacheMetrics, db *badger.DB) *storage.All { qcs := NewQuorumCertificates(metrics, db, DefaultCacheSize) setups := NewEpochSetups(metrics, db) epochCommits := NewEpochCommits(metrics, db) - protocolState := NewProtocolState(metrics, setups, epochCommits, db, DefaultCacheSize) + protocolState := NewProtocolState(metrics, setups, epochCommits, db, + DefaultProtocolStateCacheSize, DefaultProtocolStateByBlockIDCacheSize) versionBeacons := NewVersionBeacons(db) commits := NewCommits(metrics, db) diff --git a/storage/badger/events_test.go b/storage/badger/events_test.go index c5d2bf706eb..cb0e956395c 100644 --- a/storage/badger/events_test.go +++ b/storage/badger/events_test.go @@ -60,8 +60,7 @@ func TestEventStoreRetrieve(t *testing.T) { require.Len(t, actual, 1) require.Contains(t, actual, evt2_1) - events, err := systemcontracts.ServiceEventsForChain(flow.Emulator) - require.NoError(t, err) + events := systemcontracts.ServiceEventsForChain(flow.Emulator) actual, err = store.ByBlockIDEventType(blockID, events.EpochSetup.EventType()) require.NoError(t, err) diff --git a/storage/badger/headers.go b/storage/badger/headers.go index bfdaaa320df..49574e5abc9 100644 --- a/storage/badger/headers.go +++ b/storage/badger/headers.go @@ -52,12 +52,12 @@ func NewHeaders(collector module.CacheMetrics, db *badger.DB) *Headers { h := &Headers{ db: db, - cache: newCache[flow.Identifier, *flow.Header](collector, metrics.ResourceHeader, + cache: newCache(collector, metrics.ResourceHeader, withLimit[flow.Identifier, *flow.Header](4*flow.DefaultTransactionExpiry), withStore(store), withRetrieve(retrieve)), - heightCache: newCache[uint64, flow.Identifier](collector, metrics.ResourceFinalizedHeight, + heightCache: newCache(collector, metrics.ResourceFinalizedHeight, withLimit[uint64, flow.Identifier](4*flow.DefaultTransactionExpiry), withStore(storeHeight), withRetrieve(retrieveHeight)), diff --git a/storage/badger/protocol_state.go b/storage/badger/protocol_state.go index 4b3663dd429..b5d588cbd30 100644 --- a/storage/badger/protocol_state.go +++ b/storage/badger/protocol_state.go @@ -15,26 +15,63 @@ import ( "github.com/onflow/flow-go/storage/badger/transaction" ) -// ProtocolState implements persistent storage for storing identity table instances. +// DefaultProtocolStateCacheSize is the default size for primary protocol state cache. +// Minimally, we have 3 entries per epoch (one on epoch Switchover, one on receiving the Epoch Setup and one when seeing the Epoch Commit event). +// Lets be generous and assume we have 20 different Protocol States per epoch. +var DefaultProtocolStateCacheSize uint = 20 + +// DefaultProtocolStateByBlockIDCacheSize is the default value for secondary byBlockIdCache. +// We want to be able to cover a broad interval of views without cache misses, so we use a bigger value. +var DefaultProtocolStateByBlockIDCacheSize uint = 1000 + +// ProtocolState implements persistent storage for storing Protocol States. // Protocol state uses an embedded cache without storing capabilities(store happens on first retrieval) to avoid unnecessary -// operations and to speed up access to frequently used identity tables. -// TODO: update naming to IdentityTable +// operations and to speed up access to frequently used Protocol State. type ProtocolState struct { - db *badger.DB + db *badger.DB + + // cache is essentially an in-memory map from `ProtocolStateEntry.ID()` -> `RichProtocolStateEntry` + // We do _not_ populate this cache which holds the RichProtocolStateEntrys on store. This is because + // (i) we don't have the RichProtocolStateEntry on store readily available and + // (ii) new RichProtocolStateEntry are really rare throughout an epoch, so the total cost of populating + // the cache becomes negligible over several views. + // In the future, we might want to populate the cache on store, if we want to maintain frequently-changing + // information in the protocol state, like the latest sealed block. This should be a smaller amount of work, + // because the `ProtocolStateEntry` is generated by `StateMutator.Build()`. The `StateMutator` should already + // have the needed Epoch Setup and Commit events, since it starts with a RichProtocolStateEntry for the parent + // state and consumes Epoch Setup and Epoch Commit events. Though, we leave this optimization for later. + // + // `cache` only holds the distinct Protocol States. On the happy path, we expect something like 3 entries per epoch. + // On the optimal happy path we have 3 entries per epoch: one entry on epoch Switchover, one on receiving the Epoch Setup + // and one when seeing the Epoch Commit event. Let's be generous and assume we have 20 different Protocol States per epoch. + // Beyond that, we are certainly leaving the domain of normal operations that we optimize for. Therefore, a cache size of + // roughly 100 is a reasonable balance between performance and memory consumption. cache *Cache[flow.Identifier, *flow.RichProtocolStateEntry] + + // byBlockIdCache is essentially an in-memory map from `Block.ID()` -> `ProtocolStateEntry.ID()`. The full + // Protocol state can be retrieved from the `cache` above. + // We populate the `byBlockIdCache` on store, because a new entry is added for every block and we probably also + // query the Protocol state for every block. So argument (ii) from above does not apply here. Furthermore, + // argument (i) from above also does not apply, because we already have the Protocol State's ID on store, + // so populating the cache is easy. + // + // `byBlockIdCache` will contain an entry for every block. We want to be able to cover a broad interval of views + // without cache misses, so a cache size of roughly 1000 entries is reasonable. + byBlockIdCache *Cache[flow.Identifier, flow.Identifier] } var _ storage.ProtocolState = (*ProtocolState)(nil) -// NewProtocolState creates a ProtocolState instance, which is a database of identity table instances. +// NewProtocolState creates a ProtocolState instance, which is a database of Protocol State. // It supports storing, caching and retrieving by ID or the additionally indexed block ID. func NewProtocolState(collector module.CacheMetrics, epochSetups storage.EpochSetups, epochCommits storage.EpochCommits, db *badger.DB, - cacheSize uint, + stateCacheSize uint, + stateByBlockIDCacheSize uint, ) *ProtocolState { - retrieve := func(protocolStateID flow.Identifier) func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) { + retrieveByProtocolStateID := func(protocolStateID flow.Identifier) func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) { var protocolStateEntry flow.ProtocolStateEntry return func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) { err := operation.RetrieveProtocolState(protocolStateID, &protocolStateEntry)(tx) @@ -43,26 +80,53 @@ func NewProtocolState(collector module.CacheMetrics, } result, err := newRichProtocolStateEntry(&protocolStateEntry, epochSetups, epochCommits) if err != nil { - return nil, fmt.Errorf("could not create rich identity table entry: %w", err) + return nil, fmt.Errorf("could not create rich protocol state entry: %w", err) } return result, nil } } + storeByBlockID := func(blockID flow.Identifier, protocolStateID flow.Identifier) func(*transaction.Tx) error { + return func(tx *transaction.Tx) error { + err := transaction.WithTx(operation.IndexProtocolState(blockID, protocolStateID))(tx) + if err != nil { + return fmt.Errorf("could not index protocol state for block (%x): %w", blockID[:], err) + } + return nil + } + } + + retrieveByBlockID := func(blockID flow.Identifier) func(tx *badger.Txn) (flow.Identifier, error) { + return func(tx *badger.Txn) (flow.Identifier, error) { + var protocolStateID flow.Identifier + err := operation.LookupProtocolState(blockID, &protocolStateID)(tx) + if err != nil { + return flow.ZeroID, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err) + } + return protocolStateID, nil + } + } + return &ProtocolState{ db: db, cache: newCache[flow.Identifier, *flow.RichProtocolStateEntry](collector, metrics.ResourceProtocolState, - withLimit[flow.Identifier, *flow.RichProtocolStateEntry](cacheSize), + withLimit[flow.Identifier, *flow.RichProtocolStateEntry](stateCacheSize), withStore(noopStore[flow.Identifier, *flow.RichProtocolStateEntry]), - withRetrieve(retrieve)), + withRetrieve(retrieveByProtocolStateID)), + byBlockIdCache: newCache[flow.Identifier, flow.Identifier](collector, metrics.ResourceProtocolStateByBlockID, + withLimit[flow.Identifier, flow.Identifier](stateByBlockIDCacheSize), + withStore(storeByBlockID), + withRetrieve(retrieveByBlockID)), } } -// StoreTx allows us to store an identity table as part of a DB tx, while still going through the caching layer. -// Per convention, the given Identity Table must be in canonical order, otherwise an exception is returned. -// Expected error returns during normal operations: -// - storage.ErrAlreadyExists if an Identity Table with the given id is already stored -func (s *ProtocolState) StoreTx(id flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error { +// StoreTx returns an anonymous function (intended to be executed as part of a badger transaction), +// which persists the given protocol state as part of a DB tx. Per convention, the identities in +// the Protocol State must be in canonical order for the current and next epoch (if present), +// otherwise an exception is returned. +// Expected errors of the returned anonymous function: +// - storage.ErrAlreadyExists if a Protocol State with the given id is already stored +func (s *ProtocolState) StoreTx(protocolStateID flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error { // front-load sanity checks: if !protocolState.CurrentEpoch.ActiveIdentities.Sorted(order.IdentifierCanonical) { return transaction.Fail(fmt.Errorf("sanity check failed: identities are not sorted")) @@ -72,60 +136,56 @@ func (s *ProtocolState) StoreTx(id flow.Identifier, protocolState *flow.Protocol } // happy path: return anonymous function, whose future execution (as part of a transaction) will store the protocolState - return transaction.WithTx(operation.InsertProtocolState(id, protocolState)) + return transaction.WithTx(operation.InsertProtocolState(protocolStateID, protocolState)) } -// Index indexes the identity table by block ID. -// Error returns: -// - storage.ErrAlreadyExists if an identity table for the given blockID has already been indexed +// Index returns an anonymous function that is intended to be executed as part of a database transaction. +// In a nutshell, we want to maintain a map from `blockID` to `protocolStateID`, where `blockID` references the +// block that _proposes_ the Protocol State. +// Upon call, the anonymous function persists the specific map entry in the node's database. +// Protocol convention: +// - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example, +// the protocol state changes if we seal some execution results emitting service events. +// - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value, +// the hash of the resulting protocol state at the end of processing B is to be used. +// - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block, +// _after_ validating the QC. +// +// Expected errors during normal operations: +// - storage.ErrAlreadyExists if a Protocol State for the given blockID has already been indexed func (s *ProtocolState) Index(blockID flow.Identifier, protocolStateID flow.Identifier) func(*transaction.Tx) error { - return func(tx *transaction.Tx) error { - err := transaction.WithTx(operation.IndexProtocolState(blockID, protocolStateID))(tx) - if err != nil { - return fmt.Errorf("could not index identity table for block (%x): %w", blockID[:], err) - } - return nil - } + return s.byBlockIdCache.PutTx(blockID, protocolStateID) } -// ByID retrieves the identity table by its ID. -// Error returns: -// - storage.ErrNotFound if no identity table with the given ID exists -func (s *ProtocolState) ByID(id flow.Identifier) (*flow.RichProtocolStateEntry, error) { +// ByID returns the protocol state by its ID. +// Expected errors during normal operations: +// - storage.ErrNotFound if no protocol state with the given Identifier is known. +func (s *ProtocolState) ByID(protocolStateID flow.Identifier) (*flow.RichProtocolStateEntry, error) { tx := s.db.NewTransaction(false) defer tx.Discard() - return s.byID(id)(tx) + return s.cache.Get(protocolStateID)(tx) } -// ByBlockID retrieves the identity table by the respective block ID. -// TODO: clarify whether the blockID is the block that defines this identity table or the _child_ block where the identity table is applied. CAUTION: surface for bugs! -// Error returns: -// - storage.ErrNotFound if no identity table for the given blockID exists +// ByBlockID retrieves the Protocol State that the block with the given ID proposes. +// CAUTION: this protocol state requires confirmation by a QC and will only become active at the child block, +// _after_ validating the QC. Protocol convention: +// - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example, +// the protocol state changes if we seal some execution results emitting service events. +// - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value, +// the hash of the resulting protocol state at the end of processing B is to be used. +// - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block, +// _after_ validating the QC. +// +// Expected errors during normal operations: +// - storage.ErrNotFound if no protocol state has been indexed for the given block. func (s *ProtocolState) ByBlockID(blockID flow.Identifier) (*flow.RichProtocolStateEntry, error) { tx := s.db.NewTransaction(false) defer tx.Discard() - return s.byBlockID(blockID)(tx) -} - -// byID retrieves the identity table by its ID. Error returns: -// - storage.ErrNotFound if no identity table with the given ID exists -func (s *ProtocolState) byID(protocolStateID flow.Identifier) func(*badger.Txn) (*flow.RichProtocolStateEntry, error) { - return s.cache.Get(protocolStateID) -} - -// byBlockID retrieves the identity table by the respective block ID. -// TODO: clarify whether the blockID is the block that defines this identity table or the _child_ block where the identity table is applied. CAUTION: surface for bugs! -// Error returns: -// - storage.ErrNotFound if no identity table for the given blockID exists -func (s *ProtocolState) byBlockID(blockID flow.Identifier) func(*badger.Txn) (*flow.RichProtocolStateEntry, error) { - return func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) { - var protocolStateID flow.Identifier - err := operation.LookupProtocolState(blockID, &protocolStateID)(tx) - if err != nil { - return nil, fmt.Errorf("could not lookup identity table ID for block (%x): %w", blockID[:], err) - } - return s.byID(protocolStateID)(tx) + protocolStateID, err := s.byBlockIdCache.Get(blockID)(tx) + if err != nil { + return nil, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err) } + return s.cache.Get(protocolStateID)(tx) } // newRichProtocolStateEntry constructs a rich protocol state entry from a protocol state entry. diff --git a/storage/badger/protocol_state_test.go b/storage/badger/protocol_state_test.go index 701d4e282e9..857b7fd6900 100644 --- a/storage/badger/protocol_state_test.go +++ b/storage/badger/protocol_state_test.go @@ -22,7 +22,7 @@ func TestProtocolStateStorage(t *testing.T) { setups := NewEpochSetups(metrics, db) commits := NewEpochCommits(metrics, db) - store := NewProtocolState(metrics, setups, commits, db, DefaultCacheSize) + store := NewProtocolState(metrics, setups, commits, db, DefaultProtocolStateCacheSize, DefaultProtocolStateByBlockIDCacheSize) expected := unittest.ProtocolStateFixture(unittest.WithNextEpochProtocolState()) protocolStateID := expected.ID() @@ -73,7 +73,7 @@ func TestProtocolStateStoreInvalidProtocolState(t *testing.T) { metrics := metrics.NewNoopCollector() setups := NewEpochSetups(metrics, db) commits := NewEpochCommits(metrics, db) - store := NewProtocolState(metrics, setups, commits, db, DefaultCacheSize) + store := NewProtocolState(metrics, setups, commits, db, DefaultProtocolStateCacheSize, DefaultProtocolStateByBlockIDCacheSize) invalid := unittest.ProtocolStateFixture().ProtocolStateEntry // swap first and second elements to break canonical order invalid.CurrentEpoch.ActiveIdentities[0], invalid.CurrentEpoch.ActiveIdentities[1] = invalid.CurrentEpoch.ActiveIdentities[1], invalid.CurrentEpoch.ActiveIdentities[0] @@ -99,7 +99,7 @@ func TestProtocolStateMergeParticipants(t *testing.T) { setups := NewEpochSetups(metrics, db) commits := NewEpochCommits(metrics, db) - store := NewProtocolState(metrics, setups, commits, db, DefaultCacheSize) + store := NewProtocolState(metrics, setups, commits, db, DefaultProtocolStateCacheSize, DefaultProtocolStateByBlockIDCacheSize) stateEntry := unittest.ProtocolStateFixture() // change address of participant in current epoch, so we can distinguish it from the one in previous epoch @@ -148,7 +148,7 @@ func TestProtocolStateRootSnapshot(t *testing.T) { setups := NewEpochSetups(metrics, db) commits := NewEpochCommits(metrics, db) - store := NewProtocolState(metrics, setups, commits, db, DefaultCacheSize) + store := NewProtocolState(metrics, setups, commits, db, DefaultProtocolStateCacheSize, DefaultProtocolStateByBlockIDCacheSize) expected := unittest.RootProtocolStateFixture() protocolStateID := expected.ID() diff --git a/storage/mock/protocol_state.go b/storage/mock/protocol_state.go index 3173ff84b0c..407bbc8a699 100644 --- a/storage/mock/protocol_state.go +++ b/storage/mock/protocol_state.go @@ -82,13 +82,13 @@ func (_m *ProtocolState) Index(blockID flow.Identifier, protocolStateID flow.Ide return r0 } -// StoreTx provides a mock function with given fields: id, protocolState -func (_m *ProtocolState) StoreTx(id flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error { - ret := _m.Called(id, protocolState) +// StoreTx provides a mock function with given fields: protocolStateID, protocolState +func (_m *ProtocolState) StoreTx(protocolStateID flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error { + ret := _m.Called(protocolStateID, protocolState) var r0 func(*transaction.Tx) error if rf, ok := ret.Get(0).(func(flow.Identifier, *flow.ProtocolStateEntry) func(*transaction.Tx) error); ok { - r0 = rf(id, protocolState) + r0 = rf(protocolStateID, protocolState) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(func(*transaction.Tx) error) diff --git a/storage/protocol_state.go b/storage/protocol_state.go index b338cf0a46c..9fa905d3924 100644 --- a/storage/protocol_state.go +++ b/storage/protocol_state.go @@ -6,35 +6,47 @@ import ( ) // ProtocolState represents persistent storage for protocol state entries. -// TODO: update naming to IdentityTable type ProtocolState interface { // StoreTx returns an anonymous function (intended to be executed as part of a badger transaction), - // which persists the given protocol state as part of a DB tx. - // Per convention, the given Identity Table must be in canonical order, otherwise an exception is returned. + // which persists the given protocol state as part of a DB tx. Per convention, the identities in + // the Protocol State must be in canonical order for the current and next epoch (if present), + // otherwise an exception is returned. // Expected errors of the returned anonymous function: - // - storage.ErrAlreadyExists if a protocol state snapshot has already been stored under the given id. - StoreTx(id flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error + // - storage.ErrAlreadyExists if a Protocol State with the given id is already stored + StoreTx(protocolStateID flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error // Index returns an anonymous function that is intended to be executed as part of a database transaction. - // In a nutshell, we want to maintain a map from `blockID` to `protocolStateID`. + // In a nutshell, we want to maintain a map from `blockID` to `protocolStateID`, where `blockID` references the + // block that _proposes_ the Protocol State. // Upon call, the anonymous function persists the specific map entry in the node's database. // Protocol convention: // - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example, // the protocol state changes if we seal some execution results emitting service events. - // - For the key `blockID`, we use the identity of block B. As value, the hash of the resulting protocol - // state at the end of processing B is to be used. + // - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value, + // the hash of the resulting protocol state at the end of processing B is to be used. + // - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block, + // _after_ validating the QC. + // // Expected errors during normal operations: - // - storage.ErrAlreadyExists if the key already exists in the database. + // - storage.ErrAlreadyExists if a Protocol State for the given blockID has already been indexed Index(blockID flow.Identifier, protocolStateID flow.Identifier) func(*transaction.Tx) error // ByID returns the protocol state by its ID. // Expected errors during normal operations: - // * `storage.ErrNotFound` if no protocol state snapshot with the given Identifier is known. + // - storage.ErrNotFound if no protocol state with the given Identifier is known. ByID(id flow.Identifier) (*flow.RichProtocolStateEntry, error) - // ByBlockID retrieves the identity table by the respective block ID. - // TODO: clarify whether the blockID is the block that defines this identity table or the _child_ block where the identity table is applied. CAUTION: surface for bugs! + // ByBlockID retrieves the Protocol State that the block with the given ID proposes. + // CAUTION: this protocol state requires confirmation by a QC and will only become active at the child block, + // _after_ validating the QC. Protocol convention: + // - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example, + // the protocol state changes if we seal some execution results emitting service events. + // - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value, + // the hash of the resulting protocol state at the end of processing B is to be used. + // - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block, + // _after_ validating the QC. + // // Expected errors during normal operations: - // * `storage.ErrNotFound` if no snapshot of the protocol state has been indexed for the given block. + // - storage.ErrNotFound if no protocol state has been indexed for the given block. ByBlockID(blockID flow.Identifier) (*flow.RichProtocolStateEntry, error) } diff --git a/tools/test_monitor/level1/process_summary1_results_test.go b/tools/test_monitor/level1/process_summary1_results_test.go index c64f8442995..6e7b12f0551 100644 --- a/tools/test_monitor/level1/process_summary1_results_test.go +++ b/tools/test_monitor/level1/process_summary1_results_test.go @@ -33,19 +33,19 @@ func TestGenerateLevel1Summary_Struct(t *testing.T) { RawJSONTestRunFile: "test-result-crypto-hash-1-count-skip-pass.json", }, - // raw results generated with: go test -json -count 1 --tags relic ./utils/unittest/... + // raw results generated with: go test -json -count 1 ./utils/unittest/... "2 count all pass": { ExpectedLevel1Summary: testdata.GetTestData_Level1_2CountPass(), RawJSONTestRunFile: "test-result-crypto-hash-2-count-pass.json", }, - // raw results generated with: go test -json -count 1 --tags relic ./utils/unittest/... + // raw results generated with: go test -json -count 1 ./utils/unittest/... "10 count all pass": { ExpectedLevel1Summary: testdata.GetTestData_Level1_10CountPass(), RawJSONTestRunFile: "test-result-crypto-hash-10-count-pass.json", }, - // raw results generated with: go test -json -count 1 --tags relic ./utils/unittest/... + // raw results generated with: go test -json -count 1 ./utils/unittest/... "10 count some failures": { ExpectedLevel1Summary: testdata.GetTestData_Level1_10CountSomeFailures(), RawJSONTestRunFile: "test-result-crypto-hash-10-count-fail.json", @@ -54,14 +54,14 @@ func TestGenerateLevel1Summary_Struct(t *testing.T) { // no result tests - tests below don't generate pass/fail result due to `go test` bug // with using `fmt.printf("log message")` without newline `\n` - // raw results generated with: go test -v -tags relic -count=1 -json ./model/encodable/. -test.run TestEncodableRandomBeaconPrivKeyMsgPack + // raw results generated with: go test -v -count=1 -json ./model/encodable/. -test.run TestEncodableRandomBeaconPrivKeyMsgPack // this is a single unit test that produces a no result "1 count single no result test": { ExpectedLevel1Summary: testdata.GetTestData_Level1_1CountSingleExceptionTest(), RawJSONTestRunFile: "test-result-exception-single-1-count-pass.json", }, - //raw results generated with: go test -v -tags relic -count=5 -json ./model/encodable/. -test.run TestEncodableRandomBeaconPrivKeyMsgPack + //raw results generated with: go test -v -count=5 -json ./model/encodable/. -test.run TestEncodableRandomBeaconPrivKeyMsgPack //multiple no result tests in a row "5 no result tests in a row": { ExpectedLevel1Summary: testdata.GetTestData_Level1_5CountSingleExceptionTest(), @@ -74,7 +74,7 @@ func TestGenerateLevel1Summary_Struct(t *testing.T) { RawJSONTestRunFile: "test-result-exception-single-5-count-4-nil-1-normal-pass.json", }, - // raw results generated with: go test -v -tags relic -count=3 -json ./model/encodable/. + // raw results generated with: go test -v -count=3 -json ./model/encodable/. // group of unit tests with a single no result test "3 count no result test with normal tests": { ExpectedLevel1Summary: testdata.GetTestData_Leve1_3CountExceptionWithNormalTests(), diff --git a/utils/binstat/binstat_external_test.go b/utils/binstat/binstat_external_test.go index 9ffa7b23065..10f8b911ff9 100644 --- a/utils/binstat/binstat_external_test.go +++ b/utils/binstat/binstat_external_test.go @@ -28,7 +28,7 @@ import ( * 5. Strip "time" field from JSON log line output for shorter read, and * 6. Show the amount of code coverage from the tests. * - * pushd utils/binstat ; go fmt ./*.go ; golangci-lint run && go test -v -vv -coverprofile=coverage.txt -covermode=atomic --tags relic ./... | perl -lane 's~\\n~\n~g; s~"time".*?,~~g; print;' ; go tool cover -func=coverage.txt ; popd + * pushd utils/binstat ; go fmt ./*.go ; golangci-lint run && go test -v -vv -coverprofile=coverage.txt -covermode=atomic ./... | perl -lane 's~\\n~\n~g; s~"time".*?,~~g; print;' ; go tool cover -func=coverage.txt ; popd */ /* diff --git a/utils/unittest/fixtures.go b/utils/unittest/fixtures.go index fc98df92263..0b20697972f 100644 --- a/utils/unittest/fixtures.go +++ b/utils/unittest/fixtures.go @@ -1459,6 +1459,14 @@ func TransactionDSLFixture(chain flow.Chain) dsl.Transaction { } } +// RegisterIDFixture returns a RegisterID with a fixed key and owner +func RegisterIDFixture() flow.RegisterID { + return flow.RegisterID{ + Owner: "owner", + Key: "key", + } +} + // VerifiableChunkDataFixture returns a complete verifiable chunk with an // execution receipt referencing the block/collections. func VerifiableChunkDataFixture(chunkIndex uint64) *verification.VerifiableChunkData { @@ -2836,7 +2844,7 @@ func P2PRPCPruneFixture(topic *string) *pubsub_pb.ControlPrune { } } -// P2PRPCIHaveFixtures returns n number of control message rpc iHave fixtures with m number of message ids each. +// P2PRPCIHaveFixtures returns n number of control message where n = len(topics) rpc iHave fixtures with m number of message ids each. func P2PRPCIHaveFixtures(m int, topics ...string) []*pubsub_pb.ControlIHave { n := len(topics) ihaves := make([]*pubsub_pb.ControlIHave, n) @@ -2921,14 +2929,14 @@ func P2PRPCFixture(opts ...RPCFixtureOpt) *pubsub.RPC { return rpc } -func WithTopic(topic string) func(*pubsub_pb.Message) { +func WithFrom(pid peer.ID) func(*pubsub_pb.Message) { return func(msg *pubsub_pb.Message) { - msg.Topic = &topic + msg.From = []byte(pid) } } // GossipSubMessageFixture returns a gossip sub message fixture for the specified topic. -func GossipSubMessageFixture(t *testing.T, s string, opts ...func(*pubsub_pb.Message)) *pubsub_pb.Message { +func GossipSubMessageFixture(s string, opts ...func(*pubsub_pb.Message)) *pubsub_pb.Message { pb := &pubsub_pb.Message{ From: RandomBytes(32), Data: RandomBytes(32), @@ -2946,10 +2954,10 @@ func GossipSubMessageFixture(t *testing.T, s string, opts ...func(*pubsub_pb.Mes } // GossipSubMessageFixtures returns a list of gossipsub message fixtures. -func GossipSubMessageFixtures(t *testing.T, n int, topic string, opts ...func(*pubsub_pb.Message)) []*pubsub_pb.Message { +func GossipSubMessageFixtures(n int, topic string, opts ...func(*pubsub_pb.Message)) []*pubsub_pb.Message { msgs := make([]*pubsub_pb.Message, n) for i := 0; i < n; i++ { - msgs[i] = GossipSubMessageFixture(t, topic, opts...) + msgs[i] = GossipSubMessageFixture(topic, opts...) } return msgs } @@ -2968,3 +2976,25 @@ func LibP2PResourceLimitOverrideFixture() p2pconf.ResourceManagerOverrideLimit { Memory: rand.Intn(1000), } } + +func RegisterEntryFixture() flow.RegisterEntry { + val := make([]byte, 4) + _, _ = crand.Read(val) + return flow.RegisterEntry{ + Key: flow.RegisterID{ + Owner: "owner", + Key: "key1", + }, + Value: val, + } +} + +func MakeOwnerReg(key string, value string) flow.RegisterEntry { + return flow.RegisterEntry{ + Key: flow.RegisterID{ + Owner: "owner", + Key: key, + }, + Value: []byte(value), + } +} diff --git a/utils/unittest/fvm.go b/utils/unittest/fvm.go index 51e3229cdbc..f0df9e0fbcb 100644 --- a/utils/unittest/fvm.go +++ b/utils/unittest/fvm.go @@ -10,7 +10,7 @@ import ( ) func IsServiceEvent(event flow.Event, chainID flow.ChainID) bool { - serviceEvents, _ := systemcontracts.ServiceEventsForChain(chainID) + serviceEvents := systemcontracts.ServiceEventsForChain(chainID) for _, serviceEvent := range serviceEvents.All() { if serviceEvent.EventType() == event.Type { return true diff --git a/utils/unittest/logging.go b/utils/unittest/logging.go index ee9dd762b77..a200a61525e 100644 --- a/utils/unittest/logging.go +++ b/utils/unittest/logging.go @@ -30,7 +30,7 @@ func Logger() zerolog.Logger { writer = os.Stderr } - return LoggerWithWriterAndLevel(writer, zerolog.DebugLevel) + return LoggerWithWriterAndLevel(writer, zerolog.TraceLevel) } func LoggerWithWriterAndLevel(writer io.Writer, level zerolog.Level) zerolog.Logger { diff --git a/utils/unittest/service_events_fixtures.go b/utils/unittest/service_events_fixtures.go index b56bd3cd88c..2273eaf735c 100644 --- a/utils/unittest/service_events_fixtures.go +++ b/utils/unittest/service_events_fixtures.go @@ -15,10 +15,7 @@ import ( // EpochSetupFixtureByChainID returns an EpochSetup service event as a Cadence event // representation and as a protocol model representation. func EpochSetupFixtureByChainID(chain flow.ChainID) (flow.Event, *flow.EpochSetup) { - events, err := systemcontracts.ServiceEventsForChain(chain) - if err != nil { - panic(err) - } + events := systemcontracts.ServiceEventsForChain(chain) event := EventFixture(events.EpochSetup.EventType(), 1, 1, IdentifierFixture(), 0) event.Payload = EpochSetupFixtureCCF @@ -114,10 +111,7 @@ func EpochSetupFixtureByChainID(chain flow.ChainID) (flow.Event, *flow.EpochSetu // representation and as a protocol model representation. func EpochCommitFixtureByChainID(chain flow.ChainID) (flow.Event, *flow.EpochCommit) { - events, err := systemcontracts.ServiceEventsForChain(chain) - if err != nil { - panic(err) - } + events := systemcontracts.ServiceEventsForChain(chain) event := EventFixture(events.EpochCommit.EventType(), 1, 1, IdentifierFixture(), 0) event.Payload = EpochCommitFixtureCCF @@ -153,10 +147,7 @@ func EpochCommitFixtureByChainID(chain flow.ChainID) (flow.Event, *flow.EpochCom // representation and as a protocol model representation. func VersionBeaconFixtureByChainID(chain flow.ChainID) (flow.Event, *flow.VersionBeacon) { - events, err := systemcontracts.ServiceEventsForChain(chain) - if err != nil { - panic(err) - } + events := systemcontracts.ServiceEventsForChain(chain) event := EventFixture(events.VersionBeacon.EventType(), 1, 1, IdentifierFixture(), 0) event.Payload = VersionBeaconFixtureCCF diff --git a/utils/unittest/unittest.go b/utils/unittest/unittest.go index 0ad7a8736e4..9fba23ccd69 100644 --- a/utils/unittest/unittest.go +++ b/utils/unittest/unittest.go @@ -1,7 +1,6 @@ package unittest import ( - crand "crypto/rand" "encoding/json" "math" "math/rand" @@ -18,15 +17,16 @@ import ( "github.com/cockroachdb/pebble" "github.com/dgraph-io/badger/v2" "github.com/libp2p/go-libp2p/core/peer" - "github.com/multiformats/go-multihash" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/onflow/flow-go/crypto" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module" "github.com/onflow/flow-go/module/util" "github.com/onflow/flow-go/network" cborcodec "github.com/onflow/flow-go/network/codec/cbor" + "github.com/onflow/flow-go/network/p2p/keyutils" "github.com/onflow/flow-go/network/topology" ) @@ -453,15 +453,43 @@ func GenerateRandomStringWithLen(commentLen uint) string { return string(bytes) } -// PeerIdFixture returns a random peer ID for testing. -// peer ID is the identifier of a node on the libp2p network. -func PeerIdFixture(t testing.TB) peer.ID { - buf := make([]byte, 16) - n, err := crand.Read(buf) - require.NoError(t, err) - require.Equal(t, 16, n) - h, err := multihash.Sum(buf, multihash.SHA2_256, -1) - require.NoError(t, err) +// PeerIdFixture creates a random and unique peer ID (libp2p node ID). +func PeerIdFixture(tb testing.TB) peer.ID { + peerID, err := peerIDFixture() + require.NoError(tb, err) + return peerID +} + +func peerIDFixture() (peer.ID, error) { + key, err := generateNetworkingKey(IdentifierFixture()) + if err != nil { + return "", err + } + pubKey, err := keyutils.LibP2PPublicKeyFromFlow(key.PublicKey()) + if err != nil { + return "", err + } + + peerID, err := peer.IDFromPublicKey(pubKey) + if err != nil { + return "", err + } + + return peerID, nil +} - return peer.ID(h) +// generateNetworkingKey generates a Flow ECDSA key using the given seed +func generateNetworkingKey(s flow.Identifier) (crypto.PrivateKey, error) { + seed := make([]byte, crypto.KeyGenSeedMinLen) + copy(seed, s[:]) + return crypto.GeneratePrivateKey(crypto.ECDSASecp256k1, seed) +} + +// PeerIdFixtures creates random and unique peer IDs (libp2p node IDs). +func PeerIdFixtures(t *testing.T, n int) []peer.ID { + peerIDs := make([]peer.ID, n) + for i := 0; i < n; i++ { + peerIDs[i] = PeerIdFixture(t) + } + return peerIDs }