diff --git a/bazel/import_llvm.bzl b/bazel/import_llvm.bzl index 214fdf097..08b84a691 100644 --- a/bazel/import_llvm.bzl +++ b/bazel/import_llvm.bzl @@ -7,7 +7,7 @@ load( def import_llvm(name): """Imports LLVM.""" - LLVM_COMMIT = "bf17016a92bc8a23d2cdd2b51355dd4eb5019c68" + LLVM_COMMIT = "13c761789753862a7cc31a2a26f23010afa668b9" new_git_repository( name = name, diff --git a/patches/llvm.patch b/patches/llvm.patch index 3d2a2525c..8b54ffba7 100644 --- a/patches/llvm.patch +++ b/patches/llvm.patch @@ -1,207 +1,1156 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h ---- a/mlir/include/mlir/IR/TypeRange.h -+++ b/mlir/include/mlir/IR/TypeRange.h -@@ -29,12 +29,11 @@ - /// a SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. --class TypeRange -- : public llvm::detail::indexed_accessor_range_base< -- TypeRange, -- llvm::PointerUnion, -- Type, Type, Type> { -+class TypeRange : public llvm::detail::indexed_accessor_range_base< -+ TypeRange, -+ llvm::PointerUnion, -+ Type, Type, Type> { - public: - using RangeBaseT::RangeBaseT; - TypeRange(ArrayRef types = std::nullopt); -@@ -45,11 +44,8 @@ - TypeRange(ValueTypeRange values) - : TypeRange(ValueRange(ValueRangeT(values.begin().getCurrent(), - values.end().getCurrent()))) {} -- -- TypeRange(Type type) : TypeRange(type, /*count=*/1) {} -- template , Arg> && -- !std::is_constructible_v>> -+ template , Arg>::value>> - TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} - TypeRange(std::initializer_list types) - : TypeRange(ArrayRef(types)) {} -@@ -60,9 +56,8 @@ - /// * A pointer to the first element of an array of types. - /// * A pointer to the first element of an array of operands. - /// * A pointer to the first element of an array of results. -- /// * A single 'Type' instance. - using OwnerT = llvm::PointerUnion; -+ detail::OpResultImpl *>; - - /// See `llvm::detail::indexed_accessor_range_base` for details. - static OwnerT offset_base(OwnerT object, ptrdiff_t index); -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/ValueRange.h b/mlir/include/mlir/IR/ValueRange.h ---- a/mlir/include/mlir/IR/ValueRange.h -+++ b/mlir/include/mlir/IR/ValueRange.h -@@ -374,16 +374,16 @@ - /// SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. --class ValueRange final : public llvm::detail::indexed_accessor_range_base< -- ValueRange, -- PointerUnion, -- Value, Value, Value> { -+class ValueRange final -+ : public llvm::detail::indexed_accessor_range_base< -+ ValueRange, -+ PointerUnion, -+ Value, Value, Value> { - public: - /// The type representing the owner of a ValueRange. This is either a list of -- /// values, operands, or results or a single value. -+ /// values, operands, or results. 
- using OwnerT = -- PointerUnion; -+ PointerUnion; - - using RangeBaseT::RangeBaseT; - -@@ -392,7 +392,7 @@ - std::is_constructible, Arg>::value && - !std::is_convertible::value>> - ValueRange(Arg &&arg) : ValueRange(ArrayRef(std::forward(arg))) {} -- ValueRange(Value value) : ValueRange(value, /*count=*/1) {} -+ ValueRange(const Value &value) : ValueRange(&value, /*count=*/1) {} - ValueRange(const std::initializer_list &values) - : ValueRange(ArrayRef(values)) {} - ValueRange(iterator_range values) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp ---- a/mlir/lib/IR/OperationSupport.cpp -+++ b/mlir/lib/IR/OperationSupport.cpp -@@ -653,15 +653,6 @@ - /// See `llvm::detail::indexed_accessor_range_base` for details. - ValueRange::OwnerT ValueRange::offset_base(const OwnerT &owner, - ptrdiff_t index) { -- if (llvm::isa_and_nonnull(owner)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? owner : nullptr; -- } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -@@ -670,10 +661,6 @@ +diff -ruN --strip-trailing-cr a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h ++++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +@@ -513,12 +513,6 @@ + Entity loadElementAt(mlir::Location loc, fir::FirOpBuilder &builder, + Entity entity, mlir::ValueRange oneBasedIndices); + +-/// Return a vector of extents for the given entity. +-/// The function creates new operations, but tries to clean-up +-/// after itself. +-llvm::SmallVector +-genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity); +- + } // namespace hlfir + + #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp ++++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +@@ -1421,15 +1421,3 @@ + return loadTrivialScalar(loc, builder, + getElementAt(loc, builder, entity, oneBasedIndices)); } - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Value ValueRange::dereference_iterator(const OwnerT &owner, ptrdiff_t index) { -- if (auto value = llvm::dyn_cast_if_present(owner)) { -- assert(index == 0 && "cannot offset into single-value 'ValueRange'"); -- return value; +- +-llvm::SmallVector +-hlfir::genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, +- hlfir::Entity entity) { +- entity = hlfir::derefPointersAndAllocatables(loc, builder, entity); +- mlir::Value shape = hlfir::genShape(loc, builder, entity); +- llvm::SmallVector extents = +- hlfir::getExplicitExtentsFromShape(shape, builder); +- if (shape.getUses().empty()) +- shape.getDefiningOp()->erase(); +- return extents; +-} +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp ++++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +@@ -37,79 +37,6 @@ + + namespace { + +-// Helper class to generate operations related to computing +-// product of values. +-class ProductFactory { +-public: +- ProductFactory(mlir::Location loc, fir::FirOpBuilder &builder) +- : loc(loc), builder(builder) {} +- +- // Generate an update of the inner product value: +- // acc += v1 * v2, OR +- // acc += CONJ(v1) * v2, OR +- // acc ||= v1 && v2 +- // +- // CONJ parameter specifies whether the first complex product argument +- // needs to be conjugated. +- template +- mlir::Value genAccumulateProduct(mlir::Value acc, mlir::Value v1, +- mlir::Value v2) { +- mlir::Type resultType = acc.getType(); +- acc = castToProductType(acc, resultType); +- v1 = castToProductType(v1, resultType); +- v2 = castToProductType(v2, resultType); +- mlir::Value result; +- if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- if constexpr (CONJ) +- result = fir::IntrinsicLibrary{builder, loc}.genConjg(resultType, v1); +- else +- result = v1; +- +- result = builder.create( +- loc, acc, builder.create(loc, result, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else { +- llvm_unreachable("unsupported type"); +- } +- +- return builder.createConvert(loc, resultType, result); - } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return value[index]; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/TypeRange.cpp b/mlir/lib/IR/TypeRange.cpp ---- a/mlir/lib/IR/TypeRange.cpp -+++ b/mlir/lib/IR/TypeRange.cpp -@@ -31,23 +31,12 @@ - this->base = result; - else if (auto *operand = llvm::dyn_cast_if_present(owner)) - this->base = operand; -- else if (auto value = llvm::dyn_cast_if_present(owner)) -- this->base = value.getType(); - else - this->base = cast(owner); - } +- +-private: +- mlir::Location loc; +- fir::FirOpBuilder &builder; +- +- mlir::Value castToProductType(mlir::Value value, mlir::Type type) { +- if (mlir::isa(type)) +- return builder.createConvert(loc, builder.getIntegerType(1), value); +- +- // TODO: the multiplications/additions by/of zero resulting from +- // complex * real are optimized by LLVM under -fno-signed-zeros +- // -fno-honor-nans. 
+- // We can make them disappear by default if we: +- // * either expand the complex multiplication into real +- // operations, OR +- // * set nnan nsz fast-math flags to the complex operations. +- if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { +- mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); +- fir::factory::Complex helper(builder, loc); +- mlir::Type partType = helper.getComplexPartType(type); +- return helper.insertComplexPart(zeroCmplx, +- castToProductType(value, partType), +- /*isImagPart=*/false); +- } +- return builder.createConvert(loc, type, value); +- } +-}; +- + class TransposeAsElementalConversion + : public mlir::OpRewritePattern { + public: +@@ -163,8 +90,11 @@ + static mlir::Value genResultShape(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity array) { +- llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); + + // transpose indices + assert(inExtents.size() == 2 && "checked in TransposeOp::validate"); +@@ -207,7 +137,7 @@ + mlir::Value resultShape, dimExtent; + llvm::SmallVector arrayExtents; + if (isTotalReduction) +- arrayExtents = hlfir::genExtentsVector(loc, builder, array); ++ arrayExtents = genArrayExtents(loc, builder, array); + else + std::tie(resultShape, dimExtent) = + genResultShapeForPartialReduction(loc, builder, array, dimVal); +@@ -233,8 +163,7 @@ + // If DIM is not present, do total reduction. + + // Initial value for the reduction. +- mlir::Value reductionInitValue = +- fir::factory::createZeroValue(builder, loc, elementType); ++ mlir::Value reductionInitValue = genInitValue(loc, builder, elementType); + + // The reduction loop may be unordered if FastMathFlags::reassoc + // transformations are allowed. The integer reduction is always +@@ -335,6 +264,17 @@ + } + + private: ++ static llvm::SmallVector ++ genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, ++ hlfir::Entity array) { ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); ++ return inExtents; ++ } ++ + // Return fir.shape specifying the shape of the result + // of a SUM reduction with DIM=dimVal. The second return value + // is the extent of the DIM dimension. +@@ -343,7 +283,7 @@ + fir::FirOpBuilder &builder, + hlfir::Entity array, int64_t dimVal) { + llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ genArrayExtents(loc, builder, array); + assert(dimVal > 0 && dimVal <= static_cast(inExtents.size()) && + "DIM must be present and a positive constant not exceeding " + "the array's rank"); +@@ -353,6 +293,26 @@ + return {builder.create(loc, inExtents), dimExtent}; + } + ++ // Generate the initial value for a SUM reduction with the given ++ // data type. 
++ static mlir::Value genInitValue(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type elementType) { ++ if (auto ty = mlir::dyn_cast(elementType)) { ++ const llvm::fltSemantics &sem = ty.getFloatSemantics(); ++ return builder.createRealConstant(loc, elementType, ++ llvm::APFloat::getZero(sem)); ++ } else if (auto ty = mlir::dyn_cast(elementType)) { ++ mlir::Value initValue = genInitValue(loc, builder, ty.getElementType()); ++ return fir::factory::Complex{builder, loc}.createComplex(ty, initValue, ++ initValue); ++ } else if (mlir::isa(elementType)) { ++ return builder.createIntegerConstant(loc, elementType, 0); ++ } ++ ++ llvm_unreachable("unsupported SUM reduction type"); ++ } ++ + // Generate scalar addition of the two values (of the same data type). + static mlir::Value genScalarAdd(mlir::Location loc, + fir::FirOpBuilder &builder, +@@ -610,10 +570,16 @@ + static std::tuple + genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity input1, hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); ++ mlir::Value input1Shape = hlfir::genShape(loc, builder, input1); ++ llvm::SmallVector input1Extents = ++ hlfir::getExplicitExtentsFromShape(input1Shape, builder); ++ if (input1Shape.getUses().empty()) ++ input1Shape.getDefiningOp()->erase(); ++ mlir::Value input2Shape = hlfir::genShape(loc, builder, input2); ++ llvm::SmallVector input2Extents = ++ hlfir::getExplicitExtentsFromShape(input2Shape, builder); ++ if (input2Shape.getUses().empty()) ++ input2Shape.getDefiningOp()->erase(); - /// See `llvm::detail::indexed_accessor_range_base` for details. - TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { -- if (llvm::isa_and_nonnull(object)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? object : nullptr; + llvm::SmallVector newExtents; + mlir::Value innerProduct1Extent, innerProduct2Extent; +@@ -661,6 +627,60 @@ + innerProductExtent[0]}; + } + ++ static mlir::Value castToProductType(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Value value, mlir::Type type) { ++ if (mlir::isa(type)) ++ return builder.createConvert(loc, builder.getIntegerType(1), value); ++ ++ // TODO: the multiplications/additions by/of zero resulting from ++ // complex * real are optimized by LLVM under -fno-signed-zeros ++ // -fno-honor-nans. ++ // We can make them disappear by default if we: ++ // * either expand the complex multiplication into real ++ // operations, OR ++ // * set nnan nsz fast-math flags to the complex operations. 
++ if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { ++ mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); ++ fir::factory::Complex helper(builder, loc); ++ mlir::Type partType = helper.getComplexPartType(type); ++ return helper.insertComplexPart( ++ zeroCmplx, castToProductType(loc, builder, value, partType), ++ /*isImagPart=*/false); ++ } ++ return builder.createConvert(loc, type, value); ++ } ++ ++ // Generate an update of the inner product value: ++ // acc += v1 * v2, OR ++ // acc ||= v1 && v2 ++ static mlir::Value genAccumulateProduct(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type resultType, ++ mlir::Value acc, mlir::Value v1, ++ mlir::Value v2) { ++ acc = castToProductType(loc, builder, acc, resultType); ++ v1 = castToProductType(loc, builder, v1, resultType); ++ v2 = castToProductType(loc, builder, v2, resultType); ++ mlir::Value result; ++ if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else ++ llvm_unreachable("unsupported type"); ++ ++ return builder.createConvert(loc, resultType, result); ++ } ++ + static mlir::LogicalResult + genContiguousMatmul(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity result, mlir::Value resultShape, +@@ -728,9 +748,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {I, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -765,9 +785,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {J, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -797,9 +817,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -865,9 +885,9 @@ + hlfir::loadElementAt(loc, builder, lhs, lhsIndices); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, rhsIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, reductionArgs[0], 
lhsElementValue, ++ rhsElementValue); + return {productValue}; + }; + llvm::SmallVector innerProductValue = +@@ -884,73 +904,6 @@ + } + }; + +-class DotProductConversion +- : public mlir::OpRewritePattern { +-public: +- using mlir::OpRewritePattern::OpRewritePattern; +- +- llvm::LogicalResult +- matchAndRewrite(hlfir::DotProductOp product, +- mlir::PatternRewriter &rewriter) const override { +- hlfir::Entity op = hlfir::Entity{product}; +- if (!op.isScalar()) +- return rewriter.notifyMatchFailure(product, "produces non-scalar result"); +- +- mlir::Location loc = product.getLoc(); +- fir::FirOpBuilder builder{rewriter, product.getOperation()}; +- hlfir::Entity lhs = hlfir::Entity{product.getLhs()}; +- hlfir::Entity rhs = hlfir::Entity{product.getRhs()}; +- mlir::Type resultElementType = product.getType(); +- bool isUnordered = mlir::isa(resultElementType) || +- mlir::isa(resultElementType) || +- static_cast(builder.getFastMathFlags() & +- mlir::arith::FastMathFlags::reassoc); +- +- mlir::Value extent = genProductExtent(loc, builder, lhs, rhs); +- +- auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, +- mlir::ValueRange oneBasedIndices, +- mlir::ValueRange reductionArgs) +- -> llvm::SmallVector { +- hlfir::Entity lhsElementValue = +- hlfir::loadElementAt(loc, builder, lhs, oneBasedIndices); +- hlfir::Entity rhsElementValue = +- hlfir::loadElementAt(loc, builder, rhs, oneBasedIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); +- return {productValue}; +- }; +- +- mlir::Value initValue = +- fir::factory::createZeroValue(builder, loc, resultElementType); +- +- llvm::SmallVector result = hlfir::genLoopNestWithReductions( +- loc, builder, {extent}, +- /*reductionInits=*/{initValue}, genBody, isUnordered); +- +- rewriter.replaceOp(product, result[0]); +- return mlir::success(); - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(object)) -@@ -59,10 +48,6 @@ - - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { -- if (auto type = llvm::dyn_cast_if_present(object)) { -- assert(index == 0 && "cannot offset into single-value 'TypeRange'"); -- return type; +- +-private: +- static mlir::Value genProductExtent(mlir::Location loc, +- fir::FirOpBuilder &builder, +- hlfir::Entity input1, +- hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); +- +- assert(input1Extents.size() == 1 && input2Extents.size() == 1 && +- "hlfir.dot_product arguments must be vectors"); +- llvm::SmallVector extent = +- fir::factory::deduceOptimalExtents(input1Extents, input2Extents); +- return extent[0]; - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return (value + index)->getType(); - if (auto *operand = llvm::dyn_cast_if_present(object)) -diff -ruN --strip-trailing-cr a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp ---- a/mlir/unittests/IR/OperationSupportTest.cpp -+++ b/mlir/unittests/IR/OperationSupportTest.cpp -@@ -313,21 +313,4 @@ - op2->destroy(); +-}; +- + class SimplifyHLFIRIntrinsics + : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { + public: +@@ -986,8 +939,6 @@ + if (forceMatmulAsElemental || this->allowNewSideEffects) + patterns.insert>(context); + +- patterns.insert(context); +- + if (mlir::failed(mlir::applyPatternsGreedily( + getOperation(), std::move(patterns), config))) { + mlir::emitError(getOperation()->getLoc(), +diff -ruN --strip-trailing-cr a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir ++++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +@@ -1,144 +0,0 @@ +-// Test hlfir.dot_product simplification to a reduction loop: +-// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s +- +-func.func @dot_product_integer(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> i32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> i32 +- return %res : i32 +-} +-// CHECK-LABEL: func.func @dot_product_integer( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> i32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (i32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> i16 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> i32 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 +-// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[VAL_10]] : i32 +-// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 +-// CHECK: fir.result %[[VAL_13]] : i32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : i32 +-// CHECK: } +- +-func.func @dot_product_real(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> f32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_real( +-// 
CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> f32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (f32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> f16 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (f16) -> f32 +-// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_9]], %[[VAL_11]] : f32 +-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_12]] : f32 +-// CHECK: fir.result %[[VAL_13]] : f32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : f32 +-// CHECK: } +- +-func.func @dot_product_complex(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_15:.*]] = fir.extract_value %[[VAL_12]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_16:.*]] = arith.negf %[[VAL_15]] : f32 +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_12]], %[[VAL_16]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.mulc %[[VAL_17]], %[[VAL_14]] : complex +-// CHECK: %[[VAL_19:.*]] = fir.addc %[[VAL_11]], %[[VAL_18]] : complex +-// CHECK: fir.result %[[VAL_19]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_real_complex(%arg0: !hlfir.expr, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_real_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : 
index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.undefined complex +-// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_12]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_17]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_20:.*]] = arith.negf %[[VAL_19]] : f32 +-// CHECK: %[[VAL_21:.*]] = fir.insert_value %[[VAL_17]], %[[VAL_20]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_22:.*]] = fir.mulc %[[VAL_21]], %[[VAL_18]] : complex +-// CHECK: %[[VAL_23:.*]] = fir.addc %[[VAL_11]], %[[VAL_22]] : complex +-// CHECK: fir.result %[[VAL_23]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_logical(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> !fir.logical<4> { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> !fir.logical<4> +- return %res : !fir.logical<4> +-} +-// CHECK-LABEL: func.func @dot_product_logical( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> !fir.logical<4> { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant false +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +-// CHECK: %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_6]]) -> (!fir.logical<4>) { +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<1> +-// CHECK: %[[VAL_11:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<4> +-// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_9]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (!fir.logical<1>) -> i1 +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_15:.*]] = arith.andi %[[VAL_13]], %[[VAL_14]] : i1 +-// CHECK: %[[VAL_16:.*]] = arith.ori %[[VAL_12]], %[[VAL_15]] : i1 +-// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +-// CHECK: fir.result %[[VAL_17]] : !fir.logical<4> +-// CHECK: } +-// CHECK: return %[[VAL_7]] : !fir.logical<4> +-// CHECK: } +- +-func.func @dot_product_known_dim(%arg0: !hlfir.expr<10xf32>, %arg1: !hlfir.expr) -> f32 { +- %res1 = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr<10xf32>, !hlfir.expr) -> f32 +- %res2 = 
hlfir.dot_product %arg1 %arg0 : (!hlfir.expr, !hlfir.expr<10xf32>) -> f32 +- %res = arith.addf %res1, %res2 : f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_known_dim( +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +diff -ruN --strip-trailing-cr a/libcxx/include/__config b/libcxx/include/__config +--- a/libcxx/include/__config ++++ b/libcxx/include/__config +@@ -1166,9 +1166,7 @@ + # define _LIBCPP_NOESCAPE + # endif + +-// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in +-// https://github.com/llvm/llvm-project/pull/118710 has been analyzed +-# define _LIBCPP_NODEBUG ++# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] + + # if __has_attribute(__standalone_debug__) + # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) +diff -ruN --strip-trailing-cr a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +--- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp ++++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +@@ -27,7 +27,7 @@ + check_factories.registerCheck("libcpp-header-exportable-declarations"); + check_factories.registerCheck("libcpp-hide-from-abi"); + check_factories.registerCheck("libcpp-internal-ftms"); +- // check_factories.registerCheck("libcpp-nodebug-on-aliases"); ++ check_factories.registerCheck("libcpp-nodebug-on-aliases"); + check_factories.registerCheck("libcpp-cpp-version-check"); + check_factories.registerCheck("libcpp-robust-against-adl"); + check_factories.registerCheck("libcpp-uglify-attributes"); +diff -ruN --strip-trailing-cr a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp ++++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +@@ -1140,8 +1140,6 @@ + + setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); + +- setTargetDAGCombine(ISD::SHL); +- + // In case of strict alignment, avoid an excessive number of byte wide stores. + MaxStoresPerMemsetOptSize = 8; + MaxStoresPerMemset = +@@ -26473,43 +26471,6 @@ + return NVCAST; } --TEST(ValueRangeTest, ValueConstructable) { -- MLIRContext context; -- Builder builder(&context); +-/// If the operand is a bitwise AND with a constant RHS, and the shift has a +-/// constant RHS and is the only use, we can pull it out of the shift, i.e. +-/// +-/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) +-/// +-/// We prefer this canonical form to match existing isel patterns. +-static SDValue performSHLCombine(SDNode *N, +- TargetLowering::DAGCombinerInfo &DCI, +- SelectionDAG &DAG) { +- if (DCI.isBeforeLegalizeOps()) +- return SDValue(); - -- Operation *useOp = -- createOp(&context, /*operands=*/std::nullopt, builder.getIntegerType(16)); -- // Valid construction despite a temporary 'OpResult'. 
-- ValueRange operands = useOp->getResult(0); +- SDValue Op0 = N->getOperand(0); +- if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) +- return SDValue(); - -- useOp->setOperands(operands); -- EXPECT_EQ(useOp->getNumOperands(), 1u); -- EXPECT_EQ(useOp->getOperand(0), useOp->getResult(0)); +- SDValue C1 = Op0->getOperand(1); +- SDValue C2 = N->getOperand(1); +- if (!isa(C1) || !isa(C2)) +- return SDValue(); - -- useOp->dropAllUses(); -- useOp->destroy(); +- // Might be folded into shifted op, do not lower. +- if (N->hasOneUse()) { +- unsigned UseOpc = N->user_begin()->getOpcode(); +- if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || +- UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) +- return SDValue(); +- } +- +- SDLoc DL(N); +- EVT VT = N->getValueType(0); +- SDValue X = Op0->getOperand(0); +- SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); +- SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); +- return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); +-} +- + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; +@@ -26855,8 +26816,6 @@ + return performCTLZCombine(N, DAG, Subtarget); + case ISD::SCALAR_TO_VECTOR: + return performScalarToVectorCombine(N, DCI, DAG); +- case ISD::SHL: +- return performSHLCombine(N, DCI, DAG); + } + return SDValue(); + } +diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +@@ -4979,7 +4979,7 @@ + // the subvector length. + const unsigned VecVF = getNumElements(Vec->getType()); + SmallVector Mask(VecVF, PoisonMaskElem); +- std::iota(Mask.begin(), std::next(Mask.begin(), Index), 0); ++ std::iota(Mask.begin(), Mask.end(), 0); + for (unsigned I : seq(SubVecVF)) + Mask[I + Index] = I + VecVF; + if (Generator) { +@@ -13956,11 +13956,12 @@ + Instruction *InsElt; + if (auto *VecTy = dyn_cast(Scalar->getType())) { + assert(SLPReVec && "FixedVectorType is not expected."); +- Vec = InsElt = cast(createInsertVector( +- Builder, Vec, Scalar, Pos * getNumElements(VecTy))); +- auto *II = dyn_cast(InsElt); ++ Vec = ++ createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy)); ++ auto *II = dyn_cast(Vec); + if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + return Vec; ++ InsElt = II; + } else { + Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos)); + InsElt = dyn_cast(Vec); +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll ++++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +@@ -190,7 +190,8 @@ + define i8 @test_i8_7_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 1 +@@ -199,7 +200,8 @@ + define i8 @test_i8_7_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #4, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 4 +@@ -227,8 +229,8 @@ + define i8 @test_i8_28_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_1: + ; CHECK: // 
%bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0x38 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 1 +@@ -237,8 +239,8 @@ + define i8 @test_i8_28_mask_shl_2(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_2: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #2 +-; CHECK-NEXT: and w0, w8, #0x70 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 2 +@@ -247,8 +249,8 @@ + define i8 @test_i8_28_mask_shl_3(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0xe0 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 3 +@@ -257,8 +259,8 @@ + define i8 @test_i8_28_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0xc ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 4 +@@ -268,8 +270,8 @@ + define i8 @test_i8_224_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0x60 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 224 + %t1 = shl i8 %t0, 1 +@@ -463,7 +465,8 @@ + define i16 @test_i16_127_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 1 +@@ -472,7 +475,8 @@ + define i16 @test_i16_127_mask_shl_8(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #8, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 8 +@@ -500,8 +504,8 @@ + define i16 @test_i16_2032_mask_shl_3(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0x3f80 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 3 +@@ -510,8 +514,8 @@ + define i16 @test_i16_2032_mask_shl_4(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0x7f00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 4 +@@ -520,8 +524,8 @@ + define i16 @test_i16_2032_mask_shl_5(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_5: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #5 +-; CHECK-NEXT: and w0, w8, #0xfe00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #5 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 5 +@@ -530,8 +534,8 @@ + define i16 @test_i16_2032_mask_shl_6(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_6: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #6 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x3f0 ++; CHECK-NEXT: lsl w0, w8, #6 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 6 +@@ -541,8 +545,8 @@ + define i16 @test_i16_65024_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: 
test_i16_65024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x7e00 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 65024 + %t1 = shl i16 %t0, 1 +@@ -736,7 +740,8 @@ + define i32 @test_i32_32767_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 1 +@@ -745,7 +750,8 @@ + define i32 @test_i32_32767_mask_shl_16(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_16: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #16, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #16 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 16 +@@ -773,8 +779,8 @@ + define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_7: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #7 +-; CHECK-NEXT: and w0, w8, #0x3fff8000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #7 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 7 +@@ -783,8 +789,8 @@ + define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #8 +-; CHECK-NEXT: and w0, w8, #0x7fff0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 8 +@@ -793,8 +799,8 @@ + define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_9: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #9 +-; CHECK-NEXT: and w0, w8, #0xfffe0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #9 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 9 +@@ -803,8 +809,8 @@ + define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_10: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #10 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x3fff00 ++; CHECK-NEXT: lsl w0, w8, #10 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 10 +@@ -814,8 +820,8 @@ + define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x7ffe0000 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 4294836224 + %t1 = shl i32 %t0, 1 +@@ -1009,7 +1015,8 @@ + define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w0, w0, #1 ++; CHECK-NEXT: and x8, x0, #0x7fffffff ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 2147483647 + %t1 = shl i64 %t0, 1 +@@ -1047,8 +1054,8 @@ + define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #15 +-; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #15 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 15 +@@ -1057,8 +1064,8 @@ + define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: + ; CHECK: // %bb.0: +-; 
CHECK-NEXT: lsl x8, x0, #16 +-; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #16 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 16 +@@ -1067,8 +1074,8 @@ + define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #17 +-; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #17 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 17 +@@ -1077,8 +1084,8 @@ + define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #18 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x3fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #18 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 18 +@@ -1088,8 +1095,8 @@ + define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #1 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 18446744065119617024 + %t1 = shl i64 %t0, 1 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll +--- a/llvm/test/CodeGen/AArch64/extract-bits.ll ++++ b/llvm/test/CodeGen/AArch64/extract-bits.ll +@@ -1013,8 +1013,8 @@ + define i32 @c2_i32(i32 %arg) nounwind { + ; CHECK-LABEL: c2_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w0, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 +@@ -1063,8 +1063,8 @@ + define i64 @c2_i64(i64 %arg) nounwind { + ; CHECK-LABEL: c2_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x0, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x0, x8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 +@@ -1120,8 +1120,8 @@ + define void @c7_i32(i32 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w8, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w8, w8, #2 + ; CHECK-NEXT: str w8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 +@@ -1163,8 +1163,8 @@ + define void @c7_i64(i64 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x8, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x8, x8, #2 + ; CHECK-NEXT: str x8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll +--- a/llvm/test/CodeGen/AArch64/fpenv.ll ++++ b/llvm/test/CodeGen/AArch64/fpenv.ll +@@ -4,11 +4,11 @@ + define void @func_set_rounding_dyn(i32 %rm) { + ; CHECK-LABEL: func_set_rounding_dyn: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w9, w0, #22 ++; CHECK-NEXT: sub w9, w0, #1 + ; CHECK-NEXT: mrs x8, FPCR ++; CHECK-NEXT: and w9, w9, #0x3 + ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff +-; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304 +-; CHECK-NEXT: and w9, w9, 
#0xc00000 ++; CHECK-NEXT: lsl w9, w9, #22 + ; CHECK-NEXT: orr x8, x8, x9 + ; CHECK-NEXT: msr FPCR, x8 + ; CHECK-NEXT: ret +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll +--- a/llvm/test/CodeGen/AArch64/xbfiz.ll ++++ b/llvm/test/CodeGen/AArch64/xbfiz.ll +@@ -69,19 +69,3 @@ + %and = and i64 %shl, 4294967295 + ret i64 %and + } +- +-define i64 @lsl_zext_i8_i64(i8 %b) { +-; CHECK-LABEL: lsl_zext_i8_i64: +-; CHECK: ubfiz x0, x0, #1, #8 +- %1 = zext i8 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 -} - - } // namespace -diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ---- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -@@ -43,10 +43,7 @@ - - gentbl( - name = "diagnostic_defs_gen", -- tbl_outs = [( -- "-gen-clang-diags-defs -clang-component=%s" % c, -- "include/clang/Basic/Diagnostic%sKinds.inc" % c, -- ) for c in [ -+ tbl_outs = [out for c in [ - "AST", - "Analysis", - "Comment", -@@ -60,6 +57,15 @@ - "Refactoring", - "Sema", - "Serialization", -+ ] for out in [ -+ ( -+ "-gen-clang-diags-defs -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sKinds.inc" % c, -+ ), -+ ( -+ "-gen-clang-diags-enums -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sEnums.inc" % c, -+ ), - ]] + [ - ( - "-gen-clang-diag-groups", +-define i64 @lsl_zext_i16_i64(i16 %b) { +-; CHECK-LABEL: lsl_zext_i16_i64: +-; CHECK: ubfiz x0, x0, #1, #16 +- %1 = zext i16 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 +-} +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll ++++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +@@ -0,0 +1,81 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ++; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s ++ ++define <16 x double> @test(ptr %x, double %v, double %a) { ++; CHECK-LABEL: define <16 x double> @test( ++; CHECK-SAME: ptr [[X:%.*]], double [[V:%.*]], double [[A:%.*]]) { ++; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 ++; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 9 ++; CHECK-NEXT: [[TMP1:%.*]] = load <6 x double>, ptr [[X]], align 4 ++; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[GEP6]], align 4 ++; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[GEP8]], align 4 ++; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> poison, double [[A]], i32 0 ++; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x double> [[TMP4]], <16 x double> poison, <16 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0) ++; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> ++; CHECK-NEXT: [[TMP12:%.*]] = 
shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> ++; CHECK-NEXT: [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6) ++; CHECK-NEXT: [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8) ++; CHECK-NEXT: [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10) ++; CHECK-NEXT: [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12) ++; CHECK-NEXT: [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14) ++; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]] ++; CHECK-NEXT: ret <16 x double> [[TMP18]] ++; ++ %gep1 = getelementptr inbounds double, ptr %x, i64 1 ++ %gep2 = getelementptr inbounds double, ptr %x, i64 2 ++ %gep3 = getelementptr inbounds double, ptr %x, i64 3 ++ %gep4 = getelementptr inbounds double, ptr %x, i64 4 ++ %gep5 = getelementptr inbounds double, ptr %x, i64 5 ++ %gep6 = getelementptr inbounds double, ptr %x, i64 8 ++ %gep7 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep8 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep9 = getelementptr inbounds double, ptr %x, i64 10 ++ %x0 = load double, ptr %x, align 4 ++ %x1 = load double, ptr %gep1, align 4 ++ %x2 = load double, ptr %gep2, align 4 ++ %x3 = load double, ptr %gep3, align 4 ++ %x4 = load double, ptr %gep4, align 4 ++ %x5 = load double, ptr %gep5, align 4 ++ %x6 = load double, ptr %gep6, align 4 ++ %x7 = load double, ptr %gep7, align 4 ++ %x8 = load double, ptr %gep8, align 4 ++ %x9 = load double, ptr %gep9, align 4 ++ %add1 = fadd double %a, %x0 ++ %add2 = fadd double %a, %x1 ++ %add3 = fadd double %a, %x2 ++ %add4 = fadd double %a, %x3 ++ %add5 = fadd double %a, %x4 ++ %add6 = fadd double %a, %x5 ++ %add7 = fadd double %a, %x6 ++ %add8 = fadd double %a, %x7 ++ %add9 = fadd double %a, %x8 ++ %add10 = fadd double %a, %x9 ++ %add11 = fadd double %a, %v ++ %add12 = fadd double %a, %v ++ %add13 = fadd double %a, %v ++ %add14 = fadd double %a, %v ++ %add15 = fadd double %a, %v ++ %add16 = fadd double %a, %v ++ %i0 = insertelement <16 x double> poison, double %add1, i32 0 ++ %i1 = insertelement <16 x double> %i0, double %add2, i32 1 ++ %i2 = insertelement <16 x double> %i1, double %add3, i32 2 ++ %i3 = insertelement <16 x double> %i2, double %add4, i32 3 ++ %i4 = insertelement <16 x double> %i3, double %add5, i32 4 ++ %i5 = insertelement <16 x double> %i4, double %add6, i32 5 ++ %i6 = insertelement <16 x double> %i5, double %add7, i32 6 ++ %i7 = insertelement <16 x double> %i6, double %add8, i32 7 ++ %i8 = insertelement <16 x double> %i7, double %add9, i32 8 ++ %i9 = insertelement <16 x double> %i8, double %add10, i32 9 ++ %i10 = insertelement <16 x double> %i9, double %add11, i32 10 ++ %i11 = insertelement <16 x double> %i10, double %add12, i32 11 ++ %i12 = insertelement <16 x double> %i11, double %add13, i32 12 ++ %i13 = insertelement <16 x double> %i12, double %add14, i32 13 ++ %i14 = insertelement <16 x double> %i13, double %add15, i32 14 ++ %i15 = insertelement <16 x double> %i14, double %add16, i32 15 ++ ret <16 x double> %i15 ++}
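
Note on the AArch64 portion of the regenerated llvm.patch: it temporarily reverts the `performSHLCombine` DAG combine (and its `setTargetDAGCombine(ISD::SHL)` registration), whose own comment gives the canonicalization (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)). The const-shift-of-constmasked.ll, extract-bits.ll, fpenv.ll and xbfiz.ll CHECK lines accordingly move from shift-then-mask (and `ubfiz` bitfield forms) back to mask-then-shift. Below is a minimal standalone C++ sanity check of the algebraic identity that canonicalization relies on; the file name and constants are illustrative and not part of the patch.

    // shl_and_identity.cpp: exhaustively verify, over 16-bit inputs, that a
    // left shift distributes over AND with a constant mask:
    //   ((x & c1) << c2) == ((x << c2) & (c1 << c2))
    // c1/c2 mirror test_i8_28_mask_shl_3 from const-shift-of-constmasked.ll.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t c1 = 28, c2 = 3;
      for (uint32_t x = 0; x <= 0xFFFF; ++x)
        assert(((x & c1) << c2) == ((x << c2) & (c1 << c2)));
      return 0;
    }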
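
The flang portion of the new patch is likewise a temporary revert: it drops the shared `hlfir::genExtentsVector` helper, the `ProductFactory` class, and the `DotProductConversion` pattern (plus its simplify-hlfir-intrinsics-dotproduct.fir test), restoring the earlier local `genArrayExtents`, `genInitValue`, and static `genAccumulateProduct` helpers in SimplifyHLFIRIntrinsics.cpp. The restored accumulation step is documented in the patch as `acc += v1 * v2` for numeric types and `acc ||= v1 && v2` for logicals. The sketch below models that step and the surrounding reduction loop in plain C++ rather than FIR; the helper name and data are invented for illustration.

    // Scalar model of the inner-product accumulation used by the MATMUL
    // conversions: start from a zero accumulator (cf. the zero initial
    // values the pass creates) and apply the genAccumulateProduct update
    // once per element pair.
    #include <cassert>

    template <typename T>
    T accumulateProduct(T acc, T v1, T v2) {
      return acc + v1 * v2; // numeric case: acc += v1 * v2
    }

    inline bool accumulateProduct(bool acc, bool v1, bool v2) {
      return acc || (v1 && v2); // logical case: acc ||= v1 && v2
    }

    int main() {
      const int lhs[] = {1, 2, 3}, rhs[] = {4, 5, 6};
      int acc = 0; // zero-initialized accumulator
      for (int i = 0; i < 3; ++i)
        acc = accumulateProduct(acc, lhs[i], rhs[i]);
      assert(acc == 32); // 1*4 + 2*5 + 3*6
      return 0;
    }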
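
Finally, the SLPVectorizer.cpp hunk restores the full-identity form of the insert-subvector shuffle mask: `std::iota` now numbers every destination lane instead of only the lanes before `Index`, and lanes `[Index, Index + SubVecVF)` are then overwritten with indices taken from the inserted subvector (numbered from `VecVF` upward), as exercised by the new insert-subvector.ll test. A small sketch of that mask construction, with made-up vectorization-factor values:

    // Build the shuffle mask for inserting a SubVecVF-wide subvector into a
    // VecVF-wide vector at lane Index: identity indices select from the
    // destination vector, indices >= VecVF select from the subvector.
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main() {
      const unsigned VecVF = 8, SubVecVF = 2, Index = 4;
      std::vector<int> Mask(VecVF);
      std::iota(Mask.begin(), Mask.end(), 0); // identity over all lanes
      for (unsigned I = 0; I < SubVecVF; ++I)
        Mask[I + Index] = I + VecVF; // splice in the subvector's lanes
      for (int M : Mask)
        std::printf("%d ", M); // prints: 0 1 2 3 8 9 6 7
      std::printf("\n");
      return 0;
    }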