diff --git a/bazel/import_llvm.bzl b/bazel/import_llvm.bzl index 214fdf097..08b84a691 100644 --- a/bazel/import_llvm.bzl +++ b/bazel/import_llvm.bzl @@ -7,7 +7,7 @@ load( def import_llvm(name): """Imports LLVM.""" - LLVM_COMMIT = "bf17016a92bc8a23d2cdd2b51355dd4eb5019c68" + LLVM_COMMIT = "13c761789753862a7cc31a2a26f23010afa668b9" new_git_repository( name = name, diff --git a/patches/llvm.patch b/patches/llvm.patch index 3d2a2525c..8b54ffba7 100644 --- a/patches/llvm.patch +++ b/patches/llvm.patch @@ -1,207 +1,1156 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h ---- a/mlir/include/mlir/IR/TypeRange.h -+++ b/mlir/include/mlir/IR/TypeRange.h -@@ -29,12 +29,11 @@ - /// a SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. --class TypeRange -- : public llvm::detail::indexed_accessor_range_base< -- TypeRange, -- llvm::PointerUnion, -- Type, Type, Type> { -+class TypeRange : public llvm::detail::indexed_accessor_range_base< -+ TypeRange, -+ llvm::PointerUnion, -+ Type, Type, Type> { - public: - using RangeBaseT::RangeBaseT; - TypeRange(ArrayRef types = std::nullopt); -@@ -45,11 +44,8 @@ - TypeRange(ValueTypeRange values) - : TypeRange(ValueRange(ValueRangeT(values.begin().getCurrent(), - values.end().getCurrent()))) {} -- -- TypeRange(Type type) : TypeRange(type, /*count=*/1) {} -- template , Arg> && -- !std::is_constructible_v>> -+ template , Arg>::value>> - TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} - TypeRange(std::initializer_list types) - : TypeRange(ArrayRef(types)) {} -@@ -60,9 +56,8 @@ - /// * A pointer to the first element of an array of types. - /// * A pointer to the first element of an array of operands. - /// * A pointer to the first element of an array of results. -- /// * A single 'Type' instance. - using OwnerT = llvm::PointerUnion; -+ detail::OpResultImpl *>; - - /// See `llvm::detail::indexed_accessor_range_base` for details. - static OwnerT offset_base(OwnerT object, ptrdiff_t index); -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/ValueRange.h b/mlir/include/mlir/IR/ValueRange.h ---- a/mlir/include/mlir/IR/ValueRange.h -+++ b/mlir/include/mlir/IR/ValueRange.h -@@ -374,16 +374,16 @@ - /// SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. --class ValueRange final : public llvm::detail::indexed_accessor_range_base< -- ValueRange, -- PointerUnion, -- Value, Value, Value> { -+class ValueRange final -+ : public llvm::detail::indexed_accessor_range_base< -+ ValueRange, -+ PointerUnion, -+ Value, Value, Value> { - public: - /// The type representing the owner of a ValueRange. This is either a list of -- /// values, operands, or results or a single value. -+ /// values, operands, or results. 
- using OwnerT = -- PointerUnion; -+ PointerUnion; - - using RangeBaseT::RangeBaseT; - -@@ -392,7 +392,7 @@ - std::is_constructible, Arg>::value && - !std::is_convertible::value>> - ValueRange(Arg &&arg) : ValueRange(ArrayRef(std::forward(arg))) {} -- ValueRange(Value value) : ValueRange(value, /*count=*/1) {} -+ ValueRange(const Value &value) : ValueRange(&value, /*count=*/1) {} - ValueRange(const std::initializer_list &values) - : ValueRange(ArrayRef(values)) {} - ValueRange(iterator_range values) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp ---- a/mlir/lib/IR/OperationSupport.cpp -+++ b/mlir/lib/IR/OperationSupport.cpp -@@ -653,15 +653,6 @@ - /// See `llvm::detail::indexed_accessor_range_base` for details. - ValueRange::OwnerT ValueRange::offset_base(const OwnerT &owner, - ptrdiff_t index) { -- if (llvm::isa_and_nonnull(owner)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? owner : nullptr; -- } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -@@ -670,10 +661,6 @@ +diff -ruN --strip-trailing-cr a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h ++++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +@@ -513,12 +513,6 @@ + Entity loadElementAt(mlir::Location loc, fir::FirOpBuilder &builder, + Entity entity, mlir::ValueRange oneBasedIndices); + +-/// Return a vector of extents for the given entity. +-/// The function creates new operations, but tries to clean-up +-/// after itself. +-llvm::SmallVector +-genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity); +- + } // namespace hlfir + + #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp ++++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +@@ -1421,15 +1421,3 @@ + return loadTrivialScalar(loc, builder, + getElementAt(loc, builder, entity, oneBasedIndices)); } - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Value ValueRange::dereference_iterator(const OwnerT &owner, ptrdiff_t index) { -- if (auto value = llvm::dyn_cast_if_present(owner)) { -- assert(index == 0 && "cannot offset into single-value 'ValueRange'"); -- return value; +- +-llvm::SmallVector +-hlfir::genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, +- hlfir::Entity entity) { +- entity = hlfir::derefPointersAndAllocatables(loc, builder, entity); +- mlir::Value shape = hlfir::genShape(loc, builder, entity); +- llvm::SmallVector extents = +- hlfir::getExplicitExtentsFromShape(shape, builder); +- if (shape.getUses().empty()) +- shape.getDefiningOp()->erase(); +- return extents; +-} +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp ++++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +@@ -37,79 +37,6 @@ + + namespace { + +-// Helper class to generate operations related to computing +-// product of values. +-class ProductFactory { +-public: +- ProductFactory(mlir::Location loc, fir::FirOpBuilder &builder) +- : loc(loc), builder(builder) {} +- +- // Generate an update of the inner product value: +- // acc += v1 * v2, OR +- // acc += CONJ(v1) * v2, OR +- // acc ||= v1 && v2 +- // +- // CONJ parameter specifies whether the first complex product argument +- // needs to be conjugated. +- template +- mlir::Value genAccumulateProduct(mlir::Value acc, mlir::Value v1, +- mlir::Value v2) { +- mlir::Type resultType = acc.getType(); +- acc = castToProductType(acc, resultType); +- v1 = castToProductType(v1, resultType); +- v2 = castToProductType(v2, resultType); +- mlir::Value result; +- if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- if constexpr (CONJ) +- result = fir::IntrinsicLibrary{builder, loc}.genConjg(resultType, v1); +- else +- result = v1; +- +- result = builder.create( +- loc, acc, builder.create(loc, result, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else { +- llvm_unreachable("unsupported type"); +- } +- +- return builder.createConvert(loc, resultType, result); - } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return value[index]; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/TypeRange.cpp b/mlir/lib/IR/TypeRange.cpp ---- a/mlir/lib/IR/TypeRange.cpp -+++ b/mlir/lib/IR/TypeRange.cpp -@@ -31,23 +31,12 @@ - this->base = result; - else if (auto *operand = llvm::dyn_cast_if_present(owner)) - this->base = operand; -- else if (auto value = llvm::dyn_cast_if_present(owner)) -- this->base = value.getType(); - else - this->base = cast(owner); - } +- +-private: +- mlir::Location loc; +- fir::FirOpBuilder &builder; +- +- mlir::Value castToProductType(mlir::Value value, mlir::Type type) { +- if (mlir::isa(type)) +- return builder.createConvert(loc, builder.getIntegerType(1), value); +- +- // TODO: the multiplications/additions by/of zero resulting from +- // complex * real are optimized by LLVM under -fno-signed-zeros +- // -fno-honor-nans. 
+- // We can make them disappear by default if we: +- // * either expand the complex multiplication into real +- // operations, OR +- // * set nnan nsz fast-math flags to the complex operations. +- if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { +- mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); +- fir::factory::Complex helper(builder, loc); +- mlir::Type partType = helper.getComplexPartType(type); +- return helper.insertComplexPart(zeroCmplx, +- castToProductType(value, partType), +- /*isImagPart=*/false); +- } +- return builder.createConvert(loc, type, value); +- } +-}; +- + class TransposeAsElementalConversion + : public mlir::OpRewritePattern { + public: +@@ -163,8 +90,11 @@ + static mlir::Value genResultShape(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity array) { +- llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); + + // transpose indices + assert(inExtents.size() == 2 && "checked in TransposeOp::validate"); +@@ -207,7 +137,7 @@ + mlir::Value resultShape, dimExtent; + llvm::SmallVector arrayExtents; + if (isTotalReduction) +- arrayExtents = hlfir::genExtentsVector(loc, builder, array); ++ arrayExtents = genArrayExtents(loc, builder, array); + else + std::tie(resultShape, dimExtent) = + genResultShapeForPartialReduction(loc, builder, array, dimVal); +@@ -233,8 +163,7 @@ + // If DIM is not present, do total reduction. + + // Initial value for the reduction. +- mlir::Value reductionInitValue = +- fir::factory::createZeroValue(builder, loc, elementType); ++ mlir::Value reductionInitValue = genInitValue(loc, builder, elementType); + + // The reduction loop may be unordered if FastMathFlags::reassoc + // transformations are allowed. The integer reduction is always +@@ -335,6 +264,17 @@ + } + + private: ++ static llvm::SmallVector ++ genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, ++ hlfir::Entity array) { ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); ++ return inExtents; ++ } ++ + // Return fir.shape specifying the shape of the result + // of a SUM reduction with DIM=dimVal. The second return value + // is the extent of the DIM dimension. +@@ -343,7 +283,7 @@ + fir::FirOpBuilder &builder, + hlfir::Entity array, int64_t dimVal) { + llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ genArrayExtents(loc, builder, array); + assert(dimVal > 0 && dimVal <= static_cast(inExtents.size()) && + "DIM must be present and a positive constant not exceeding " + "the array's rank"); +@@ -353,6 +293,26 @@ + return {builder.create(loc, inExtents), dimExtent}; + } + ++ // Generate the initial value for a SUM reduction with the given ++ // data type. 
++ static mlir::Value genInitValue(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type elementType) { ++ if (auto ty = mlir::dyn_cast(elementType)) { ++ const llvm::fltSemantics &sem = ty.getFloatSemantics(); ++ return builder.createRealConstant(loc, elementType, ++ llvm::APFloat::getZero(sem)); ++ } else if (auto ty = mlir::dyn_cast(elementType)) { ++ mlir::Value initValue = genInitValue(loc, builder, ty.getElementType()); ++ return fir::factory::Complex{builder, loc}.createComplex(ty, initValue, ++ initValue); ++ } else if (mlir::isa(elementType)) { ++ return builder.createIntegerConstant(loc, elementType, 0); ++ } ++ ++ llvm_unreachable("unsupported SUM reduction type"); ++ } ++ + // Generate scalar addition of the two values (of the same data type). + static mlir::Value genScalarAdd(mlir::Location loc, + fir::FirOpBuilder &builder, +@@ -610,10 +570,16 @@ + static std::tuple + genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity input1, hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); ++ mlir::Value input1Shape = hlfir::genShape(loc, builder, input1); ++ llvm::SmallVector input1Extents = ++ hlfir::getExplicitExtentsFromShape(input1Shape, builder); ++ if (input1Shape.getUses().empty()) ++ input1Shape.getDefiningOp()->erase(); ++ mlir::Value input2Shape = hlfir::genShape(loc, builder, input2); ++ llvm::SmallVector input2Extents = ++ hlfir::getExplicitExtentsFromShape(input2Shape, builder); ++ if (input2Shape.getUses().empty()) ++ input2Shape.getDefiningOp()->erase(); - /// See `llvm::detail::indexed_accessor_range_base` for details. - TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { -- if (llvm::isa_and_nonnull(object)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? object : nullptr; + llvm::SmallVector newExtents; + mlir::Value innerProduct1Extent, innerProduct2Extent; +@@ -661,6 +627,60 @@ + innerProductExtent[0]}; + } + ++ static mlir::Value castToProductType(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Value value, mlir::Type type) { ++ if (mlir::isa(type)) ++ return builder.createConvert(loc, builder.getIntegerType(1), value); ++ ++ // TODO: the multiplications/additions by/of zero resulting from ++ // complex * real are optimized by LLVM under -fno-signed-zeros ++ // -fno-honor-nans. ++ // We can make them disappear by default if we: ++ // * either expand the complex multiplication into real ++ // operations, OR ++ // * set nnan nsz fast-math flags to the complex operations. 
++ if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { ++ mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); ++ fir::factory::Complex helper(builder, loc); ++ mlir::Type partType = helper.getComplexPartType(type); ++ return helper.insertComplexPart( ++ zeroCmplx, castToProductType(loc, builder, value, partType), ++ /*isImagPart=*/false); ++ } ++ return builder.createConvert(loc, type, value); ++ } ++ ++ // Generate an update of the inner product value: ++ // acc += v1 * v2, OR ++ // acc ||= v1 && v2 ++ static mlir::Value genAccumulateProduct(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type resultType, ++ mlir::Value acc, mlir::Value v1, ++ mlir::Value v2) { ++ acc = castToProductType(loc, builder, acc, resultType); ++ v1 = castToProductType(loc, builder, v1, resultType); ++ v2 = castToProductType(loc, builder, v2, resultType); ++ mlir::Value result; ++ if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else ++ llvm_unreachable("unsupported type"); ++ ++ return builder.createConvert(loc, resultType, result); ++ } ++ + static mlir::LogicalResult + genContiguousMatmul(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity result, mlir::Value resultShape, +@@ -728,9 +748,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {I, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -765,9 +785,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {J, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -797,9 +817,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -865,9 +885,9 @@ + hlfir::loadElementAt(loc, builder, lhs, lhsIndices); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, rhsIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, reductionArgs[0], 
lhsElementValue, ++ rhsElementValue); + return {productValue}; + }; + llvm::SmallVector innerProductValue = +@@ -884,73 +904,6 @@ + } + }; + +-class DotProductConversion +- : public mlir::OpRewritePattern { +-public: +- using mlir::OpRewritePattern::OpRewritePattern; +- +- llvm::LogicalResult +- matchAndRewrite(hlfir::DotProductOp product, +- mlir::PatternRewriter &rewriter) const override { +- hlfir::Entity op = hlfir::Entity{product}; +- if (!op.isScalar()) +- return rewriter.notifyMatchFailure(product, "produces non-scalar result"); +- +- mlir::Location loc = product.getLoc(); +- fir::FirOpBuilder builder{rewriter, product.getOperation()}; +- hlfir::Entity lhs = hlfir::Entity{product.getLhs()}; +- hlfir::Entity rhs = hlfir::Entity{product.getRhs()}; +- mlir::Type resultElementType = product.getType(); +- bool isUnordered = mlir::isa(resultElementType) || +- mlir::isa(resultElementType) || +- static_cast(builder.getFastMathFlags() & +- mlir::arith::FastMathFlags::reassoc); +- +- mlir::Value extent = genProductExtent(loc, builder, lhs, rhs); +- +- auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, +- mlir::ValueRange oneBasedIndices, +- mlir::ValueRange reductionArgs) +- -> llvm::SmallVector { +- hlfir::Entity lhsElementValue = +- hlfir::loadElementAt(loc, builder, lhs, oneBasedIndices); +- hlfir::Entity rhsElementValue = +- hlfir::loadElementAt(loc, builder, rhs, oneBasedIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); +- return {productValue}; +- }; +- +- mlir::Value initValue = +- fir::factory::createZeroValue(builder, loc, resultElementType); +- +- llvm::SmallVector result = hlfir::genLoopNestWithReductions( +- loc, builder, {extent}, +- /*reductionInits=*/{initValue}, genBody, isUnordered); +- +- rewriter.replaceOp(product, result[0]); +- return mlir::success(); - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(object)) -@@ -59,10 +48,6 @@ - - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { -- if (auto type = llvm::dyn_cast_if_present(object)) { -- assert(index == 0 && "cannot offset into single-value 'TypeRange'"); -- return type; +- +-private: +- static mlir::Value genProductExtent(mlir::Location loc, +- fir::FirOpBuilder &builder, +- hlfir::Entity input1, +- hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); +- +- assert(input1Extents.size() == 1 && input2Extents.size() == 1 && +- "hlfir.dot_product arguments must be vectors"); +- llvm::SmallVector extent = +- fir::factory::deduceOptimalExtents(input1Extents, input2Extents); +- return extent[0]; - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return (value + index)->getType(); - if (auto *operand = llvm::dyn_cast_if_present(object)) -diff -ruN --strip-trailing-cr a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp ---- a/mlir/unittests/IR/OperationSupportTest.cpp -+++ b/mlir/unittests/IR/OperationSupportTest.cpp -@@ -313,21 +313,4 @@ - op2->destroy(); +-}; +- + class SimplifyHLFIRIntrinsics + : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { + public: +@@ -986,8 +939,6 @@ + if (forceMatmulAsElemental || this->allowNewSideEffects) + patterns.insert>(context); + +- patterns.insert(context); +- + if (mlir::failed(mlir::applyPatternsGreedily( + getOperation(), std::move(patterns), config))) { + mlir::emitError(getOperation()->getLoc(), +diff -ruN --strip-trailing-cr a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir ++++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +@@ -1,144 +0,0 @@ +-// Test hlfir.dot_product simplification to a reduction loop: +-// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s +- +-func.func @dot_product_integer(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> i32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> i32 +- return %res : i32 +-} +-// CHECK-LABEL: func.func @dot_product_integer( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> i32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (i32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> i16 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> i32 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 +-// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[VAL_10]] : i32 +-// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 +-// CHECK: fir.result %[[VAL_13]] : i32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : i32 +-// CHECK: } +- +-func.func @dot_product_real(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> f32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_real( +-// 
CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> f32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (f32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> f16 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (f16) -> f32 +-// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_9]], %[[VAL_11]] : f32 +-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_12]] : f32 +-// CHECK: fir.result %[[VAL_13]] : f32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : f32 +-// CHECK: } +- +-func.func @dot_product_complex(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_15:.*]] = fir.extract_value %[[VAL_12]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_16:.*]] = arith.negf %[[VAL_15]] : f32 +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_12]], %[[VAL_16]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.mulc %[[VAL_17]], %[[VAL_14]] : complex +-// CHECK: %[[VAL_19:.*]] = fir.addc %[[VAL_11]], %[[VAL_18]] : complex +-// CHECK: fir.result %[[VAL_19]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_real_complex(%arg0: !hlfir.expr, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_real_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : 
index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.undefined complex +-// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_12]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_17]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_20:.*]] = arith.negf %[[VAL_19]] : f32 +-// CHECK: %[[VAL_21:.*]] = fir.insert_value %[[VAL_17]], %[[VAL_20]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_22:.*]] = fir.mulc %[[VAL_21]], %[[VAL_18]] : complex +-// CHECK: %[[VAL_23:.*]] = fir.addc %[[VAL_11]], %[[VAL_22]] : complex +-// CHECK: fir.result %[[VAL_23]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_logical(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> !fir.logical<4> { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> !fir.logical<4> +- return %res : !fir.logical<4> +-} +-// CHECK-LABEL: func.func @dot_product_logical( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> !fir.logical<4> { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant false +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +-// CHECK: %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_6]]) -> (!fir.logical<4>) { +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<1> +-// CHECK: %[[VAL_11:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<4> +-// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_9]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (!fir.logical<1>) -> i1 +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_15:.*]] = arith.andi %[[VAL_13]], %[[VAL_14]] : i1 +-// CHECK: %[[VAL_16:.*]] = arith.ori %[[VAL_12]], %[[VAL_15]] : i1 +-// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +-// CHECK: fir.result %[[VAL_17]] : !fir.logical<4> +-// CHECK: } +-// CHECK: return %[[VAL_7]] : !fir.logical<4> +-// CHECK: } +- +-func.func @dot_product_known_dim(%arg0: !hlfir.expr<10xf32>, %arg1: !hlfir.expr) -> f32 { +- %res1 = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr<10xf32>, !hlfir.expr) -> f32 +- %res2 = 
hlfir.dot_product %arg1 %arg0 : (!hlfir.expr, !hlfir.expr<10xf32>) -> f32 +- %res = arith.addf %res1, %res2 : f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_known_dim( +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +diff -ruN --strip-trailing-cr a/libcxx/include/__config b/libcxx/include/__config +--- a/libcxx/include/__config ++++ b/libcxx/include/__config +@@ -1166,9 +1166,7 @@ + # define _LIBCPP_NOESCAPE + # endif + +-// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in +-// https://github.com/llvm/llvm-project/pull/118710 has been analyzed +-# define _LIBCPP_NODEBUG ++# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] + + # if __has_attribute(__standalone_debug__) + # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) +diff -ruN --strip-trailing-cr a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +--- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp ++++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +@@ -27,7 +27,7 @@ + check_factories.registerCheck("libcpp-header-exportable-declarations"); + check_factories.registerCheck("libcpp-hide-from-abi"); + check_factories.registerCheck("libcpp-internal-ftms"); +- // check_factories.registerCheck("libcpp-nodebug-on-aliases"); ++ check_factories.registerCheck("libcpp-nodebug-on-aliases"); + check_factories.registerCheck("libcpp-cpp-version-check"); + check_factories.registerCheck("libcpp-robust-against-adl"); + check_factories.registerCheck("libcpp-uglify-attributes"); +diff -ruN --strip-trailing-cr a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp ++++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +@@ -1140,8 +1140,6 @@ + + setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); + +- setTargetDAGCombine(ISD::SHL); +- + // In case of strict alignment, avoid an excessive number of byte wide stores. + MaxStoresPerMemsetOptSize = 8; + MaxStoresPerMemset = +@@ -26473,43 +26471,6 @@ + return NVCAST; } --TEST(ValueRangeTest, ValueConstructable) { -- MLIRContext context; -- Builder builder(&context); +-/// If the operand is a bitwise AND with a constant RHS, and the shift has a +-/// constant RHS and is the only use, we can pull it out of the shift, i.e. +-/// +-/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) +-/// +-/// We prefer this canonical form to match existing isel patterns. +-static SDValue performSHLCombine(SDNode *N, +- TargetLowering::DAGCombinerInfo &DCI, +- SelectionDAG &DAG) { +- if (DCI.isBeforeLegalizeOps()) +- return SDValue(); - -- Operation *useOp = -- createOp(&context, /*operands=*/std::nullopt, builder.getIntegerType(16)); -- // Valid construction despite a temporary 'OpResult'. 
-- ValueRange operands = useOp->getResult(0); +- SDValue Op0 = N->getOperand(0); +- if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) +- return SDValue(); - -- useOp->setOperands(operands); -- EXPECT_EQ(useOp->getNumOperands(), 1u); -- EXPECT_EQ(useOp->getOperand(0), useOp->getResult(0)); +- SDValue C1 = Op0->getOperand(1); +- SDValue C2 = N->getOperand(1); +- if (!isa(C1) || !isa(C2)) +- return SDValue(); - -- useOp->dropAllUses(); -- useOp->destroy(); +- // Might be folded into shifted op, do not lower. +- if (N->hasOneUse()) { +- unsigned UseOpc = N->user_begin()->getOpcode(); +- if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || +- UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) +- return SDValue(); +- } +- +- SDLoc DL(N); +- EVT VT = N->getValueType(0); +- SDValue X = Op0->getOperand(0); +- SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); +- SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); +- return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); +-} +- + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; +@@ -26855,8 +26816,6 @@ + return performCTLZCombine(N, DAG, Subtarget); + case ISD::SCALAR_TO_VECTOR: + return performScalarToVectorCombine(N, DCI, DAG); +- case ISD::SHL: +- return performSHLCombine(N, DCI, DAG); + } + return SDValue(); + } +diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +@@ -4979,7 +4979,7 @@ + // the subvector length. + const unsigned VecVF = getNumElements(Vec->getType()); + SmallVector Mask(VecVF, PoisonMaskElem); +- std::iota(Mask.begin(), std::next(Mask.begin(), Index), 0); ++ std::iota(Mask.begin(), Mask.end(), 0); + for (unsigned I : seq(SubVecVF)) + Mask[I + Index] = I + VecVF; + if (Generator) { +@@ -13956,11 +13956,12 @@ + Instruction *InsElt; + if (auto *VecTy = dyn_cast(Scalar->getType())) { + assert(SLPReVec && "FixedVectorType is not expected."); +- Vec = InsElt = cast(createInsertVector( +- Builder, Vec, Scalar, Pos * getNumElements(VecTy))); +- auto *II = dyn_cast(InsElt); ++ Vec = ++ createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy)); ++ auto *II = dyn_cast(Vec); + if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + return Vec; ++ InsElt = II; + } else { + Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos)); + InsElt = dyn_cast(Vec); +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll ++++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +@@ -190,7 +190,8 @@ + define i8 @test_i8_7_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 1 +@@ -199,7 +200,8 @@ + define i8 @test_i8_7_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #4, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 4 +@@ -227,8 +229,8 @@ + define i8 @test_i8_28_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_1: + ; CHECK: // 
%bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0x38 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 1 +@@ -237,8 +239,8 @@ + define i8 @test_i8_28_mask_shl_2(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_2: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #2 +-; CHECK-NEXT: and w0, w8, #0x70 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 2 +@@ -247,8 +249,8 @@ + define i8 @test_i8_28_mask_shl_3(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0xe0 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 3 +@@ -257,8 +259,8 @@ + define i8 @test_i8_28_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0xc ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 4 +@@ -268,8 +270,8 @@ + define i8 @test_i8_224_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0x60 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 224 + %t1 = shl i8 %t0, 1 +@@ -463,7 +465,8 @@ + define i16 @test_i16_127_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 1 +@@ -472,7 +475,8 @@ + define i16 @test_i16_127_mask_shl_8(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #8, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 8 +@@ -500,8 +504,8 @@ + define i16 @test_i16_2032_mask_shl_3(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0x3f80 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 3 +@@ -510,8 +514,8 @@ + define i16 @test_i16_2032_mask_shl_4(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0x7f00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 4 +@@ -520,8 +524,8 @@ + define i16 @test_i16_2032_mask_shl_5(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_5: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #5 +-; CHECK-NEXT: and w0, w8, #0xfe00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #5 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 5 +@@ -530,8 +534,8 @@ + define i16 @test_i16_2032_mask_shl_6(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_6: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #6 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x3f0 ++; CHECK-NEXT: lsl w0, w8, #6 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 6 +@@ -541,8 +545,8 @@ + define i16 @test_i16_65024_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: 
test_i16_65024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x7e00 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 65024 + %t1 = shl i16 %t0, 1 +@@ -736,7 +740,8 @@ + define i32 @test_i32_32767_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 1 +@@ -745,7 +750,8 @@ + define i32 @test_i32_32767_mask_shl_16(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_16: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #16, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #16 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 16 +@@ -773,8 +779,8 @@ + define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_7: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #7 +-; CHECK-NEXT: and w0, w8, #0x3fff8000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #7 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 7 +@@ -783,8 +789,8 @@ + define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #8 +-; CHECK-NEXT: and w0, w8, #0x7fff0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 8 +@@ -793,8 +799,8 @@ + define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_9: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #9 +-; CHECK-NEXT: and w0, w8, #0xfffe0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #9 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 9 +@@ -803,8 +809,8 @@ + define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_10: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #10 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x3fff00 ++; CHECK-NEXT: lsl w0, w8, #10 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 10 +@@ -814,8 +820,8 @@ + define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x7ffe0000 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 4294836224 + %t1 = shl i32 %t0, 1 +@@ -1009,7 +1015,8 @@ + define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w0, w0, #1 ++; CHECK-NEXT: and x8, x0, #0x7fffffff ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 2147483647 + %t1 = shl i64 %t0, 1 +@@ -1047,8 +1054,8 @@ + define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #15 +-; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #15 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 15 +@@ -1057,8 +1064,8 @@ + define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: + ; CHECK: // %bb.0: +-; 
CHECK-NEXT: lsl x8, x0, #16 +-; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #16 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 16 +@@ -1067,8 +1074,8 @@ + define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #17 +-; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #17 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 17 +@@ -1077,8 +1084,8 @@ + define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #18 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x3fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #18 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 18 +@@ -1088,8 +1095,8 @@ + define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #1 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 18446744065119617024 + %t1 = shl i64 %t0, 1 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll +--- a/llvm/test/CodeGen/AArch64/extract-bits.ll ++++ b/llvm/test/CodeGen/AArch64/extract-bits.ll +@@ -1013,8 +1013,8 @@ + define i32 @c2_i32(i32 %arg) nounwind { + ; CHECK-LABEL: c2_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w0, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 +@@ -1063,8 +1063,8 @@ + define i64 @c2_i64(i64 %arg) nounwind { + ; CHECK-LABEL: c2_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x0, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x0, x8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 +@@ -1120,8 +1120,8 @@ + define void @c7_i32(i32 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w8, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w8, w8, #2 + ; CHECK-NEXT: str w8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 +@@ -1163,8 +1163,8 @@ + define void @c7_i64(i64 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x8, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x8, x8, #2 + ; CHECK-NEXT: str x8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll +--- a/llvm/test/CodeGen/AArch64/fpenv.ll ++++ b/llvm/test/CodeGen/AArch64/fpenv.ll +@@ -4,11 +4,11 @@ + define void @func_set_rounding_dyn(i32 %rm) { + ; CHECK-LABEL: func_set_rounding_dyn: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w9, w0, #22 ++; CHECK-NEXT: sub w9, w0, #1 + ; CHECK-NEXT: mrs x8, FPCR ++; CHECK-NEXT: and w9, w9, #0x3 + ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff +-; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304 +-; CHECK-NEXT: and w9, w9, 
#0xc00000 ++; CHECK-NEXT: lsl w9, w9, #22 + ; CHECK-NEXT: orr x8, x8, x9 + ; CHECK-NEXT: msr FPCR, x8 + ; CHECK-NEXT: ret +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll +--- a/llvm/test/CodeGen/AArch64/xbfiz.ll ++++ b/llvm/test/CodeGen/AArch64/xbfiz.ll +@@ -69,19 +69,3 @@ + %and = and i64 %shl, 4294967295 + ret i64 %and + } +- +-define i64 @lsl_zext_i8_i64(i8 %b) { +-; CHECK-LABEL: lsl_zext_i8_i64: +-; CHECK: ubfiz x0, x0, #1, #8 +- %1 = zext i8 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 -} - - } // namespace -diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ---- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -@@ -43,10 +43,7 @@ - - gentbl( - name = "diagnostic_defs_gen", -- tbl_outs = [( -- "-gen-clang-diags-defs -clang-component=%s" % c, -- "include/clang/Basic/Diagnostic%sKinds.inc" % c, -- ) for c in [ -+ tbl_outs = [out for c in [ - "AST", - "Analysis", - "Comment", -@@ -60,6 +57,15 @@ - "Refactoring", - "Sema", - "Serialization", -+ ] for out in [ -+ ( -+ "-gen-clang-diags-defs -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sKinds.inc" % c, -+ ), -+ ( -+ "-gen-clang-diags-enums -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sEnums.inc" % c, -+ ), - ]] + [ - ( - "-gen-clang-diag-groups", +-define i64 @lsl_zext_i16_i64(i16 %b) { +-; CHECK-LABEL: lsl_zext_i16_i64: +-; CHECK: ubfiz x0, x0, #1, #16 +- %1 = zext i16 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 +-} +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll ++++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +@@ -0,0 +1,81 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ++; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s ++ ++define <16 x double> @test(ptr %x, double %v, double %a) { ++; CHECK-LABEL: define <16 x double> @test( ++; CHECK-SAME: ptr [[X:%.*]], double [[V:%.*]], double [[A:%.*]]) { ++; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 ++; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 9 ++; CHECK-NEXT: [[TMP1:%.*]] = load <6 x double>, ptr [[X]], align 4 ++; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[GEP6]], align 4 ++; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[GEP8]], align 4 ++; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> poison, double [[A]], i32 0 ++; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x double> [[TMP4]], <16 x double> poison, <16 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0) ++; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> ++; CHECK-NEXT: [[TMP12:%.*]] = 
shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> ++; CHECK-NEXT: [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6) ++; CHECK-NEXT: [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8) ++; CHECK-NEXT: [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10) ++; CHECK-NEXT: [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12) ++; CHECK-NEXT: [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14) ++; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]] ++; CHECK-NEXT: ret <16 x double> [[TMP18]] ++; ++ %gep1 = getelementptr inbounds double, ptr %x, i64 1 ++ %gep2 = getelementptr inbounds double, ptr %x, i64 2 ++ %gep3 = getelementptr inbounds double, ptr %x, i64 3 ++ %gep4 = getelementptr inbounds double, ptr %x, i64 4 ++ %gep5 = getelementptr inbounds double, ptr %x, i64 5 ++ %gep6 = getelementptr inbounds double, ptr %x, i64 8 ++ %gep7 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep8 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep9 = getelementptr inbounds double, ptr %x, i64 10 ++ %x0 = load double, ptr %x, align 4 ++ %x1 = load double, ptr %gep1, align 4 ++ %x2 = load double, ptr %gep2, align 4 ++ %x3 = load double, ptr %gep3, align 4 ++ %x4 = load double, ptr %gep4, align 4 ++ %x5 = load double, ptr %gep5, align 4 ++ %x6 = load double, ptr %gep6, align 4 ++ %x7 = load double, ptr %gep7, align 4 ++ %x8 = load double, ptr %gep8, align 4 ++ %x9 = load double, ptr %gep9, align 4 ++ %add1 = fadd double %a, %x0 ++ %add2 = fadd double %a, %x1 ++ %add3 = fadd double %a, %x2 ++ %add4 = fadd double %a, %x3 ++ %add5 = fadd double %a, %x4 ++ %add6 = fadd double %a, %x5 ++ %add7 = fadd double %a, %x6 ++ %add8 = fadd double %a, %x7 ++ %add9 = fadd double %a, %x8 ++ %add10 = fadd double %a, %x9 ++ %add11 = fadd double %a, %v ++ %add12 = fadd double %a, %v ++ %add13 = fadd double %a, %v ++ %add14 = fadd double %a, %v ++ %add15 = fadd double %a, %v ++ %add16 = fadd double %a, %v ++ %i0 = insertelement <16 x double> poison, double %add1, i32 0 ++ %i1 = insertelement <16 x double> %i0, double %add2, i32 1 ++ %i2 = insertelement <16 x double> %i1, double %add3, i32 2 ++ %i3 = insertelement <16 x double> %i2, double %add4, i32 3 ++ %i4 = insertelement <16 x double> %i3, double %add5, i32 4 ++ %i5 = insertelement <16 x double> %i4, double %add6, i32 5 ++ %i6 = insertelement <16 x double> %i5, double %add7, i32 6 ++ %i7 = insertelement <16 x double> %i6, double %add8, i32 7 ++ %i8 = insertelement <16 x double> %i7, double %add9, i32 8 ++ %i9 = insertelement <16 x double> %i8, double %add10, i32 9 ++ %i10 = insertelement <16 x double> %i9, double %add11, i32 10 ++ %i11 = insertelement <16 x double> %i10, double %add12, i32 11 ++ %i12 = insertelement <16 x double> %i11, double %add13, i32 12 ++ %i13 = insertelement <16 x double> %i12, double %add14, i32 13 ++ %i14 = insertelement <16 x double> %i13, double %add15, i32 14 ++ %i15 = insertelement <16 x double> %i14, double %add16, i32 15 ++ ret <16 x double> %i15 ++}
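
Note on the AArch64 portion of the regenerated llvm.patch: it temporarily reverts the `performSHLCombine` DAG combine (and its `setTargetDAGCombine(ISD::SHL)` registration), whose own comment gives the canonicalization (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)). The const-shift-of-constmasked.ll, extract-bits.ll, fpenv.ll and xbfiz.ll CHECK lines accordingly move from shift-then-mask (and `ubfiz` bitfield forms) back to mask-then-shift. Below is a minimal standalone C++ sanity check of the algebraic identity that canonicalization relies on; the file name and constants are illustrative and not part of the patch.

    // shl_and_identity.cpp: exhaustively verify, over 16-bit inputs, that a
    // left shift distributes over AND with a constant mask:
    //   ((x & c1) << c2) == ((x << c2) & (c1 << c2))
    // c1/c2 mirror test_i8_28_mask_shl_3 from const-shift-of-constmasked.ll.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t c1 = 28, c2 = 3;
      for (uint32_t x = 0; x <= 0xFFFF; ++x)
        assert(((x & c1) << c2) == ((x << c2) & (c1 << c2)));
      return 0;
    }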
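
The flang portion of the new patch is likewise a temporary revert: it drops the shared `hlfir::genExtentsVector` helper, the `ProductFactory` class, and the `DotProductConversion` pattern (plus its simplify-hlfir-intrinsics-dotproduct.fir test), restoring the earlier local `genArrayExtents`, `genInitValue`, and static `genAccumulateProduct` helpers in SimplifyHLFIRIntrinsics.cpp. The restored accumulation step is documented in the patch as `acc += v1 * v2` for numeric types and `acc ||= v1 && v2` for logicals. The sketch below models that step and the surrounding reduction loop in plain C++ rather than FIR; the helper name and data are invented for illustration.

    // Scalar model of the inner-product accumulation used by the MATMUL
    // conversions: start from a zero accumulator (cf. the zero initial
    // values the pass creates) and apply the genAccumulateProduct update
    // once per element pair.
    #include <cassert>

    template <typename T>
    T accumulateProduct(T acc, T v1, T v2) {
      return acc + v1 * v2; // numeric case: acc += v1 * v2
    }

    inline bool accumulateProduct(bool acc, bool v1, bool v2) {
      return acc || (v1 && v2); // logical case: acc ||= v1 && v2
    }

    int main() {
      const int lhs[] = {1, 2, 3}, rhs[] = {4, 5, 6};
      int acc = 0; // zero-initialized accumulator
      for (int i = 0; i < 3; ++i)
        acc = accumulateProduct(acc, lhs[i], rhs[i]);
      assert(acc == 32); // 1*4 + 2*5 + 3*6
      return 0;
    }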
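
Finally, the SLPVectorizer.cpp hunk restores the full-identity form of the insert-subvector shuffle mask: `std::iota` now numbers every destination lane instead of only the lanes before `Index`, and lanes `[Index, Index + SubVecVF)` are then overwritten with indices taken from the inserted subvector (numbered from `VecVF` upward), as exercised by the new insert-subvector.ll test. A small sketch of that mask construction, with made-up vectorization-factor values:

    // Build the shuffle mask for inserting a SubVecVF-wide subvector into a
    // VecVF-wide vector at lane Index: identity indices select from the
    // destination vector, indices >= VecVF select from the subvector.
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main() {
      const unsigned VecVF = 8, SubVecVF = 2, Index = 4;
      std::vector<int> Mask(VecVF);
      std::iota(Mask.begin(), Mask.end(), 0); // identity over all lanes
      for (unsigned I = 0; I < SubVecVF; ++I)
        Mask[I + Index] = I + VecVF; // splice in the subvector's lanes
      for (int M : Mask)
        std::printf("%d ", M); // prints: 0 1 2 3 8 9 6 7
      std::printf("\n");
      return 0;
    }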