From b30dd02de1afe2a8ef1f82e45907efa18c50e014 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 16 Jan 2025 17:42:34 -0800 Subject: [PATCH 1/2] Optimize struct RMW ops in OptimizeInstructions When the RMW operation can be proven not to change the accessed value, optimize it to a simple atomic get instead. This is valid because a write that does not change an in-memory value does not synchronize with any subsequent reads of that value, since those reads can be considered to be reading from the previous write. Also optimize RMW operations on unshared structs to their non-atomic equivalent operations. This can increase code size, but can also enable follow-on optimizations of the simpler operations and can be less expensive at runtime. --- scripts/test/fuzzing.py | 1 + src/passes/OptimizeInstructions.cpp | 185 +++ .../optimize-instructions-struct-rmw.wast | 1270 +++++++++++++++++ 3 files changed, 1456 insertions(+) create mode 100644 test/lit/passes/optimize-instructions-struct-rmw.wast diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index 51384c41b44..62b833a2653 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -83,6 +83,7 @@ 'gc-atomics-null-refs.wast', 'shared-structs.wast', 'heap2local-rmw.wast', + 'optimize-instructions-struct-rmw.wast', # contains too many segments to run in a wasm VM 'limit-segments_disable-bulk-memory.wast', # https://github.com/WebAssembly/binaryen/issues/7176 diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 6a528d74fa9..5b62bdfaa73 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1862,6 +1862,191 @@ struct OptimizeInstructions } } + void visitStructRMW(StructRMW* curr) { + skipNonNullCast(curr->ref, curr); + if (trapOnNull(curr, curr->ref)) { + return; + } + + if (!curr->ref->type.isStruct()) { + return; + } + + Builder builder(*getModule()); + + // Even when the access to shared memory, we can optimize out the modify and + // write parts if we know that the modified value is the same as the + // original value. This is valid because reads from writes that don't change + // the in-memory value can be considered to be reads from the previous write + // to the same location instead. That means there is no read that + // necessarily synchronizes with the write. + auto* value = + Properties::getFallthrough(curr->value, getPassOptions(), *getModule()); + if (Properties::isSingleConstantExpression(value)) { + auto val = Properties::getLiteral(value); + bool canOptimize = false; + switch (curr->op) { + case RMWAdd: + case RMWSub: + case RMWOr: + case RMWXor: + canOptimize = val.getInteger() == 0; + break; + case RMWAnd: + canOptimize = val == Literal::makeNegOne(val.type); + break; + case RMWXchg: + canOptimize = false; + break; + } + if (canOptimize) { + replaceCurrent(builder.makeStructGet( + curr->index, + getResultOfFirst(curr->ref, builder.makeDrop(curr->value)), + curr->order, + curr->type)); + return; + } + } + + if (curr->ref->type.getHeapType().isShared()) { + return; + } + + // Lower the RMW to its more basic operations. Breaking the atomic + // operation into several non-atomic operations is safe because no other + // thread can observe an intermediate state in the unshared memory. This + // initially increases code size, but the more basic operations may be + // more optimizable than the original RMW. 
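+    // For example, a seqcst `struct.atomic.rmw.add` on an unshared i32 field
+    // lowers to roughly the following (a sketch with illustrative local names,
+    // not literal output; see the tests added below for the exact expected
+    // form):
+    //
+    //   (local.set $ref (...))
+    //   (local.set $val (...))
+    //   (local.set $result (struct.get $T 0 (local.get $ref)))
+    //   (struct.set $T 0 (local.get $ref)
+    //     (i32.add (local.get $result) (local.get $val)))
+    //   (atomic.fence) ;; kept only for seqcst, see below
+    //   (local.get $result)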
+ auto ref = builder.addVar(getFunction(), curr->ref->type); + auto val = builder.addVar(getFunction(), curr->type); + auto result = builder.addVar(getFunction(), curr->type); + auto* block = builder.makeBlock( + {builder.makeLocalSet(ref, curr->ref), + builder.makeLocalSet(val, curr->value), + builder.makeLocalSet( + result, + builder.makeStructGet(curr->index, + builder.makeLocalGet(ref, curr->ref->type), + MemoryOrder::Unordered, + curr->type))}); + Expression* newVal = nullptr; + if (curr->op == RMWXchg) { + newVal = builder.makeLocalGet(val, curr->type); + } else { + Abstract::Op binop = Abstract::Add; + switch (curr->op) { + case RMWAdd: + binop = Abstract::Add; + break; + case RMWSub: + binop = Abstract::Sub; + break; + case RMWAnd: + binop = Abstract::And; + break; + case RMWOr: + binop = Abstract::Or; + break; + case RMWXor: + binop = Abstract::Xor; + break; + case RMWXchg: + WASM_UNREACHABLE("unexpected op"); + } + newVal = builder.makeBinary(Abstract::getBinary(curr->type, binop), + builder.makeLocalGet(result, curr->type), + builder.makeLocalGet(val, curr->type)); + } + block->list.push_back( + builder.makeStructSet(curr->index, + builder.makeLocalGet(ref, curr->ref->type), + newVal, + MemoryOrder::Unordered)); + + // We must maintain this operation's effect on the global order of seqcst + // operations. + if (curr->order == MemoryOrder::SeqCst) { + block->list.push_back(builder.makeAtomicFence()); + } + + block->list.push_back(builder.makeLocalGet(result, curr->type)); + block->type = curr->type; + replaceCurrent(block); + } + + void visitStructCmpxchg(StructCmpxchg* curr) { + skipNonNullCast(curr->ref, curr); + if (trapOnNull(curr, curr->ref)) { + return; + } + + if (!curr->ref->type.isStruct()) { + return; + } + + Builder builder(*getModule()); + + // Just like other RMW operations, cmpxchg can be optimized to just a read + // if it is known not to change the in-memory value. This is the case when + // `expected` and `replacement` are known to be the same. + if (areConsecutiveInputsEqual(curr->expected, curr->replacement)) { + auto* ref = getResultOfFirst( + curr->ref, + builder.makeSequence(builder.makeDrop(curr->expected), + builder.makeDrop(curr->replacement))); + replaceCurrent( + builder.makeStructGet(curr->index, ref, curr->order, curr->type)); + return; + } + + if (curr->ref->type.getHeapType().isShared()) { + return; + } + + // Just like other RMW operations, lower to basic operations when operating + // on unshared memory. 
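+    // As with the plain RMW lowering above, the result is roughly (a sketch
+    // with illustrative local names, not literal output):
+    //
+    //   (local.set $ref (...))
+    //   (local.set $expected (...))
+    //   (local.set $replacement (...))
+    //   (if (i32.eq (local.tee $result (struct.get $T 0 (local.get $ref)))
+    //               (local.get $expected))
+    //     (then (struct.set $T 0 (local.get $ref) (local.get $replacement))))
+    //   (atomic.fence) ;; kept only for seqcst, see below
+    //   (local.get $result)
+    //
+    // For reference-typed fields the comparison uses `ref.eq` instead of an
+    // integer `eq`.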
+ auto ref = builder.addVar(getFunction(), curr->ref->type); + auto expected = builder.addVar(getFunction(), curr->type); + auto replacement = builder.addVar(getFunction(), curr->type); + auto result = builder.addVar(getFunction(), curr->type); + auto* block = + builder.makeBlock({builder.makeLocalSet(ref, curr->ref), + builder.makeLocalSet(expected, curr->expected), + builder.makeLocalSet(replacement, curr->replacement)}); + auto* lhs = builder.makeLocalTee( + result, + builder.makeStructGet(curr->index, + builder.makeLocalGet(ref, curr->ref->type), + MemoryOrder::Unordered, + curr->type), + curr->type); + auto* rhs = builder.makeLocalGet(expected, curr->type); + Expression* pred = nullptr; + if (curr->type.isRef()) { + pred = builder.makeRefEq(lhs, rhs); + } else { + pred = builder.makeBinary( + Abstract::getBinary(curr->type, Abstract::Eq), lhs, rhs); + } + block->list.push_back(builder.makeIf( + pred, + builder.makeStructSet(curr->index, + builder.makeLocalGet(ref, curr->ref->type), + builder.makeLocalGet(replacement, curr->type), + MemoryOrder::Unordered))); + + // We must maintain this operation's effect on the global order of seqcst + // operations. + if (curr->order == MemoryOrder::SeqCst) { + block->list.push_back(builder.makeAtomicFence()); + } + + block->list.push_back(builder.makeLocalGet(result, curr->type)); + block->type = curr->type; + replaceCurrent(block); + } + void visitArrayNew(ArrayNew* curr) { // If a value is provided, we can optimize in some cases. if (curr->type == Type::unreachable || curr->isWithDefault()) { diff --git a/test/lit/passes/optimize-instructions-struct-rmw.wast b/test/lit/passes/optimize-instructions-struct-rmw.wast new file mode 100644 index 00000000000..1a9bcec6c41 --- /dev/null +++ b/test/lit/passes/optimize-instructions-struct-rmw.wast @@ -0,0 +1,1270 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. 
+ +;; RUN: wasm-opt %s -all --optimize-instructions --preserve-type-order -S -o - | filecheck %s + +(module + ;; CHECK: (type $i32 (shared (struct (field (mut i32))))) + (type $i32 (shared (struct (field (mut i32))))) + ;; CHECK: (type $i64 (shared (struct (field (mut i64))))) + (type $i64 (shared (struct (field (mut i64))))) + ;; CHECK: (type $struct (shared (struct (field (mut (ref null $struct)))))) + (type $struct (shared (struct (field (mut (ref null $struct)))))) + ;; CHECK: (type $unshared-i32 (struct (field (mut i32)))) + (type $unshared-i32 (struct (field (mut i32)))) + ;; CHECK: (type $unshared-i64 (struct (field (mut i64)))) + (type $unshared-i64 (struct (field (mut i64)))) + ;; CHECK: (type $unshared-struct (struct (field (mut (ref null $unshared-struct))))) + (type $unshared-struct (struct (field (mut (ref null $unshared-struct))))) + + ;; CHECK: (func $rmw-skip-non-null-cast (type $6) (param $0 (ref null $i32)) (param $1 i32) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.add $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-skip-non-null-cast (param (ref null $i32) i32) (result i32) + (struct.atomic.rmw.add $i32 0 + (ref.as_non_null + (local.get 0) + ) + (local.get 1) + ) + ) + + ;; CHECK: (func $cmpxchg-skip-non-null-cast (type $7) (param $0 (ref null $i32)) (param $1 i32) (param $2 i32) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-skip-non-null-cast (param (ref null $i32) i32 i32) (result i32) + (struct.atomic.rmw.cmpxchg $i32 0 + (ref.as_non_null + (local.get 0) + ) + (local.get 1) + (local.get 2) + ) + ) + + ;; CHECK: (func $rmw-trap-on-null (type $8) (result i32) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $rmw-trap-on-null (result i32) + (struct.atomic.rmw.add $i32 0 + (ref.null (shared none)) + (i32.const 1) + ) + ) + + ;; CHECK: (func $cmpxchg-trap-on-null (type $8) (result i32) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $cmpxchg-trap-on-null (result i32) + (struct.atomic.rmw.cmpxchg $i32 0 + (ref.null (shared none)) + (i32.const 1) + (i32.const 2) + ) + ) + + ;; CHECK: (func $rmw-add-i32-ident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $i32 0 + ;; CHECK-NEXT: (block (result (ref null $i32)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-add-i32-ident (param (ref null $i32)) (result i32) + ;; This can be optimized to just an atomic load. + (struct.atomic.rmw.add $i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $rmw-add-i32-noident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.add $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-add-i32-noident (param (ref null $i32)) (result i32) + ;; But this cannot be optimized at all. 
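+    ;; (the operand is a nonzero constant, so the write changes the stored
+    ;; value, and $i32 is shared, so the atomic RMW cannot be lowered either)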
+ (struct.atomic.rmw.add $i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-sub-i32-ident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $i32 0 + ;; CHECK-NEXT: (block (result (ref null $i32)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-sub-i32-ident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.sub $i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $rmw-sub-i32-noident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.sub $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-sub-i32-noident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.sub $i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-and-i32-ident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $i32 0 + ;; CHECK-NEXT: (block (result (ref null $i32)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const -1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-and-i32-ident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.and $i32 0 + (local.get 0) + (i32.const -1) + ) + ) + + ;; CHECK: (func $rmw-and-i32-noident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.and $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-and-i32-noident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.and $i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $rmw-or-i32-ident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $i32 0 + ;; CHECK-NEXT: (block (result (ref null $i32)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-or-i32-ident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.or $i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $rmw-or-i32-noident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.or $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const -1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-or-i32-noident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.or $i32 0 + (local.get 0) + (i32.const -1) + ) + ) + + ;; CHECK: (func $rmw-xor-i32-ident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $i32 0 + ;; CHECK-NEXT: (block (result (ref null $i32)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xor-i32-ident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.xor $i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $rmw-xor-i32-noident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.xor $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const -1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xor-i32-noident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.xor $i32 0 + (local.get 0) + (i32.const -1) + ) + ) + + ;; CHECK: (func $rmw-xchg-i32-ident (type $9) (param $0 
(ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.xchg $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (struct.get $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xchg-i32-ident (param (ref null $i32)) (result i32) + ;; TODO: Optimize this case. + (struct.atomic.rmw.xchg $i32 0 + (local.get 0) + (struct.get $i32 0 + (local.get 0) + ) + ) + ) + + ;; CHECK: (func $rmw-xchg-i32-noident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.xchg $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xchg-i32-noident (param (ref null $i32)) (result i32) + (struct.atomic.rmw.xchg $i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $cmpxchg-i32-ident (type $6) (param $0 (ref null $i32)) (param $1 i32) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $i32 0 + ;; CHECK-NEXT: (block (result (ref null $i32)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-i32-ident (param (ref null $i32) i32) (result i32) + (struct.atomic.rmw.cmpxchg $i32 0 + (local.get 0) + (local.get 1) + (local.get 1) + ) + ) + + ;; CHECK: (func $cmpxchg-i32-noident (type $6) (param $0 (ref null $i32)) (param $1 i32) (result i32) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg $i32 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-i32-noident (param (ref null $i32) i32) (result i32) + (struct.atomic.rmw.cmpxchg $i32 0 + (local.get 0) + (i32.const 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-add-i64-ident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.get $i64 0 + ;; CHECK-NEXT: (block (result (ref null $i64)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-add-i64-ident (param (ref null $i64)) (result i64) + ;; This can be optimized to just an atomic load. + (struct.atomic.rmw.add $i64 0 + (local.get 0) + (i64.const 0) + ) + ) + + ;; CHECK: (func $rmw-add-i64-noident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.add $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-add-i64-noident (param (ref null $i64)) (result i64) + ;; But this cannot be optimized at all. 
+ (struct.atomic.rmw.add $i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-sub-i64-ident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.get $i64 0 + ;; CHECK-NEXT: (block (result (ref null $i64)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-sub-i64-ident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.sub $i64 0 + (local.get 0) + (i64.const 0) + ) + ) + + ;; CHECK: (func $rmw-sub-i64-noident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.sub $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-sub-i64-noident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.sub $i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-and-i64-ident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.get $i64 0 + ;; CHECK-NEXT: (block (result (ref null $i64)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.const -1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-and-i64-ident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.and $i64 0 + (local.get 0) + (i64.const -1) + ) + ) + + ;; CHECK: (func $rmw-and-i64-noident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.and $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-and-i64-noident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.and $i64 0 + (local.get 0) + (i64.const 0) + ) + ) + + ;; CHECK: (func $rmw-or-i64-ident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.get $i64 0 + ;; CHECK-NEXT: (block (result (ref null $i64)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-or-i64-ident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.or $i64 0 + (local.get 0) + (i64.const 0) + ) + ) + + ;; CHECK: (func $rmw-or-i64-noident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.or $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i64.const -1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-or-i64-noident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.or $i64 0 + (local.get 0) + (i64.const -1) + ) + ) + + ;; CHECK: (func $rmw-xor-i64-ident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.get $i64 0 + ;; CHECK-NEXT: (block (result (ref null $i64)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xor-i64-ident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.xor $i64 0 + (local.get 0) + (i64.const 0) + ) + ) + + ;; CHECK: (func $rmw-xor-i64-noident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.xor $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i64.const -1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xor-i64-noident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.xor $i64 0 + (local.get 0) + (i64.const -1) + ) + ) + + ;; CHECK: (func $rmw-xchg-i64-ident (type $10) 
(param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.xchg $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (struct.get $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xchg-i64-ident (param (ref null $i64)) (result i64) + ;; TODO: Optimize this case. + (struct.atomic.rmw.xchg $i64 0 + (local.get 0) + (struct.get $i64 0 + (local.get 0) + ) + ) + ) + + ;; CHECK: (func $rmw-xchg-i64-noident (type $10) (param $0 (ref null $i64)) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.xchg $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xchg-i64-noident (param (ref null $i64)) (result i64) + (struct.atomic.rmw.xchg $i64 0 + (local.get 0) + (i64.const 0) + ) + ) + + ;; CHECK: (func $cmpxchg-i64-ident (type $11) (param $0 (ref null $i64)) (param $1 i64) (result i64) + ;; CHECK-NEXT: (struct.atomic.get $i64 0 + ;; CHECK-NEXT: (block (result (ref null $i64)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-i64-ident (param (ref null $i64) i64) (result i64) + (struct.atomic.rmw.cmpxchg $i64 0 + (local.get 0) + (local.get 1) + (local.get 1) + ) + ) + + ;; CHECK: (func $cmpxchg-i64-noident (type $11) (param $0 (ref null $i64)) (param $1 i64) (result i64) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg $i64 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-i64-noident (param (ref null $i64) i64) (result i64) + (struct.atomic.rmw.cmpxchg $i64 0 + (local.get 0) + (i64.const 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-xchg-ref-ident (type $12) (param $0 (ref null $struct)) (result (ref null $struct)) + ;; CHECK-NEXT: (struct.atomic.rmw.xchg $struct 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xchg-ref-ident (param (ref null $struct)) (result (ref null $struct)) + ;; TODO: Optimize this case. 
+ (struct.atomic.rmw.xchg $struct 0 + (local.get 0) + (struct.get $struct 0 + (local.get 0) + ) + ) + ) + + ;; CHECK: (func $rmw-xchg-ref-noident (type $12) (param $0 (ref null $struct)) (result (ref null $struct)) + ;; CHECK-NEXT: (struct.atomic.rmw.xchg $struct 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-xchg-ref-noident (param (ref null $struct)) (result (ref null $struct)) + (struct.atomic.rmw.xchg $struct 0 + (local.get 0) + (local.get 0) + ) + ) + + ;; CHECK: (func $cmpxchg-ref-ident (type $12) (param $0 (ref null $struct)) (result (ref null $struct)) + ;; CHECK-NEXT: (struct.atomic.get $struct 0 + ;; CHECK-NEXT: (block (result (ref null $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-ref-ident (param (ref null $struct)) (result (ref null $struct)) + (struct.atomic.rmw.cmpxchg $struct 0 + (local.get 0) + (local.get 0) + (local.get 0) + ) + ) + + ;; CHECK: (func $cmpxchg-ref-ident-null (type $12) (param $0 (ref null $struct)) (result (ref null $struct)) + ;; CHECK-NEXT: (struct.atomic.get $struct 0 + ;; CHECK-NEXT: (block (result (ref null $struct)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-ref-ident-null (param (ref null $struct)) (result (ref null $struct)) + (struct.atomic.rmw.cmpxchg $struct 0 + (local.get 0) + (ref.null (shared none)) + (ref.null (shared none)) + ) + ) + + ;; CHECK: (func $cmpxchg-ref-noident (type $12) (param $0 (ref null $struct)) (result (ref null $struct)) + ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg $struct 0 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (ref.null (shared none)) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-ref-noident (param (ref null $struct)) (result (ref null $struct)) + (struct.atomic.rmw.cmpxchg $struct 0 + (local.get 0) + (ref.null (shared none)) + (local.get 0) + ) + ) + + ;; CHECK: (func $rmw-add-i32-acqrel-ident (type $9) (param $0 (ref null $i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get acqrel $i32 0 + ;; CHECK-NEXT: (block (result (ref null $i32)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-add-i32-acqrel-ident (param (ref null $i32)) (result i32) + ;; Check that acqrel rmws are optimized to acquire gets. + (struct.atomic.rmw.add acqrel acqrel $i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $rmw-add-i32-unshared-ident (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $unshared-i32 0 + ;; CHECK-NEXT: (block (result (ref null $unshared-i32)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $rmw-add-i32-unshared-ident (param (ref null $unshared-i32)) (result i32) + ;; Check just one unshared case to make sure we do the same identity + ;; optimizations tested above. 
+ (struct.atomic.rmw.add $unshared-i32 0 + (local.get 0) + (i32.const 0) + ) + ) + + + ;; CHECK: (func $cmpxchg-i32-unshared-ident (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (struct.atomic.get $unshared-i32 0 + ;; CHECK-NEXT: (block (result (ref null $unshared-i32)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-i32-unshared-ident (param (ref null $unshared-i32)) (result i32) + ;; Check the same for cmpxchg. + (struct.atomic.rmw.cmpxchg $unshared-i32 0 + (local.get 0) + (i32.const 0) + (i32.const 0) + ) + ) + + ;; CHECK: (func $rmw-add-i32-lower (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-add-i32-lower (param (ref null $unshared-i32)) (result i32) + (struct.atomic.rmw.add $unshared-i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-sub-i32-lower (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.sub + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-sub-i32-lower (param (ref null $unshared-i32)) (result i32) + (struct.atomic.rmw.sub $unshared-i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-and-i32-lower (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.and + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func 
$rmw-and-i32-lower (param (ref null $unshared-i32)) (result i32) + (struct.atomic.rmw.and $unshared-i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-or-i32-lower (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.or + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-or-i32-lower (param (ref null $unshared-i32)) (result i32) + (struct.atomic.rmw.or $unshared-i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-xor-i32-lower (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.xor + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-xor-i32-lower (param (ref null $unshared-i32)) (result i32) + (struct.atomic.rmw.xor $unshared-i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $rmw-xchg-i32-lower (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-xchg-i32-lower (param (ref null $unshared-i32)) (result i32) + (struct.atomic.rmw.xchg $unshared-i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $cmpxchg-i32-lower (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.eq + ;; CHECK-NEXT: (local.tee $4 + ;; CHECK-NEXT: 
(struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + (func $cmpxchg-i32-lower (param (ref null $unshared-i32)) (result i32) + (struct.atomic.rmw.cmpxchg $unshared-i32 0 + (local.get 0) + (i32.const 1) + (i32.const 2) + ) + ) + + ;; CHECK: (func $rmw-add-i64-lower (type $14) (param $0 (ref null $unshared-i64)) (result i64) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i64)) + ;; CHECK-NEXT: (local $2 i64) + ;; CHECK-NEXT: (local $3 i64) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i64.add + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-add-i64-lower (param (ref null $unshared-i64)) (result i64) + (struct.atomic.rmw.add $unshared-i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-sub-i64-lower (type $14) (param $0 (ref null $unshared-i64)) (result i64) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i64)) + ;; CHECK-NEXT: (local $2 i64) + ;; CHECK-NEXT: (local $3 i64) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i64.sub + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-sub-i64-lower (param (ref null $unshared-i64)) (result i64) + (struct.atomic.rmw.sub $unshared-i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-and-i64-lower (type $14) (param $0 (ref null $unshared-i64)) (result i64) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i64)) + ;; CHECK-NEXT: (local $2 i64) + ;; CHECK-NEXT: (local $3 i64) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i64.and + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-and-i64-lower (param (ref null $unshared-i64)) (result i64) + (struct.atomic.rmw.and $unshared-i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-or-i64-lower (type $14) (param $0 
(ref null $unshared-i64)) (result i64) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i64)) + ;; CHECK-NEXT: (local $2 i64) + ;; CHECK-NEXT: (local $3 i64) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i64.or + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-or-i64-lower (param (ref null $unshared-i64)) (result i64) + (struct.atomic.rmw.or $unshared-i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-xor-i64-lower (type $14) (param $0 (ref null $unshared-i64)) (result i64) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i64)) + ;; CHECK-NEXT: (local $2 i64) + ;; CHECK-NEXT: (local $3 i64) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i64.xor + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-xor-i64-lower (param (ref null $unshared-i64)) (result i64) + (struct.atomic.rmw.xor $unshared-i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $rmw-xchg-i64-lower (type $14) (param $0 (ref null $unshared-i64)) (result i64) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i64)) + ;; CHECK-NEXT: (local $2 i64) + ;; CHECK-NEXT: (local $3 i64) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-xchg-i64-lower (param (ref null $unshared-i64)) (result i64) + (struct.atomic.rmw.xchg $unshared-i64 0 + (local.get 0) + (i64.const 1) + ) + ) + + ;; CHECK: (func $cmpxchg-i64-lower (type $14) (param $0 (ref null $unshared-i64)) (result i64) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i64)) + ;; CHECK-NEXT: (local $2 i64) + ;; CHECK-NEXT: (local $3 i64) + ;; CHECK-NEXT: (local $4 i64) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i64.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i64.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i64.eq + ;; CHECK-NEXT: (local.tee $4 + ;; CHECK-NEXT: (struct.get $unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (struct.set 
$unshared-i64 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + (func $cmpxchg-i64-lower (param (ref null $unshared-i64)) (result i64) + (struct.atomic.rmw.cmpxchg $unshared-i64 0 + (local.get 0) + (i64.const 1) + (i64.const 2) + ) + ) + + ;; CHECK: (func $rmw-xchg-ref-lower (type $15) (param $0 (ref null $unshared-struct)) (result (ref null $unshared-struct)) + ;; CHECK-NEXT: (local $1 (ref null $unshared-struct)) + ;; CHECK-NEXT: (local $2 (ref null $unshared-struct)) + ;; CHECK-NEXT: (local $3 (ref null $unshared-struct)) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-struct 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-xchg-ref-lower (param (ref null $unshared-struct)) (result (ref null $unshared-struct)) + (struct.atomic.rmw.xchg $unshared-struct 0 + (local.get 0) + (ref.null none) + ) + ) + + ;; CHECK: (func $cmpxchg-ref-lower (type $15) (param $0 (ref null $unshared-struct)) (result (ref null $unshared-struct)) + ;; CHECK-NEXT: (local $1 (ref null $unshared-struct)) + ;; CHECK-NEXT: (local $2 (ref null $unshared-struct)) + ;; CHECK-NEXT: (local $3 (ref null $unshared-struct)) + ;; CHECK-NEXT: (local $4 (ref null $unshared-struct)) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (local.tee $4 + ;; CHECK-NEXT: (struct.get $unshared-struct 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (struct.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (atomic.fence) + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + (func $cmpxchg-ref-lower (param (ref null $unshared-struct)) (result (ref null $unshared-struct)) + (struct.atomic.rmw.cmpxchg $unshared-struct 0 + (local.get 0) + (ref.null none) + (local.get 0) + ) + ) + + ;; CHECK: (func $rmw-add-i32-acqrel (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + (func $rmw-add-i32-acqrel (param (ref 
null $unshared-i32)) (result i32) + ;; Check that the lowering of an acqrel RMW does not have a fence. + (struct.atomic.rmw.add acqrel acqrel $unshared-i32 0 + (local.get 0) + (i32.const 1) + ) + ) + + ;; CHECK: (func $cmpxchg-i32-acqrel (type $13) (param $0 (ref null $unshared-i32)) (result i32) + ;; CHECK-NEXT: (local $1 (ref null $unshared-i32)) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.eq + ;; CHECK-NEXT: (local.tee $4 + ;; CHECK-NEXT: (struct.get $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (struct.set $unshared-i32 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + (func $cmpxchg-i32-acqrel (param (ref null $unshared-i32)) (result i32) + ;; Same for an cmpxchg. + (struct.atomic.rmw.cmpxchg acqrel acqrel $unshared-i32 0 + (local.get 0) + (i32.const 1) + (i32.const 2) + ) + ) +) From cd23371360318416aad28b4fdd3111dbe0dad3e1 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 17 Jan 2025 10:37:15 -0800 Subject: [PATCH 2/2] update comments --- src/passes/OptimizeInstructions.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 5b62bdfaa73..c58630a8e7a 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1874,12 +1874,12 @@ struct OptimizeInstructions Builder builder(*getModule()); - // Even when the access to shared memory, we can optimize out the modify and - // write parts if we know that the modified value is the same as the - // original value. This is valid because reads from writes that don't change - // the in-memory value can be considered to be reads from the previous write - // to the same location instead. That means there is no read that - // necessarily synchronizes with the write. + // Even when the RMW access is to shared memory, we can optimize out the + // modify and write parts if we know that the modified value is the same as + // the original value. This is valid because reads from writes that don't + // change the in-memory value can be considered to be reads from the + // previous write to the same location instead. That means there is no read + // that necessarily synchronizes with the write. auto* value = Properties::getFallthrough(curr->value, getPassOptions(), *getModule()); if (Properties::isSingleConstantExpression(value)) { @@ -1918,6 +1918,9 @@ struct OptimizeInstructions // thread can observe an intermediate state in the unshared memory. This // initially increases code size, but the more basic operations may be // more optimizable than the original RMW. + // TODO: Experiment to determine whether this is worthwhile on real code. + // Maybe we should do this optimization only when optimizing for speed over + // size. auto ref = builder.addVar(getFunction(), curr->ref->type); auto val = builder.addVar(getFunction(), curr->type); auto result = builder.addVar(getFunction(), curr->type);