From e4d2e7ccbc6c4daca4eee80dae96c5d5de1aa49a Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 12 Nov 2024 15:13:03 -0800 Subject: [PATCH 1/2] Add set data type to Starlark Experimental feature guarded by --experimental_enable_starlark_set. This is the Bazel implementation of https://github.com/bazelbuild/starlark/issues/264 Replicates the Python 3 set API and (in almost all respects) the starlark-go implementation, with the notable exception of explicitly not supporting (partial) ordering of sets. Note that there are no set-valued attributes (nor plans to add any), and set-valued select() expressions are not supported. RELNOTES: Add a set data type to Starlark, guarded by the --experimental_enable_starlark_set flag. PiperOrigin-RevId: 695886977 Change-Id: Id1e178bd3dd354619f188c4375d8a1256bd55f75 Cherry-picked from https://github.com/bazelbuild/bazel/commit/c5e08d4de65167e91045d99e89dc4b6a17e9fb39 --- .../semantics/BuildLanguageOptions.java | 11 + src/main/java/net/starlark/java/eval/BUILD | 2 + .../java/net/starlark/java/eval/Eval.java | 75 +- .../net/starlark/java/eval/EvalUtils.java | 31 +- .../net/starlark/java/eval/MethodLibrary.java | 23 +- .../java/net/starlark/java/eval/Starlark.java | 2 + .../starlark/java/eval/StarlarkIndexable.java | 23 +- .../java/eval/StarlarkMembershipTestable.java | 24 + .../starlark/java/eval/StarlarkSemantics.java | 3 + .../net/starlark/java/eval/StarlarkSet.java | 777 ++++++++++++++++++ .../net/starlark/java/eval/ScriptTest.java | 9 +- .../net/starlark/java/eval/testdata/set.star | 259 ++++++ 12 files changed, 1212 insertions(+), 27 deletions(-) create mode 100644 src/main/java/net/starlark/java/eval/StarlarkMembershipTestable.java create mode 100644 src/main/java/net/starlark/java/eval/StarlarkSet.java create mode 100644 src/test/java/net/starlark/java/eval/testdata/set.star diff --git a/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java 
b/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java index 0993f45fc5b358..01e6800d61bde2 100644 --- a/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java +++ b/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java @@ -803,6 +803,15 @@ public final class BuildLanguageOptions extends OptionsBase { + " attributes of symbolic macros or attribute default values.") public boolean incompatibleSimplifyUnconditionalSelectsInRuleAttrs; + @Option( + name = "experimental_enable_starlark_set", + defaultValue = "false", + documentationCategory = OptionDocumentationCategory.STARLARK_SEMANTICS, + effectTags = {OptionEffectTag.BUILD_FILE_SEMANTICS}, + metadataTags = {OptionMetadataTag.EXPERIMENTAL}, + help = "If true, enable the set data type and set() constructor in Starlark.") + public boolean experimentalEnableStarlarkSet; + @Option( name = "incompatible_locations_prefers_executable", defaultValue = "true", @@ -929,6 +938,8 @@ public StarlarkSemantics toStarlarkSemantics() { incompatibleSimplifyUnconditionalSelectsInRuleAttrs) .setBool( INCOMPATIBLE_LOCATIONS_PREFERS_EXECUTABLE, incompatibleLocationsPrefersExecutable) + .setBool( + StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET, experimentalEnableStarlarkSet) .build(); return INTERNER.intern(semantics); } diff --git a/src/main/java/net/starlark/java/eval/BUILD b/src/main/java/net/starlark/java/eval/BUILD index 7ad814002f9036..93f62b94c56464 100644 --- a/src/main/java/net/starlark/java/eval/BUILD +++ b/src/main/java/net/starlark/java/eval/BUILD @@ -51,7 +51,9 @@ java_library( "StarlarkInt.java", "StarlarkIterable.java", "StarlarkList.java", + "StarlarkMembershipTestable.java", "StarlarkSemantics.java", + "StarlarkSet.java", "StarlarkThread.java", "StarlarkValue.java", "StringModule.java", diff --git a/src/main/java/net/starlark/java/eval/Eval.java b/src/main/java/net/starlark/java/eval/Eval.java index 
11ba6fa0cbc572..20e8e4ce95e94c 100644 --- a/src/main/java/net/starlark/java/eval/Eval.java +++ b/src/main/java/net/starlark/java/eval/Eval.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import net.starlark.java.spelling.SpellChecker; import net.starlark.java.syntax.Argument; import net.starlark.java.syntax.AssignmentStatement; @@ -469,20 +470,66 @@ private static void execAugmentedAssignment(StarlarkThread.Frame fr, AssignmentS private static Object inplaceBinaryOp(StarlarkThread.Frame fr, TokenKind op, Object x, Object y) throws EvalException { - // list += iterable behaves like list.extend(iterable) - // TODO(b/141263526): following Python, allow list+=iterable (but not list+iterable). - if (op == TokenKind.PLUS && x instanceof StarlarkList && y instanceof StarlarkList) { - StarlarkList list = (StarlarkList) x; - list.extend(y); - return list; - } else if (op == TokenKind.PIPE && x instanceof Dict && y instanceof Map) { - // dict |= map merges the contents of the second operand (usually a dict) into the first. - @SuppressWarnings("unchecked") - Dict xDict = (Dict) x; - @SuppressWarnings("unchecked") - Map yMap = (Map) y; - xDict.putEntries(yMap); - return xDict; + switch (op) { + case PLUS: + // list += iterable behaves like list.extend(iterable) + // TODO(b/141263526): following Python, allow list+=iterable (but not list+iterable). + if (x instanceof StarlarkList && y instanceof StarlarkList) { + StarlarkList list = (StarlarkList) x; + list.extend(y); + return list; + } + break; + + case PIPE: + if (x instanceof Dict && y instanceof Map) { + // dict |= map merges the contents of the second operand (usually a dict) into the first. 
+ @SuppressWarnings("unchecked") + Dict xDict = (Dict) x; + @SuppressWarnings("unchecked") + Map yMap = (Map) y; + xDict.putEntries(yMap); + return xDict; + } else if (x instanceof StarlarkSet && y instanceof Set) { + // set |= set merges the contents of the second operand into the first. + @SuppressWarnings("unchecked") + StarlarkSet xSet = (StarlarkSet) x; + xSet.update(Tuple.of(y)); + return xSet; + } + break; + + case AMPERSAND: + if (x instanceof StarlarkSet && y instanceof Set) { + // set &= set replaces the first set with the intersection of the two sets. + @SuppressWarnings("unchecked") + StarlarkSet xSet = (StarlarkSet) x; + xSet.intersectionUpdate(Tuple.of(y)); + return xSet; + } + break; + + case CARET: + if (x instanceof StarlarkSet && y instanceof Set) { + // set ^= set replaces the first set with the symmetric difference of the two sets. + @SuppressWarnings("unchecked") + StarlarkSet xSet = (StarlarkSet) x; + xSet.symmetricDifferenceUpdate(y); + return xSet; + } + break; + + case MINUS: + if (x instanceof StarlarkSet && y instanceof Set) { + // set -= set removes all elements of the second set from the first set. 
+ @SuppressWarnings("unchecked") + StarlarkSet xSet = (StarlarkSet) x; + xSet.differenceUpdate(Tuple.of(y)); + return xSet; + } + break; + + default: // fall through } return EvalUtils.binaryOp(op, x, y, fr.thread); } diff --git a/src/main/java/net/starlark/java/eval/EvalUtils.java b/src/main/java/net/starlark/java/eval/EvalUtils.java index 1c9c8ad024b004..2ace0b6521d9bf 100644 --- a/src/main/java/net/starlark/java/eval/EvalUtils.java +++ b/src/main/java/net/starlark/java/eval/EvalUtils.java @@ -15,6 +15,7 @@ import java.util.IllegalFormatException; import java.util.Map; +import java.util.Set; import javax.annotation.Nullable; import net.starlark.java.syntax.TokenKind; @@ -133,6 +134,11 @@ static Object binaryOp(TokenKind op, Object x, Object y, StarlarkThread starlark // map | map (usually dicts) return Dict.builder().putAll((Map) x).putAll((Map) y).build(mu); } + } else if (x instanceof Set && y instanceof Set) { + // set | set + if (semantics.getBool(StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET)) { + return StarlarkSet.empty().union(Tuple.of(x, y), starlarkThread); + } } break; @@ -140,6 +146,13 @@ static Object binaryOp(TokenKind op, Object x, Object y, StarlarkThread starlark if (x instanceof StarlarkInt && y instanceof StarlarkInt) { // int & int return StarlarkInt.and((StarlarkInt) x, (StarlarkInt) y); + } else if (x instanceof Set && y instanceof Set) { + // set & set + if (semantics.getBool(StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET)) { + StarlarkSet xSet = + x instanceof StarlarkSet ? 
(StarlarkSet) x : StarlarkSet.checkedCopyOf(mu, x); + return xSet.intersection(Tuple.of(y), starlarkThread); + } } break; @@ -147,6 +160,13 @@ static Object binaryOp(TokenKind op, Object x, Object y, StarlarkThread starlark if (x instanceof StarlarkInt && y instanceof StarlarkInt) { // int ^ int return StarlarkInt.xor((StarlarkInt) x, (StarlarkInt) y); + } else if (x instanceof Set && y instanceof Set) { + // set ^ set + if (semantics.getBool(StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET)) { + StarlarkSet xSet = + x instanceof StarlarkSet ? (StarlarkSet) x : StarlarkSet.checkedCopyOf(mu, x); + return xSet.symmetricDifference(y, starlarkThread); + } } break; @@ -186,6 +206,13 @@ static Object binaryOp(TokenKind op, Object x, Object y, StarlarkThread starlark double z = xf - ((StarlarkInt) y).toFiniteDouble(); return StarlarkFloat.of(z); } + } else if (x instanceof Set && y instanceof Set) { + // set - set + if (semantics.getBool(StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET)) { + StarlarkSet xSet = + x instanceof StarlarkSet ? 
(StarlarkSet) x : StarlarkSet.checkedCopyOf(mu, x); + return xSet.difference(Tuple.of(y), starlarkThread); + } } break; @@ -344,8 +371,8 @@ static Object binaryOp(TokenKind op, Object x, Object y, StarlarkThread starlark return compare(x, y) >= 0; case IN: - if (y instanceof StarlarkIndexable) { - return ((StarlarkIndexable) y).containsKey(semantics, x); + if (y instanceof StarlarkMembershipTestable) { + return ((StarlarkMembershipTestable) y).containsKey(semantics, x); } else if (y instanceof StarlarkIndexable.Threaded) { return ((StarlarkIndexable.Threaded) y).containsKey(starlarkThread, semantics, x); } else if (y instanceof String) { diff --git a/src/main/java/net/starlark/java/eval/MethodLibrary.java b/src/main/java/net/starlark/java/eval/MethodLibrary.java index b8d206ad1017ff..fd2eccb6302d14 100644 --- a/src/main/java/net/starlark/java/eval/MethodLibrary.java +++ b/src/main/java/net/starlark/java/eval/MethodLibrary.java @@ -406,7 +406,7 @@ public StarlarkList list(StarlarkIterable x, StarlarkThread thread) throws @StarlarkMethod( name = "len", doc = - "Returns the length of a string, sequence (such as a list or tuple), dict, or other" + "Returns the length of a string, sequence (such as a list or tuple), dict, set, or other" + " iterable.", parameters = {@Param(name = "x", doc = "The value whose length to report.")}, useStarlarkThread = true) @@ -642,6 +642,27 @@ public StarlarkInt intForStarlark(Object x, Object baseO) throws EvalException { return dict; } + @StarlarkMethod( + name = "set", + doc = + "Experimental. This API is experimental and may change at any time. Please do not" + + " depend on it. It may be enabled on an experimental basis by setting" + + " --experimental_enable_starlark_set.\n" // + + "

Creates a new set, optionally initialized to" + + " contain the elements from a given iterable.", + parameters = { + @Param(name = "elements", defaultValue = "[]", doc = "A set, sequence, or dict."), + }, + useStarlarkThread = true) + public StarlarkSet set(Object elements, StarlarkThread thread) throws EvalException { + // Ordinarily we would use StarlarkMethod#enableOnlyWithFlag, but this doesn't work for + // top-level symbols, so enforce it here instead. + if (!thread.getSemantics().getBool(StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET)) { + throw Starlark.errorf("Use of set() requires --experimental_enable_starlark_set"); + } + return StarlarkSet.checkedCopyOf(thread.mutability(), elements); + } + @StarlarkMethod( name = "enumerate", doc = diff --git a/src/main/java/net/starlark/java/eval/Starlark.java b/src/main/java/net/starlark/java/eval/Starlark.java index 557aae0547b19a..b517e005b3d87e 100644 --- a/src/main/java/net/starlark/java/eval/Starlark.java +++ b/src/main/java/net/starlark/java/eval/Starlark.java @@ -312,6 +312,8 @@ public static int len(Object x) { return ((Sequence) x).size(); } else if (x instanceof Dict) { return ((Dict) x).size(); + } else if (x instanceof StarlarkSet) { + return ((StarlarkSet) x).size(); } else if (x instanceof StarlarkIterable) { // Iterables.size runs in constant time if x implements Collection. return Iterables.size((Iterable) x); diff --git a/src/main/java/net/starlark/java/eval/StarlarkIndexable.java b/src/main/java/net/starlark/java/eval/StarlarkIndexable.java index fed0386d450fe3..db44bef1d2b628 100644 --- a/src/main/java/net/starlark/java/eval/StarlarkIndexable.java +++ b/src/main/java/net/starlark/java/eval/StarlarkIndexable.java @@ -17,20 +17,25 @@ /** * A Starlark value that support indexed access ({@code object[key]}) and membership tests ({@code * key in object}). + * + *

Implementations of this interface come in three flavors: map-like, sequence-like, and + * string-like. + * + *

    + *
  • For map-like objects, 'x in y' should return True when 'y[x]' is valid; otherwise, it + * should either be False or a failure. Examples: dict. + *
  • For sequence-like objects, 'x in y' should return True when 'x == y[i]' for some integer + * 'i'; otherwise, it should either be False or a failure. Examples: list, tuple, and string + * (which, notably, is not a {@link Sequence}). + *
  • For string-like objects, 'x in y' should return True when 'x' is a substring of 'y', i.e. + * 'x[i] == y[i + n]' for some 'n' and all i in [0, len(x)). Examples: string. + *
*/ -public interface StarlarkIndexable extends StarlarkValue { +public interface StarlarkIndexable extends StarlarkMembershipTestable { /** Returns the value associated with the given key. */ Object getIndex(StarlarkSemantics semantics, Object key) throws EvalException; - /** - * Returns whether the key is in the object. New types should try to follow the semantics of dict: - * 'x in y' should return True when 'y[x]' is valid; otherwise, it should either be False or a - * failure. Note however that the builtin types string, list, and tuple do not follow this - * convention. - */ - boolean containsKey(StarlarkSemantics semantics, Object key) throws EvalException; - /** * A variant of {@link StarlarkIndexable} that also provides a StarlarkThread instance on method * calls. diff --git a/src/main/java/net/starlark/java/eval/StarlarkMembershipTestable.java b/src/main/java/net/starlark/java/eval/StarlarkMembershipTestable.java new file mode 100644 index 00000000000000..747b0d42b4a14f --- /dev/null +++ b/src/main/java/net/starlark/java/eval/StarlarkMembershipTestable.java @@ -0,0 +1,24 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package net.starlark.java.eval; + +/** + * A Starlark value that support membership tests ({@code key in object} and {@code key not in + * object}). + */ +public interface StarlarkMembershipTestable extends StarlarkValue { + /** Returns whether the key is in the object. 
*/ + boolean containsKey(StarlarkSemantics semantics, Object key) throws EvalException; +} diff --git a/src/main/java/net/starlark/java/eval/StarlarkSemantics.java b/src/main/java/net/starlark/java/eval/StarlarkSemantics.java index 3e2626f418f9da..658977e013bf08 100644 --- a/src/main/java/net/starlark/java/eval/StarlarkSemantics.java +++ b/src/main/java/net/starlark/java/eval/StarlarkSemantics.java @@ -256,4 +256,7 @@ public final String toString() { * unconditionally prohibits recursion. */ public static final String ALLOW_RECURSION = "-allow_recursion"; + + /** Whether StarlarkSet objects may be constructed by the interpreter. */ + public static final String EXPERIMENTAL_ENABLE_STARLARK_SET = "-experimental_enable_starlark_set"; } diff --git a/src/main/java/net/starlark/java/eval/StarlarkSet.java b/src/main/java/net/starlark/java/eval/StarlarkSet.java new file mode 100644 index 00000000000000..144fefd780c744 --- /dev/null +++ b/src/main/java/net/starlark/java/eval/StarlarkSet.java @@ -0,0 +1,777 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package net.starlark.java.eval; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import java.util.AbstractSet; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import net.starlark.java.annot.Param; +import net.starlark.java.annot.StarlarkBuiltin; +import net.starlark.java.annot.StarlarkMethod; + +/** A finite, mutable set of Starlark values. */ +@StarlarkBuiltin( + name = "set", + category = "core", + doc = + """ +Experimental. This API is experimental and may change at any time. Please do not depend on +it. It may be enabled on an experimental basis by setting +--experimental_enable_starlark_set. + +

The built-in mutable set type. Example set expressions: + +

+x = set()           # x is an empty set
+y = set([1, 2, 3])  # y is a set with 3 elements
+3 in y              # True
+0 in y              # False
+len(x)              # 0
+len(y)              # 3
+
+ +

A set used in Boolean context is true if and only if it is non-empty. + +

+s = set()
+"non-empty" if s else "empty"  # "empty"
+t = set(["x", "y"])
+"non-empty" if t else "empty"  # "non-empty"
+
+ +

The elements of a set must be hashable; x may be an element of a set if and only if +x may be used as a key of a dict. + +

A set itself is not hashable; therefore, you cannot have a set with another set as an +element. + +

You cannot access the elements of a set by index, but you can iterate over them, and you can +obtain the list of a set's elements in iteration order using the list() built-in +function. Just like for lists, it is an error to mutate a set while it is being iterated over. The +order of iteration matches insertion order: + +

+s = set([3, 1, 3])
+s.add(2)
+# prints 3, 1, 2
+for item in s:
+    print(item)
+list(s)  # [3, 1, 2]
+
+ +

A set s is equal to t if and only if t is a set containing +the same elements, possibly with a different iteration order. In particular, a set is +not equal to its list of elements. + +

Sets are not ordered; the <, <=, >, and +>= operations are not defined for sets, and a list of sets cannot be sorted - unlike +in Python. + +

The | operation on two sets returns the union of the two sets: a set containing the +elements found in either one or both of the original sets. The | operation has an +augmented assignment version; s |= t adds to s all the elements of +t. + +

+set([1, 2]) | set([3, 2])  # set([1, 2, 3])
+s = set([1, 2])
+s |= set([2, 3, 4])        # s now equals set([1, 2, 3, 4])
+
+ +

The & operation on two sets returns the intersection of the two sets: a set +containing only the elements found in both of the original sets. The & operation +has an augmented assignment version; s &= t removes from s all the +elements not found in t. + +

+set([1, 2]) & set([2, 3])  # set([2])
+set([1, 2]) & set([3, 4])  # set()
+s = set([1, 2])
+s &= set([0, 1])           # s now equals set([1])
+
+ +

The - operation on two sets returns the difference of the two sets: a set containing +the elements found in the left-hand side set but not the right-hand side set. The - +operation has an augmented assignment version; s -= t removes from s all +the elements found in t. + +

+set([1, 2]) - set([2, 3])  # set([1])
+set([1, 2]) - set([3, 4])  # set([1, 2])
+s = set([1, 2])
+s -= set([0, 1])           # s now equals set([2])
+
+ +

The ^ operation on two sets returns the symmetric difference of the two sets: a set +containing the elements found in exactly one of the two original sets, but not in both. The +^ operation has an augmented assignment version; s ^= t removes from +s any element of t found in s and adds to s any +element of t not found in s. + +

+set([1, 2]) ^ set([2, 3])  # set([1, 3])
+set([1, 2]) ^ set([3, 4])  # set([1, 2, 3, 4])
+s = set([1, 2])
+s ^= set([0, 1])           # s now equals set([2, 0])
+
+""") +public final class StarlarkSet extends AbstractSet + implements Mutability.Freezable, StarlarkMembershipTestable, StarlarkIterable { + + private static final StarlarkSet EMPTY = new StarlarkSet<>(ImmutableSet.of()); + + // Either LinkedHashSet or ImmutableSet. + private final Set contents; + // Number of active iterators (unused once frozen). + private transient int iteratorCount; // transient for serialization by Bazel + + /** Final except for {@link #unsafeShallowFreeze}; must not be modified any other way. */ + private Mutability mutability; + + @SuppressWarnings("NonApiType") + private StarlarkSet(Mutability mutability, LinkedHashSet contents) { + checkNotNull(mutability); + checkArgument(mutability != Mutability.IMMUTABLE); + this.mutability = mutability; + this.contents = contents; + } + + private StarlarkSet(ImmutableSet contents) { + // An immutable set might as well store its contents as an ImmutableSet, since ImmutableSet + // both is more memory-efficient than LinkedHashSet and also it has the requisite deterministic + // iteration order. + this.mutability = Mutability.IMMUTABLE; + this.contents = contents; + } + + @Override + public boolean truth() { + return !isEmpty(); + } + + @Override + public boolean isImmutable() { + return mutability().isFrozen(); + } + + @Override + public boolean updateIteratorCount(int delta) { + if (mutability().isFrozen()) { + return false; + } + if (delta > 0) { + iteratorCount++; + } else if (delta < 0) { + iteratorCount--; + } + return iteratorCount > 0; + } + + @Override + public void checkHashable() throws EvalException { + // Even a frozen set is unhashable. 
+ throw Starlark.errorf("unhashable type: 'set'"); + } + + @Override + public int hashCode() { + return contents.hashCode(); + } + + @Override + public void repr(Printer printer) { + if (isEmpty()) { + printer.append("set()"); + } else { + printer.printList(this, "set([", ", ", "])"); + } + } + + @Override + public String toString() { + return Starlark.repr(this); + } + + @Override + public boolean equals(Object o) { + return contents.equals(o); + } + + @Override + public Iterator iterator() { + if (contents instanceof ImmutableSet) { + return contents.iterator(); + } else { + // Prohibit mutation through Iterator.remove(). + return Collections.unmodifiableSet(contents).iterator(); + } + } + + @Override + public int size() { + return contents.size(); + } + + @Override + public boolean isEmpty() { + return contents.isEmpty(); + } + + @Override + public Object[] toArray() { + return contents.toArray(); + } + + @Override + public T[] toArray(T[] a) { + return contents.toArray(a); + } + + @Override + public boolean contains(Object o) { + return contents.contains(o); + } + + @Override + public boolean containsAll(Collection c) { + return contents.containsAll(c); + } + + @Override + public boolean containsKey(StarlarkSemantics semantics, Object element) { + return contents.contains(element); + } + + /** Returns an immutable empty set. */ + // Safe because the empty singleton is immutable. + @SuppressWarnings("unchecked") + public static StarlarkSet empty() { + return (StarlarkSet) EMPTY; + } + + /** Returns a new empty set with the specified mutability. */ + public static StarlarkSet of(@Nullable Mutability mu) { + if (mu == null) { + mu = Mutability.IMMUTABLE; + } + if (mu == Mutability.IMMUTABLE) { + return empty(); + } else { + return new StarlarkSet<>(mu, Sets.newLinkedHashSetWithExpectedSize(1)); + } + } + + /** + * Returns a set with the specified mutability containing the entries of {@code elements}. Tries + * to elide copying if {@code elements} is immutable. 
+ * + * @param elements a collection of elements, which must be Starlark-hashable (note that this + * method assumes but does not verify their hashability), to add to the new set. + */ + public static StarlarkSet copyOf( + @Nullable Mutability mu, Collection elements) { + if (elements.isEmpty()) { + return of(mu); + } + + if (mu == null) { + mu = Mutability.IMMUTABLE; + } + + if (mu == Mutability.IMMUTABLE) { + if (elements instanceof ImmutableSet) { + elements.forEach(Starlark::checkValid); + @SuppressWarnings("unchecked") + ImmutableSet immutableSet = (ImmutableSet) elements; + return new StarlarkSet<>(immutableSet); + } + + if (elements instanceof StarlarkSet && ((StarlarkSet) elements).isImmutable()) { + @SuppressWarnings("unchecked") + StarlarkSet starlarkSet = (StarlarkSet) elements; + return starlarkSet; + } + + ImmutableSet.Builder immutableSetBuilder = + ImmutableSet.builderWithExpectedSize(elements.size()); + elements.forEach(e -> immutableSetBuilder.add(Starlark.checkValid(e))); + return new StarlarkSet<>(immutableSetBuilder.build()); + } else { + LinkedHashSet linkedHashSet = Sets.newLinkedHashSetWithExpectedSize(elements.size()); + elements.forEach(e -> linkedHashSet.add(Starlark.checkValid(e))); + return new StarlarkSet<>(mu, linkedHashSet); + } + } + + private static StarlarkSet wrapOrImmutableCopy(Mutability mu, LinkedHashSet elements) { + checkNotNull(mu); + if (mu == Mutability.IMMUTABLE) { + return elements.isEmpty() ? empty() : new StarlarkSet<>(ImmutableSet.copyOf(elements)); + } else { + return new StarlarkSet<>(mu, elements); + } + } + + /** + * A variant of {@link #copyOf} intended to be used from Starlark. Unlike {@link #copyOf}, this + * method does verify that the elements being added to the set are Starlark-hashable. + * + * @param elements a collection of elements to add to the new set, or a map whose keys will be + * added to the new set. 
+ */ + public static StarlarkSet checkedCopyOf(@Nullable Mutability mu, Object elements) + throws EvalException { + @SuppressWarnings("unchecked") + Collection collection = + (Collection) toHashableCollection(elements, "set constructor argument"); + return copyOf(mu, collection); + } + + /** + * Returns an immutable set containing the entries of {@code elements}. Tries to elide copying if + * {@code elements} is already immutable. + * + * @param elements a collection of elements, which must be Starlark-hashable (note that this + * method assumes but does not verify their hashability), to add to the new set. + */ + public static StarlarkSet immutableCopyOf(Collection elements) { + return copyOf(null, elements); + } + + @Override + public Mutability mutability() { + return mutability; + } + + @Override + public void unsafeShallowFreeze() { + Mutability.Freezable.checkUnsafeShallowFreezePrecondition(this); + this.mutability = Mutability.IMMUTABLE; + } + + @StarlarkMethod( + name = "issubset", + doc = + """ +Returns true if this set is a subset of another. + +

For example, +

+set([1, 2]).issubset([1, 2, 3]) == True
+set([1, 2]).issubset([1, 2]) == True
+set([1, 2]).issubset([2, 3]) == False
+
+""", + parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + public boolean isSubset(Object other) throws EvalException { + return toCollection(other, "issubset argument").containsAll(this.contents); + } + + @StarlarkMethod( + name = "issuperset", + doc = + """ +Returns true if this set is a superset of another. + +

For example, +

+set([1, 2, 3]).issuperset([1, 2]) == True
+set([1, 2, 3]).issuperset([1, 2, 3]) == True
+set([1, 2, 3]).issuperset([2, 3, 4]) == False
+
+""", + parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + public boolean isSuperset(Object other) throws EvalException { + return contents.containsAll(toCollection(other, "issuperset argument")); + } + + @StarlarkMethod( + name = "isdisjoint", + doc = + """ +Returns true if this set has no elements in common with another. + +

For example, +

+set([1, 2]).isdisjoint([3, 4]) == True
+set().isdisjoint(set()) == True
+set([1, 2]).isdisjoint([2, 3]) == False
+
+""", + parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + public boolean isDisjoint(Object other) throws EvalException { + return Collections.disjoint(this.contents, toCollection(other, "isdisjoint argument")); + } + + /** + * Intended for use from Starlark; if used from Java, the caller should ensure that the elements + * to be added are instances of {@code E}. + */ + @StarlarkMethod( + name = "update", + doc = + """ +Adds the elements found in others to this set. + +

For example, +

+x = set([1, 2])
+x.update([2, 3], [3, 4])
+# x is now set([1, 2, 3, 4])
+
+""", + extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts.")) + public void update(Tuple others) throws EvalException { + Starlark.checkMutable(this); + for (Object other : others) { + @SuppressWarnings("unchecked") + Collection otherCollection = + (Collection) toHashableCollection(other, "update argument"); + contents.addAll(otherCollection); + } + } + + @StarlarkMethod( + name = "add", + doc = "Adds an element to the set.", + parameters = {@Param(name = "element", doc = "Element to add.")}) + public void addElement(E element) throws EvalException { + Starlark.checkMutable(this); + Starlark.checkHashable(element); + contents.add(element); + } + + @StarlarkMethod( + name = "remove", + doc = + """ +Removes an element, which must be present in the set, from the set. Fails if the element was not +present in the set. +""", + parameters = {@Param(name = "element", doc = "Element to remove.")}) + public void removeElement(E element) throws EvalException { + Starlark.checkMutable(this); + if (!contents.remove(element)) { + throw Starlark.errorf("element %s not found in set", Starlark.repr(element)); + } + } + + @StarlarkMethod( + name = "discard", + doc = "Removes an element from the set if it is present.", + parameters = {@Param(name = "element", doc = "Element to discard.")}) + public void discard(E element) throws EvalException { + Starlark.checkMutable(this); + contents.remove(element); + } + + @StarlarkMethod( + name = "pop", + doc = "Removes and returns the first element of the set. 
Fails if the set is empty.") + public E pop() throws EvalException { + Starlark.checkMutable(this); + if (isEmpty()) { + throw Starlark.errorf("set is empty"); + } + E element = contents.iterator().next(); + contents.remove(element); + return element; + } + + @StarlarkMethod(name = "clear", doc = "Removes all the elements of the set.") + public void clearElements() throws EvalException { + Starlark.checkMutable(this); + contents.clear(); + } + + @StarlarkMethod( + name = "union", + doc = + """ +Returns a new mutable set containing the union of this set with others. + +

For example, +

+set([1, 2]).union([2, 3, 4], [4, 5]) == set([1, 2, 3, 4, 5])
+
+""", + extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts."), + useStarlarkThread = true) + public StarlarkSet union(Tuple others, StarlarkThread thread) throws EvalException { + LinkedHashSet newContents = new LinkedHashSet<>(contents); + for (Object other : others) { + newContents.addAll(toHashableCollection(other, "union argument")); + } + return wrapOrImmutableCopy(thread.mutability(), newContents); + } + + @StarlarkMethod( + name = "intersection", + doc = + """ +Returns a new mutable set containing the intersection of this set with others. + +

For example, +

+set([1, 2, 3]).intersection([1, 2], [2, 3]) == set([2])
+
+""", + extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts."), + useStarlarkThread = true) + public StarlarkSet intersection(Tuple others, StarlarkThread thread) throws EvalException { + LinkedHashSet newContents = new LinkedHashSet<>(contents); + for (Object other : others) { + newContents.retainAll(toCollection(other, "intersection argument")); + } + return wrapOrImmutableCopy(thread.mutability(), newContents); + } + + @StarlarkMethod( + name = "intersection_update", + doc = + """ +Removes any elements not found in all others from this set. + +

For example, +

+x = set([1, 2, 3, 4])
+x.intersection_update([2, 3], [3, 4])
+# x is now set([3])
+
+""", + extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts.")) + public void intersectionUpdate(Tuple others) throws EvalException { + Starlark.checkMutable(this); + for (Object other : others) { + contents.retainAll(toCollection(other, "intersection_update argument")); + } + } + + @StarlarkMethod( + name = "difference", + doc = + """ +Returns a new mutable set containing the difference of this set with others. + +

For example, +

+set([1, 2, 3]).difference([2], [3, 4]) == set([1])
+
+""", + extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts."), + useStarlarkThread = true) + public StarlarkSet difference(Tuple others, StarlarkThread thread) throws EvalException { + LinkedHashSet newContents = new LinkedHashSet<>(contents); + for (Object other : others) { + newContents.removeAll(toCollection(other, "difference argument")); + } + return wrapOrImmutableCopy(thread.mutability(), newContents); + } + + @StarlarkMethod( + name = "difference_update", + doc = + """ +Removes any elements found in any others from this set. + +

For example, +

+x = set([1, 2, 3, 4])
+x.difference_update([2, 3], [3, 4])
+# x is now set([1])
+
+""", + extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts.")) + public void differenceUpdate(Tuple others) throws EvalException { + Starlark.checkMutable(this); + for (Object other : others) { + contents.removeAll(toCollection(other, "intersection_update argument")); + } + } + + @StarlarkMethod( + name = "symmetric_difference", + doc = + """ +Returns a new mutable set containing the symmetric difference of this set with another set, +sequence, or dict. + +

For example, +

+set([1, 2, 3]).symmetric_difference([2, 3, 4]) == set([1, 4])
+
+""", + parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}, + useStarlarkThread = true) + public StarlarkSet symmetricDifference(Object other, StarlarkThread thread) + throws EvalException { + LinkedHashSet newContents = new LinkedHashSet<>(contents); + for (Object element : toHashableCollection(other, "symmetric_difference argument")) { + if (contents.contains(element)) { + newContents.remove(element); + } else { + newContents.add(element); + } + } + return wrapOrImmutableCopy(thread.mutability(), newContents); + } + + /** + * Intended for use from Starlark; if used from Java, the caller should ensure that the elements + * to be added are instances of {@code E}. + */ + @StarlarkMethod( + name = "symmetric_difference_update", + doc = + """ +Modifies this set in place so that it contains the symmetric difference of this set with another set, +sequence, or dict. + +

For example, +

+x = set([1, 2, 3]); x.symmetric_difference_update([2, 3, 4])  # x is now set([1, 4])
+
+""", + parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + public void symmetricDifferenceUpdate(Object other) throws EvalException { + Starlark.checkMutable(this); + ImmutableSet originalContents = ImmutableSet.copyOf(contents); + for (Object element : toHashableCollection(other, "symmetric_difference_update argument")) { + if (originalContents.contains(element)) { + contents.remove(element); + } else { + @SuppressWarnings("unchecked") + E castElement = (E) element; + contents.add(castElement); + } + } + } + + /** + * Verifies that {@code other} is either a collection or a map. + * + * @return {@code other} if it is a collection, or the key set of {@code other} if it is a map. + */ + private static Collection toCollection(Object other, String what) throws EvalException { + if (other instanceof Collection) { + return (Collection) other; + } else if (other instanceof Map) { + return ((Map) other).keySet(); + } + throw notSizedIterableError(other, what); + } + + /** + * A variant of {@link #toCollection} which additionally checks whether the returned collection's + * elements are Starlark-hashable. + * + * @return {@code other} if it is a collection, or the key set of {@code other} if it is a map. + */ + private static Collection toHashableCollection(Object other, String what) + throws EvalException { + if (other instanceof Collection) { + Collection collection = (Collection) other; + // Assume that elements of a StarlarkSet have already been checked to be hashable. + if (!(collection instanceof StarlarkSet)) { + for (Object element : collection) { + Starlark.checkHashable(element); + } + } + return collection; + } else if (other instanceof Map) { + Set keySet = ((Map) other).keySet(); + // Assume that keys of a Dict have already been checked to be hashable. 
+ if (!(other instanceof Dict)) { + for (Object element : keySet) { + Starlark.checkHashable(element); + } + } + return keySet; + } + throw notSizedIterableError(other, what); + } + + // Starlark doesn't have a "sized iterable" interface - so we enumerate the types we expect. + private static EvalException notSizedIterableError(Object other, String what) { + return Starlark.errorf( + "for %s got value of type '%s', want a set, sequence, or dict", what, Starlark.type(other)); + } + + // Prohibit Java Set mutators. + + /** + * @deprecated use {@link #addElement} instead. + */ + @Deprecated + @Override + public boolean add(E e) { + throw new UnsupportedOperationException(); + } + + /** + * @deprecated use {@link #update} instead. + */ + @Deprecated + @Override + public boolean addAll(Collection c) { + throw new UnsupportedOperationException(); + } + + /** + * @deprecated use {@link #clearElements} instead. + */ + @Deprecated + @Override + public void clear() { + throw new UnsupportedOperationException(); + } + + /** + * @deprecated use {@link #removeElement} instead. + */ + @Deprecated + @Override + public boolean remove(Object o) { + throw new UnsupportedOperationException(); + } + + /** + * @deprecated use {@link #differenceUpdate} instead. + */ + @Deprecated + @Override + public boolean removeAll(Collection c) { + throw new UnsupportedOperationException(); + } + + /** + * @deprecated use {@link #intersectionUpdate} instead. + */ + @Deprecated + @Override + public boolean retainAll(Collection c) { + throw new UnsupportedOperationException(); + } +} diff --git a/src/test/java/net/starlark/java/eval/ScriptTest.java b/src/test/java/net/starlark/java/eval/ScriptTest.java index 57661d588dc7f3..f8409a21807bbb 100644 --- a/src/test/java/net/starlark/java/eval/ScriptTest.java +++ b/src/test/java/net/starlark/java/eval/ScriptTest.java @@ -228,7 +228,14 @@ public static void main(String[] args) throws Exception { Starlark.addMethods(predeclared, new ScriptTest()); // e.g. 
assert_eq predeclared.put("json", Json.INSTANCE); - StarlarkSemantics semantics = StarlarkSemantics.DEFAULT; + // TODO(b/376078033): remove special set.star handling once Starlark sets are enabled by + // default. + StarlarkSemantics semantics = + name.equals("set.star") + ? StarlarkSemantics.builder() + .setBool(StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET, true) + .build() + : StarlarkSemantics.DEFAULT; Module module = Module.withPredeclared(semantics, predeclared.buildOrThrow()); try (Mutability mu = Mutability.createAllowingShallowFreeze("test")) { StarlarkThread thread = StarlarkThread.createTransient(mu, semantics); diff --git a/src/test/java/net/starlark/java/eval/testdata/set.star b/src/test/java/net/starlark/java/eval/testdata/set.star new file mode 100644 index 00000000000000..5734335315bc89 --- /dev/null +++ b/src/test/java/net/starlark/java/eval/testdata/set.star @@ -0,0 +1,259 @@ +# constructor +assert_eq(type(set()), "set") +assert_eq(list(set()), []) +assert_eq(set(), set([])) +assert_eq(type(set([1, 3, 2, 3])), "set") +assert_eq(list(set([1, 3, 2, 3])), [1, 3, 2]) +assert_eq(type(set("hello".elems())), "set") +assert_eq(list(set("hello".elems())), ["h", "e", "l", "o"]) +assert_eq(type(set(range(3))), "set") +assert_eq(list(set(range(3))), [0, 1, 2]) +assert_eq(type(set({"a": 1, "b": 2, "c": 0})), "set") +assert_eq(list(set({"a": 1, "b": 2, "c": 0})), ["a", "b", "c"]) +assert_eq(type(set(set([3, 1, 2]))), "set") +assert_eq(list(set(set([3, 1, 2]))), [3, 1, 2]) +assert_fails(lambda: set(1), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: set([1], [2]), "accepts no more than 1 positional argument") +assert_fails(lambda: set([1, 2, [3]]), "unhashable type: 'list'") + +# sets are not hashable +assert_fails(lambda: set([set()]), "unhashable type: 'set'") +assert_fails(lambda: {set([1]): 1}, "unhashable type: 'set'") + +# stringification +assert_eq(str(set()), "set()") +assert_eq(repr(set()), "set()") 
+assert_eq(json.encode(set()), "[]") +assert_eq(str(set([1, 3, 2, 3])), "set([1, 3, 2])") +assert_eq(repr(set([1, 3, 2, 3])), "set([1, 3, 2])") +assert_eq(json.encode(set([3, 2, 1])), "[3,2,1]") + +# membership +assert_eq(1 in set([1, 2, 3]), True) +assert_eq(0 in set([1, 2, 3]), False) +assert_eq(None in set(), False) + +# truth +assert_(not set()) +assert_(set([False])) +assert_(set([1, 2, 3])) + +# len +assert_eq(len(set()), 0) +assert_eq(len(set([1, 2, 3])), 3) +assert_eq(len(set("hello".elems())), 4) + +# a set is equal to another set with the same elements (in any order) +# a set is *not* equal to a non-set container with the same elements +assert_eq(set() == set(), True) +assert_eq(set() == [], False) +assert_eq(set() == dict(), False) +assert_eq(set([1, 2]) == set([2, 1]), True) +assert_eq(set([1, 2]) == [1, 2], False) +assert_eq(set([1, 2]) == {1: "one", 2: "two"}, False) +assert_eq(set([1, 2]) != set([2, 3]), True) +assert_eq(set([1, 2]) != [2, 3], True) + +# unsupported comparison +assert_fails(lambda: set([1]) < set([1, 2]), "unsupported comparison") +assert_fails(lambda: set([1, 2]) <= set([1, 2]), "unsupported comparison") +assert_fails(lambda: set([1, 2]) > set([1, 2]), "unsupported comparison") +assert_fails(lambda: set([1, 2]) >= set([1, 2]), "unsupported comparison") +assert_fails(lambda: sorted([set(), set([1]), set([2])]), "unsupported comparison") + +# binary operations +assert_eq(set([1, 2]) | set([2, 3]), set([1, 2, 3])) +assert_eq(set([1, 2]) & set([2, 3]), set([2])) +assert_eq(set([1, 2]) - set([2, 3]), set([1])) +assert_eq(set([1, 2]) ^ set([2, 3]), set([1, 3])) + +# unsupported binary operations +assert_fails(lambda: set([1]) + set([2]), "unsupported binary operation") +assert_fails(lambda: set([1, 2]) | [2, 3], "unsupported binary operation") +assert_fails(lambda: set([1, 2]) & [2, 3], "unsupported binary operation") +assert_fails(lambda: set([1, 2]) - [2, 3], "unsupported binary operation") +assert_fails(lambda: set([1, 2]) ^ [2, 3], 
"unsupported binary operation") + +# binary inplace mutations +def pipe_equals(x, y): + x |= y + +def amp_equals(x, y): + x &= y + +def minus_equals(x, y): + x -= y + +def caret_equals(x, y): + x ^= y + +inplace_set = set([1, 2]) +pipe_equals(inplace_set, set([2, 3, 4])) +assert_eq(inplace_set, set([1, 2, 3, 4])) +amp_equals(inplace_set, set([2, 3, 4, 5])) +assert_eq(inplace_set, set([2, 3, 4])) +minus_equals(inplace_set, set([1, 3])) +assert_eq(inplace_set, set([2, 4])) +caret_equals(inplace_set, set([1, 2])) +assert_eq(inplace_set, set([1, 4])) + +# unsupported mutations of a frozen value +frozen_set = set([1, 2]) +freeze(frozen_set) +assert_fails(lambda: pipe_equals(frozen_set, set([2, 3, 4])), "trying to mutate a frozen set value") +assert_fails(lambda: amp_equals(frozen_set, set([2, 3, 4])), "trying to mutate a frozen set value") +assert_fails(lambda: minus_equals(frozen_set, set([1, 3])), "trying to mutate a frozen set value") +assert_fails(lambda: caret_equals(frozen_set, set([1, 2])), "trying to mutate a frozen set value") + +# unsupported binary inplace mutations +def always_unsupported_plus_equals(x, y): + x += y + +assert_fails(lambda: always_unsupported_plus_equals(set([1]), set([2])), "unsupported binary operation") +assert_fails(lambda: pipe_equals(set([1, 2]), [2, 3]), "unsupported binary operation") +assert_fails(lambda: amp_equals(set([1, 2]), [2, 3]), "unsupported binary operation") +assert_fails(lambda: minus_equals(set([1, 2]), [2, 3]), "unsupported binary operation") +assert_fails(lambda: caret_equals(set([1, 2]), [2, 3]), "unsupported binary operation") + +# unsupported indexing +assert_fails(lambda: set([1, 2])[0], "type 'set' has no operator \\[\\]") + +# add +add_set = set([1, 2, 3]) +add_set.add(0) +assert_eq(list(add_set), [1, 2, 3, 0]) +add_set.add(1) +assert_eq(list(add_set), [1, 2, 3, 0]) +assert_fails(lambda: frozen_set.add(0), "trying to mutate a frozen set value") + +# update +update_set = set([1, 2]) +update_set.update([2, 3], {3: 
"three", 4: "four"}) +assert_eq(list(update_set), [1, 2, 3, 4]) +assert_fails(lambda: update_set.update(1), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: frozen_set.update([0]), "trying to mutate a frozen set value") + +# iteration order +def iterate(s): + elements = [] + for e in s: + elements.append(e) + return elements + +assert_eq(iterate(set("hello world".elems())), ["h", "e", "l", "o", " ", "w", "r", "d"]) +add_set_for_iter = set() +add_set_for_iter.add(3) +add_set_for_iter.add(1) +add_set_for_iter.add(2) +assert_eq(iterate(add_set_for_iter), [3, 1, 2]) + +# remove +remove_set = set([1, 2, 3]) +remove_set.remove(2) +assert_eq(list(remove_set), [1, 3]) +assert_fails(lambda: remove_set.remove(4), "not found") +assert_fails(lambda: frozen_set.remove(1), "trying to mutate a frozen set value") + +# discard +discard_set = set([1, 2, 3]) +discard_set.discard(2) +assert_eq(list(discard_set), [1, 3]) +discard_set.discard(4) +assert_eq(list(discard_set), [1, 3]) +assert_fails(lambda: frozen_set.discard(1), "trying to mutate a frozen set value") + +# pop +pop_set = set("hello".elems()) +assert_eq(pop_set.pop(), "h") +assert_eq(pop_set.pop(), "e") +assert_eq(pop_set.pop(), "l") +assert_eq(pop_set.pop(), "o") +assert_fails(lambda: pop_set.pop(), "set is empty") +assert_fails(lambda: frozen_set.pop(), "trying to mutate a frozen set value") + +# clear +clear_set = set([1, 2, 3]) +clear_set.clear() +assert_eq(clear_set, set()) +assert_fails(lambda: frozen_set.clear(), "trying to mutate a frozen set value") + +# issubset method allows an arbitrary sequence, set, or mapping +assert_eq(set([1, 2]).issubset([1, 2, 3]), True) +assert_eq(set([1, 2]).issubset(set([2, 3])), False) +assert_eq(set([1, 2]).issubset([2, 1]), True) +assert_fails(lambda: set([1, 2]).issubset(2), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: set([1, 2]).issubset([1, 2], [3]), "accepts no more than 1 positional argument") + +# issuperset 
method allows an arbitrary sequence, set, or mapping +assert_eq(set([1, 2, 3]).issuperset([0, 1, 2, 3]), False) +assert_eq(set([1, 2, 3]).issuperset({2: "a", 3: "b"}), True) +assert_eq(set([1, 2, 3]).issuperset([3, 2, 1]), True) +assert_fails(lambda: set([1, 2]).issuperset(2), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: set([1, 2]).issubset([1, 2], [3]), "accepts no more than 1 positional argument") + +# isdisjoint method allows an arbitrary sequence, set, or mapping +assert_eq(set([1, 2]).isdisjoint([3, 4]), True) +assert_eq(set([1, 2]).isdisjoint([2, 3]), False) +assert_eq(set([1, 2]).isdisjoint({2: "a", 3: "b"}), False) +assert_eq(set([1, 2]).isdisjoint({}), True) +assert_eq(set().isdisjoint([2, 3]), True) +assert_eq(set().isdisjoint([]), True) +assert_fails(lambda: set([1, 2]).isdisjoint(2), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: set([1, 2]).isdisjoint([1, 2], [3]), "accepts no more than 1 positional argument") + +# union method, unlike the | operator, allows arbitrary number of arbitrary sequences, sets, or mappings +assert_eq(set([1, 2]).union([2, 3]), set([1, 2, 3])) +assert_eq(set([1, 2]).union([2, 3], {3: "three", 4: "four"}), set([1, 2, 3, 4])) +assert_fails(lambda: set([1, 2]).union(3), "got value of type 'int', want a set, sequence, or dict") + +# intersection method, unlike the & operator, allows arbitrary number of arbitrary sequences, sets, or mappings +assert_eq(set([1, 2, 3]).intersection([2, 3, 4]), set([2, 3])) +assert_eq(set([1, 2, 3]).intersection([2, 3, 4, 2, 3, 4]), set([2, 3])) +assert_eq(set([1, 2, 3]).intersection([2, 3], {3: "three", 4: "four"}), set([3])) +assert_fails(lambda: set([1, 2]).intersection(3), "got value of type 'int', want a set, sequence, or dict") + +# intersection_update method, unlike the &= operator, allows arbitrary number of arbitrary sequences, sets, or mappings +intersection_update_set = set([1, 2, 3]) 
+intersection_update_set.intersection_update([2, 3, 4]) +assert_eq(intersection_update_set, set([2, 3])) +intersection_update_set.intersection_update([2, 3, 4, 2, 3, 4]) +assert_eq(intersection_update_set, set([2, 3])) +intersection_update_set.intersection_update([2, 3], {3: "three", 4: "four"}) +assert_eq(intersection_update_set, set([3])) +assert_fails(lambda: intersection_update_set.intersection_update(3), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: frozen_set.intersection_update([1]), "trying to mutate a frozen set value") + +# difference method, unlike the - operator, allows arbitrary number of arbitrary sequences, sets, or mappings +assert_eq(set([1, 2, 3]).difference([2]), set([1, 3])) +assert_eq(set([1, 2, 3]).difference([2, 3, 2, 3]), set([1])) +assert_eq(set([1, 2, 3]).difference([2], {3: "three", 4: "four"}), set([1])) +assert_fails(lambda: set([1, 2]).difference(2), "got value of type 'int', want a set, sequence, or dict") + +# difference_update method, unlike the -= operator, allows arbitrary number of arbitrary sequences, sets, or mappings +difference_update_set = set([1, 2, 3, 4]) +difference_update_set.difference_update([2]) +assert_eq(difference_update_set, set([1, 3, 4])) +difference_update_set.difference_update([2, 3, 2, 3]) +assert_eq(difference_update_set, set([1, 4])) +difference_update_set.difference_update([2], {3: "three", 4: "four"}) +assert_eq(difference_update_set, set([1])) +assert_fails(lambda: difference_update_set.difference_update(2), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: frozen_set.difference_update([1]), "trying to mutate a frozen set value") + +# symmetric_difference method, unlike the ^ operator, allows one arbitrary sequence, set, or mapping +assert_eq(set([1, 2, 3]).symmetric_difference([2, 3, 4]), set([1, 4])) +assert_eq(set([1, 2, 3]).symmetric_difference([2, 3, 4, 2, 3, 4]), set([1, 4])) +assert_eq(set([1, 2, 3]).symmetric_difference({0: "zero", 
1: "one"}), set([2, 3, 0])) +assert_fails(lambda: set([1, 2]).symmetric_difference(2), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: set([1, 2]).symmetric_difference([1], [2]), "accepts no more than 1 positional argument") + +# symmetric_difference_update method, unlike the ^= operator, allows one arbitrary sequence, set, or mapping +symmetric_difference_update_set = set([1, 2, 3, 4]) +symmetric_difference_update_set.symmetric_difference_update([2]) +assert_eq(symmetric_difference_update_set, set([1, 3, 4])) +symmetric_difference_update_set.symmetric_difference_update([2, 3, 2, 3]) +assert_eq(symmetric_difference_update_set, set([1, 2, 4])) +symmetric_difference_update_set.symmetric_difference_update({0: "zero", 1: "one"}) +assert_eq(symmetric_difference_update_set, set([0, 2, 4])) +assert_fails(lambda: symmetric_difference_update_set.symmetric_difference_update(2), "got value of type 'int', want a set, sequence, or dict") +assert_fails(lambda: frozen_set.symmetric_difference_update([1]), "trying to mutate a frozen set value") From d6825b4e177dff635e289cec8f3db5f9f93b04d0 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 18 Dec 2024 13:55:36 -0800 Subject: [PATCH 2/2] Enable Starlark sets by default Now that the Starlark language spec has been approved: https://github.com/bazelbuild/starlark/blob/master/spec.md#sets Require elements of arguments to StarlarkSet methods to be hashable, matching the final version of the language spec. Take the opportunity to update our documentation to match the spec whenever reasonable (modulo minor differences in terminology and formatting). RELNOTES: Flip --experimental_enable_starlark_set and enable the Starlark set data type by default. 
PiperOrigin-RevId: 707659085 Change-Id: Ibcad59838f9709e980d7b69f4957b8f0fede51c6 Cherry-picked from https://github.com/bazelbuild/bazel/commit/8ae25701a32668bf98d526426aa849ea02149833 --- .../semantics/BuildLanguageOptions.java | 2 +- .../net/starlark/java/eval/MethodLibrary.java | 23 +- .../starlark/java/eval/StarlarkSemantics.java | 2 +- .../net/starlark/java/eval/StarlarkSet.java | 383 ++++++++++++------ .../net/starlark/java/eval/ScriptTest.java | 9 +- 5 files changed, 273 insertions(+), 146 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java b/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java index 01e6800d61bde2..0f1673f5120bcc 100644 --- a/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java +++ b/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java @@ -805,7 +805,7 @@ public final class BuildLanguageOptions extends OptionsBase { @Option( name = "experimental_enable_starlark_set", - defaultValue = "false", + defaultValue = "true", documentationCategory = OptionDocumentationCategory.STARLARK_SEMANTICS, effectTags = {OptionEffectTag.BUILD_FILE_SEMANTICS}, metadataTags = {OptionMetadataTag.EXPERIMENTAL}, diff --git a/src/main/java/net/starlark/java/eval/MethodLibrary.java b/src/main/java/net/starlark/java/eval/MethodLibrary.java index fd2eccb6302d14..9eb2417a1f5e71 100644 --- a/src/main/java/net/starlark/java/eval/MethodLibrary.java +++ b/src/main/java/net/starlark/java/eval/MethodLibrary.java @@ -645,13 +645,24 @@ public StarlarkInt intForStarlark(Object x, Object baseO) throws EvalException { @StarlarkMethod( name = "set", doc = - "Experimental. This API is experimental and may change at any time. Please do not" - + " depend on it. It may be enabled on an experimental basis by setting" - + " --experimental_enable_starlark_set.\n" // - + "

Creates a new set, optionally initialized to" - + " contain the elements from a given iterable.", + """ +Creates a new set containing the unique elements of a given +iterable, preserving iteration order. + +

If called with no argument, set() returns a new empty set. + +

For example, +

+set()                          # an empty set
+set([3, 1, 1, 2])              # set([3, 1, 2]), a set of three elements
+set({"k1": "v1", "k2": "v2"})  # set(["k1", "k2"]), a set of two elements
+
+""", parameters = { - @Param(name = "elements", defaultValue = "[]", doc = "A set, sequence, or dict."), + @Param( + name = "elements", + defaultValue = "[]", + doc = "A set, a sequence of hashable values, or a dict."), }, useStarlarkThread = true) public StarlarkSet set(Object elements, StarlarkThread thread) throws EvalException { diff --git a/src/main/java/net/starlark/java/eval/StarlarkSemantics.java b/src/main/java/net/starlark/java/eval/StarlarkSemantics.java index 658977e013bf08..2bcefe895d74c7 100644 --- a/src/main/java/net/starlark/java/eval/StarlarkSemantics.java +++ b/src/main/java/net/starlark/java/eval/StarlarkSemantics.java @@ -258,5 +258,5 @@ public final String toString() { public static final String ALLOW_RECURSION = "-allow_recursion"; /** Whether StarlarkSet objects may be constructed by the interpreter. */ - public static final String EXPERIMENTAL_ENABLE_STARLARK_SET = "-experimental_enable_starlark_set"; + public static final String EXPERIMENTAL_ENABLE_STARLARK_SET = "+experimental_enable_starlark_set"; } diff --git a/src/main/java/net/starlark/java/eval/StarlarkSet.java b/src/main/java/net/starlark/java/eval/StarlarkSet.java index 144fefd780c744..d53443f21f3dea 100644 --- a/src/main/java/net/starlark/java/eval/StarlarkSet.java +++ b/src/main/java/net/starlark/java/eval/StarlarkSet.java @@ -37,19 +37,40 @@ category = "core", doc = """ -Experimental. This API is experimental and may change at any time. Please do not depend on -it. It may be enabled on an experimental basis by setting ---experimental_enable_starlark_set. +The built-in set type. A set is a mutable, iterable collection of unique values – the set's +elements. The type name of a set is "set". -

The built-in mutable set type. Example set expressions: +

Sets provide constant-time operations to insert, remove, or check for the presence of a value. +Sets are implemented using a hash table, and therefore, just like keys of a +dictionary, elements of a set must be hashable. A value may be used as an +element of a set if and only if it may be used as a key of a dictionary. + +

Sets may be constructed using the set() built-in +function, which returns a new set containing the unique elements of its optional argument, which +must be an iterable. Calling set() without an argument constructs an empty set. Sets +have no literal syntax. + +

The in and not in operations check whether a value is (or is not) in a +set:

-x = set()           # x is an empty set
-y = set([1, 2, 3])  # y is a set with 3 elements
-3 in y              # True
-0 in y              # False
-len(x)              # 0
-len(y)              # 3
+s = set(["a", "b", "c"])
+"a" in s  # True
+"z" in s  # False
+
+ +

A set is iterable, and thus may be used as the operand of a for loop, a list +comprehension, and the various built-in functions that operate on iterables. Its length can be +retrieved using the len() built-in function, and the +order of iteration is the order in which elements were first added to the set: + +

+s = set(["z", "y", "z", "y"])
+len(s)       # prints 2
+s.add("x")
+len(s)       # prints 3
+for e in s:
+    print(e)  # prints "z", "y", "x"
 

A set used in Boolean context is true if and only if it is non-empty. @@ -61,81 +82,68 @@ "non-empty" if t else "empty" # "non-empty" -

The elements of a set must be hashable; x may be an element of a set if and only if -x may be used as a key of a dict. - -

A set itself is not hashable; therefore, you cannot have a set with another set as an -element. - -

You cannot access the elements of a set by index, but you can iterate over them, and you can -obtain the list of a set's elements in iteration order using the list() built-in -function. Just like for lists, it is an error to mutate a set while it is being iterated over. The -order of iteration matches insertion order: +

Sets may be compared for equality or inequality using == and !=. A set +s is equal to t if and only if t is a set containing the same +elements; iteration order is not significant. In particular, a set is not equal to the list +of its elements. Sets are not ordered with respect to other sets, and an attempt to compare two sets +using <, <=, >, >=, or to sort a +sequence of sets, will fail.

-s = set([3, 1, 3])
-s.add(2)
-# prints 3, 1, 2
-for item in s:
-    print(item)
-list(s)  # [3, 1, 2]
+set() == set()              # True
+set() != []                 # True
+set([1, 2]) == set([2, 1])  # True
+set([1, 2]) != [1, 2]       # True
 
-

A set s is equal to t if and only if t is a set containing -the same elements, possibly with a different iteration order. In particular, a set is -not equal to its list of elements. - -

Sets are not ordered; the <, <=, >, and ->= operations are not defined for sets, and a list of sets cannot be sorted - unlike -in Python. -

The | operation on two sets returns the union of the two sets: a set containing the -elements found in either one or both of the original sets. The | operation has an -augmented assignment version; s |= t adds to s all the elements of -t. +elements found in either one or both of the original sets.

 set([1, 2]) | set([3, 2])  # set([1, 2, 3])
-s = set([1, 2])
-s |= set([2, 3, 4])        # s now equals set([1, 2, 3, 4])
 

The & operation on two sets returns the intersection of the two sets: a set -containing only the elements found in both of the original sets. The & operation -has an augmented assignment version; s &= t removes from s all the -elements not found in t. +containing only the elements found in both of the original sets.

-set([1, 2]) & set([2, 3])  # set([2])
-set([1, 2]) & set([3, 4])  # set()
-s = set([1, 2])
-s &= set([0, 1])           # s now equals set([1])
+set([1, 2]) & set([2, 3])  # set([2])
+set([1, 2]) & set([3, 4])  # set()
 

The - operation on two sets returns the difference of the two sets: a set containing -the elements found in the left-hand side set but not the right-hand site set. The - -operation has an augmented assignment version; s -= t removes from s all -the elements found in t. +the elements found in the left-hand side set but not the right-hand side set.

 set([1, 2]) - set([2, 3])  # set([1])
 set([1, 2]) - set([3, 4])  # set([1, 2])
-s = set([1, 2])
-s -= set([0, 1])           # s now equals set([2])
 

The ^ operation on two sets returns the symmetric difference of the two sets: a set -containing the elements found in exactly one of the two original sets, but not in both. The -^ operation has an augmented assignment version; s ^= t removes from -s any element of t found in s and adds to s any -element of t not found in s. +containing the elements found in exactly one of the two original sets, but not in both.

 set([1, 2]) ^ set([2, 3])  # set([1, 3])
 set([1, 2]) ^ set([3, 4])  # set([1, 2, 3, 4])
+
+ +

In each of the above operations, the elements of the resulting set retain their order from the +two operand sets, with all elements that were drawn from the left-hand side ordered before any +element that was only present in the right-hand side. + +

The corresponding augmented assignments, |=, &=, -=, +and ^=, modify the left-hand set in place. + +

 s = set([1, 2])
-s ^= set([0, 1])           # s now equals set([2, 0])
+s |= set([2, 3, 4])     # s now equals set([1, 2, 3, 4])
+s &= set([0, 1, 2, 3])  # s now equals set([1, 2, 3])
+s -= set([0, 1])        # s now equals set([2, 3])
+s ^= set([3, 4])        # s now equals set([2, 4])
 
+ +

Like all mutable values in Starlark, a set can be frozen, and once frozen, all subsequent +operations that attempt to update it will fail. """) public final class StarlarkSet extends AbstractSet implements Mutability.Freezable, StarlarkMembershipTestable, StarlarkIterable { @@ -377,16 +385,20 @@ public void unsafeShallowFreeze() { """ Returns true of this set is a subset of another. +

Note that a set is always considered to be a subset of itself. +

For example,

-set([1, 2]).issubset([1, 2, 3]) == True
-set([1, 2]).issubset([1, 2]) == True
-set([1, 2]).issubset([2, 3]) == False
+set([1, 2]).issubset([1, 2, 3])  # True
+set([1, 2]).issubset([1, 2])     # True
+set([1, 2]).issubset([2, 3])     # False
 
""", - parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + parameters = { + @Param(name = "other", doc = "A set, a sequence of hashable elements, or a dict.") + }) public boolean isSubset(Object other) throws EvalException { - return toCollection(other, "issubset argument").containsAll(this.contents); + return toHashableCollection(other, "issubset argument").containsAll(this.contents); } @StarlarkMethod( @@ -395,16 +407,20 @@ public boolean isSubset(Object other) throws EvalException { """ Returns true of this set is a superset of another. +

Note that a set is always considered to be a superset of itself. +

For example,

-set([1, 2, 3]).issuperset([1, 2]) == True
-set([1, 2, 3]).issuperset([1, 2, 3]) == True
-set([1, 2, 3]).issuperset([2, 3, 4]) == False
+set([1, 2, 3]).issuperset([1, 2])     # True
+set([1, 2, 3]).issuperset([1, 2, 3])  # True
+set([1, 2, 3]).issuperset([2, 3, 4])  # False
 
""", - parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + parameters = { + @Param(name = "other", doc = "A set, a sequence of hashable elements, or a dict.") + }) public boolean isSuperset(Object other) throws EvalException { - return contents.containsAll(toCollection(other, "issuperset argument")); + return contents.containsAll(toHashableCollection(other, "issuperset argument")); } @StarlarkMethod( @@ -415,14 +431,16 @@ public boolean isSuperset(Object other) throws EvalException {

For example,

-set([1, 2]).isdisjoint([3, 4]) == True
-set().isdisjoint(set()) == True
-set([1, 2]).isdisjoint([2, 3]) == False
+set([1, 2]).isdisjoint([3, 4])  # True
+set().isdisjoint(set())         # True
+set([1, 2]).isdisjoint([2, 3])  # False
 
""", - parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + parameters = { + @Param(name = "other", doc = "A set, a sequence of hashable elements, or a dict.") + }) public boolean isDisjoint(Object other) throws EvalException { - return Collections.disjoint(this.contents, toCollection(other, "isdisjoint argument")); + return Collections.disjoint(this.contents, toHashableCollection(other, "isdisjoint argument")); } /** @@ -437,12 +455,20 @@ public boolean isDisjoint(Object other) throws EvalException {

For example,

-x = set([1, 2])
-x.update([2, 3], [3, 4])
-# x is now set([1, 2, 3, 4])
+s = set()
+s.update([1, 2])          # None; s is set([1, 2])
+s.update([2, 3], [3, 4])  # None; s is set([1, 2, 3, 4])
 
+ +

If s and t are sets, s.update(t) is equivalent to +s |= t; however, note that the |= augmented assignment requires both sides +to be sets, while the update method also accepts sequences and dicts. + +

It is permissible to call update without any arguments; this leaves the set +unchanged. """, - extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts.")) + extraPositionals = + @Param(name = "others", doc = "Sets, sequences of hashable elements, or dicts.")) public void update(Tuple others) throws EvalException { Starlark.checkMutable(this); for (Object other : others) { @@ -455,7 +481,16 @@ public void update(Tuple others) throws EvalException { @StarlarkMethod( name = "add", - doc = "Adds an element to the set.", + doc = + """ +Adds an element to the set. + +

It is permissible to add a value already present in the set; this leaves the set +unchanged. + +

If you need to add multiple elements to a set, see update or +the |= augmented assignment operation. +""", parameters = {@Param(name = "element", doc = "Element to add.")}) public void addElement(E element) throws EvalException { Starlark.checkMutable(this); @@ -467,12 +502,22 @@ public void addElement(E element) throws EvalException { name = "remove", doc = """ -Removes an element, which must be present in the set, from the set. Fails if the element was not -present in the set. +Removes an element, which must be present in the set, from the set. + +

remove fails if the element was not present in the set. If you don't want to fail on +an attempt to remove a non-present element, use discard instead. +If you need to remove multiple elements from a set, see +difference_update or the -= augmented +assignment operation. """, - parameters = {@Param(name = "element", doc = "Element to remove.")}) + parameters = { + @Param( + name = "element", + doc = "Element to remove. Must be an element of the set (and hashable).") + }) public void removeElement(E element) throws EvalException { Starlark.checkMutable(this); + Starlark.checkHashable(element); if (!contents.remove(element)) { throw Starlark.errorf("element %s not found in set", Starlark.repr(element)); } @@ -480,16 +525,48 @@ public void removeElement(E element) throws EvalException { @StarlarkMethod( name = "discard", - doc = "Removes an element from the set if it is present.", - parameters = {@Param(name = "element", doc = "Element to discard.")}) + doc = + """ +Removes an element from the set if it is present. + +

It is permissible to discard a value not present in the set; this leaves the set +unchanged. If you want to fail on an attempt to remove a non-present element, use +remove instead. If you need to remove multiple elements from a +set, see difference_update or the -= +augmented assignment operation. + +

For example, +

+s = set(["x", "y"])
+s.discard("y")  # None; s == set(["x"])
+s.discard("y")  # None; s == set(["x"])
+
+""", + parameters = {@Param(name = "element", doc = "Element to discard. Must be hashable.")}) public void discard(E element) throws EvalException { Starlark.checkMutable(this); + Starlark.checkHashable(element); contents.remove(element); } @StarlarkMethod( name = "pop", - doc = "Removes and returns the first element of the set. Fails if the set is empty.") + doc = + """ +Removes and returns the first element of the set (in iteration order, which is the order in which +elements were first added to the set). + +

Fails if the set is empty. + +

For example, +

+s = set([3, 1, 2])
+s.pop()  # 3; s == set([1, 2])
+s.pop()  # 1; s == set([2])
+s.pop()  # 2; s == set()
+s.pop()  # error: empty set
+
+""") public E pop() throws EvalException { Starlark.checkMutable(this); if (isEmpty()) { @@ -512,12 +589,21 @@ public void clearElements() throws EvalException { """ Returns a new mutable set containing the union of this set with others. +

If s and t are sets, s.union(t) is equivalent to +s | t; however, note that the | operation requires both sides to be sets, +while the union method also accepts sequences and dicts. + +

It is permissible to call union without any arguments; this returns a copy of the +set. +

For example,

-set([1, 2]).union([2, 3, 4], [4, 5]) == set([1, 2, 3, 4, 5])
+set([1, 2]).union([2, 3])                    # set([1, 2, 3])
+set([1, 2]).union([2, 3], {3: "a", 4: "b"})  # set([1, 2, 3, 4])
 
""", - extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts."), + extraPositionals = + @Param(name = "others", doc = "Sets, sequences of hashable elements, or dicts."), useStarlarkThread = true) public StarlarkSet union(Tuple others, StarlarkThread thread) throws EvalException { LinkedHashSet newContents = new LinkedHashSet<>(contents); @@ -533,17 +619,26 @@ public StarlarkSet union(Tuple others, StarlarkThread thread) throws EvalExce """ Returns a new mutable set containing the intersection of this set with others. +

If s and t are sets, s.intersection(t) is equivalent to +s & t; however, note that the & operation requires both sides to +be sets, while the intersection method also accepts sequences and dicts. + +

It is permissible to call intersection without any arguments; this returns a copy of +the set. +

For example,

-set([1, 2, 3]).intersection([1, 2], [2, 3]) == set([2])
+set([1, 2]).intersection([2, 3])             # set([2])
+set([1, 2, 3]).intersection([0, 1], [1, 2])  # set([1])
 
""", - extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts."), + extraPositionals = + @Param(name = "others", doc = "Sets, sequences of hashable elements, or dicts."), useStarlarkThread = true) public StarlarkSet intersection(Tuple others, StarlarkThread thread) throws EvalException { LinkedHashSet newContents = new LinkedHashSet<>(contents); for (Object other : others) { - newContents.retainAll(toCollection(other, "intersection argument")); + newContents.retainAll(toHashableCollection(other, "intersection argument")); } return wrapOrImmutableCopy(thread.mutability(), newContents); } @@ -554,18 +649,27 @@ public StarlarkSet intersection(Tuple others, StarlarkThread thread) throws E """ Removes any elements not found in all others from this set. +

If s and t are sets, s.intersection_update(t) is +equivalent to s &= t; however, note that the &= augmented +assignment requires both sides to be sets, while the intersection_update method also +accepts sequences and dicts. + +

It is permissible to call intersection_update without any arguments; this leaves the +set unchanged. +

For example,

-x = set([1, 2, 3, 4])
-x.intersection_update([2, 3], [3, 4])
-# x is now set([3])
+s = set([1, 2, 3, 4])
+s.intersection_update([0, 1, 2])       # None; s is set([1, 2])
+s.intersection_update([0, 1], [1, 2])  # None; s is set([1])
 
""", - extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts.")) + extraPositionals = + @Param(name = "others", doc = "Sets, sequences of hashable elements, or dicts.")) public void intersectionUpdate(Tuple others) throws EvalException { Starlark.checkMutable(this); for (Object other : others) { - contents.retainAll(toCollection(other, "intersection_update argument")); + contents.retainAll(toHashableCollection(other, "intersection_update argument")); } } @@ -575,17 +679,26 @@ public void intersectionUpdate(Tuple others) throws EvalException { """ Returns a new mutable set containing the difference of this set with others. +

If s and t are sets, s.difference(t) is equivalent to +s - t; however, note that the - operation requires both sides to be sets, +while the difference method also accepts sequences and dicts. + +

It is permissible to call difference without any arguments; this returns a copy of +the set. +

For example,

-set([1, 2, 3]).intersection([1, 2], [2, 3]) == set([2])
+set([1, 2, 3]).difference([2])             # set([1, 3])
+set([1, 2, 3]).difference([0, 1], [3, 4])  # set([2])
 
""", - extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts."), + extraPositionals = + @Param(name = "others", doc = "Sets, sequences of hashable elements, or dicts."), useStarlarkThread = true) public StarlarkSet difference(Tuple others, StarlarkThread thread) throws EvalException { LinkedHashSet newContents = new LinkedHashSet<>(contents); for (Object other : others) { - newContents.removeAll(toCollection(other, "difference argument")); + newContents.removeAll(toHashableCollection(other, "difference argument")); } return wrapOrImmutableCopy(thread.mutability(), newContents); } @@ -596,18 +709,26 @@ public StarlarkSet difference(Tuple others, StarlarkThread thread) throws Eva """ Removes any elements found in any others from this set. +

If s and t are sets, s.difference_update(t) is equivalent +to s -= t; however, note that the -= augmented assignment requires both +sides to be sets, while the difference_update method also accepts sequences and dicts. + +

It is permissible to call difference_update without any arguments; this leaves the +set unchanged. +

For example,

-x = set([1, 2, 3, 4])
-x.difference_update([2, 3], [3, 4])
-# x is now set([1])
+s = set([1, 2, 3, 4])
+s.difference_update([2])             # None; s is set([1, 3, 4])
+s.difference_update([0, 1], [4, 5])  # None; s is set([3])
 
""", - extraPositionals = @Param(name = "others", doc = "Sets, sequences, or dicts.")) + extraPositionals = + @Param(name = "others", doc = "Sets, sequences of hashable elements, or dicts.")) public void differenceUpdate(Tuple others) throws EvalException { Starlark.checkMutable(this); for (Object other : others) { - contents.removeAll(toCollection(other, "intersection_update argument")); + contents.removeAll(toHashableCollection(other, "intersection_update argument")); } } @@ -618,12 +739,19 @@ public void differenceUpdate(Tuple others) throws EvalException { Returns a new mutable set containing the symmetric difference of this set with another set, sequence, or dict. +

If s and t are sets, s.symmetric_difference(t) is +equivalent to s ^ t; however, note that the ^ operation requires both +sides to be sets, while the symmetric_difference method also accepts a sequence or a +dict. +

For example,

-set([1, 2, 3]).symmetric_difference([2, 3, 4]) == set([1, 4])
+set([1, 2]).symmetric_difference([2, 3])  # set([1, 3])
 
""", - parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}, + parameters = { + @Param(name = "other", doc = "A set, a sequence of hashable elements, or a dict.") + }, useStarlarkThread = true) public StarlarkSet symmetricDifference(Object other, StarlarkThread thread) throws EvalException { @@ -649,12 +777,20 @@ public StarlarkSet symmetricDifference(Object other, StarlarkThread thread) Returns a new mutable set containing the symmetric difference of this set with another set, sequence, or dict. +

If s and t are sets, s.symmetric_difference_update(t) is +equivalent to s ^= t; however, note that the ^= augmented assignment requires both +sides to be sets, while the symmetric_difference_update method also accepts a sequence +or a dict. +

For example,

-set([1, 2, 3]).symmetric_difference([2, 3, 4]) == set([1, 4])
+s = set([1, 2])
+s.symmetric_difference_update([2, 3])  # None; s == set([1, 3])
 
""", - parameters = {@Param(name = "other", doc = "A set, sequence, or dict.")}) + parameters = { + @Param(name = "other", doc = "A set, a sequence of hashable elements, or a dict.") + }) public void symmetricDifferenceUpdate(Object other) throws EvalException { Starlark.checkMutable(this); ImmutableSet originalContents = ImmutableSet.copyOf(contents); @@ -670,29 +806,19 @@ public void symmetricDifferenceUpdate(Object other) throws EvalException { } /** - * Verifies that {@code other} is either a collection or a map. + * Verifies that {@code other} is either a collection of Starlark-hashable elements or a map with + * Starlark-hashable keys. * - * @return {@code other} if it is a collection, or the key set of {@code other} if it is a map. - */ - private static Collection toCollection(Object other, String what) throws EvalException { - if (other instanceof Collection) { - return (Collection) other; - } else if (other instanceof Map) { - return ((Map) other).keySet(); - } - throw notSizedIterableError(other, what); - } - - /** - * A variant of {@link #toCollection} which additionally checks whether the returned collection's - * elements are Starlark-hashable. + *

Note that in the Starlark language spec, this notion is referred to as an "iterable + * sequence" of hashable elements; but our {@link Dict} doesn't implement {@link Sequence}, and in + * any case, we may need to operate on native Java collections and maps which don't implement + * {@link StarlarkIterable} or {@link Sequence}. * * @return {@code other} if it is a collection, or the key set of {@code other} if it is a map. */ private static Collection toHashableCollection(Object other, String what) throws EvalException { - if (other instanceof Collection) { - Collection collection = (Collection) other; + if (other instanceof Collection collection) { // Assume that elements of a StarlarkSet have already been checked to be hashable. if (!(collection instanceof StarlarkSet)) { for (Object element : collection) { @@ -700,22 +826,19 @@ private static Collection toHashableCollection(Object other, String what) } } return collection; - } else if (other instanceof Map) { - Set keySet = ((Map) other).keySet(); + } else if (other instanceof Map map) { + Set keySet = map.keySet(); // Assume that keys of a Dict have already been checked to be hashable. - if (!(other instanceof Dict)) { + if (!(map instanceof Dict)) { for (Object element : keySet) { Starlark.checkHashable(element); } } return keySet; } - throw notSizedIterableError(other, what); - } - - // Starlark doesn't have a "sized iterable" interface - so we enumerate the types we expect. - private static EvalException notSizedIterableError(Object other, String what) { - return Starlark.errorf( + // The Java Starlark interpreter doesn't have a "sized iterable" interface - so we enumerate the + // types we expect. 
+ throw Starlark.errorf( "for %s got value of type '%s', want a set, sequence, or dict", what, Starlark.type(other)); } diff --git a/src/test/java/net/starlark/java/eval/ScriptTest.java b/src/test/java/net/starlark/java/eval/ScriptTest.java index f8409a21807bbb..57661d588dc7f3 100644 --- a/src/test/java/net/starlark/java/eval/ScriptTest.java +++ b/src/test/java/net/starlark/java/eval/ScriptTest.java @@ -228,14 +228,7 @@ public static void main(String[] args) throws Exception { Starlark.addMethods(predeclared, new ScriptTest()); // e.g. assert_eq predeclared.put("json", Json.INSTANCE); - // TODO(b/376078033): remove special set.star handling once Starlark sets are enabled by - // default. - StarlarkSemantics semantics = - name.equals("set.star") - ? StarlarkSemantics.builder() - .setBool(StarlarkSemantics.EXPERIMENTAL_ENABLE_STARLARK_SET, true) - .build() - : StarlarkSemantics.DEFAULT; + StarlarkSemantics semantics = StarlarkSemantics.DEFAULT; Module module = Module.withPredeclared(semantics, predeclared.buildOrThrow()); try (Mutability mu = Mutability.createAllowingShallowFreeze("test")) { StarlarkThread thread = StarlarkThread.createTransient(mu, semantics);