Skip to content

Commit

Permalink
RangeSet: nD folding optimization (#485) (#486)
Browse files Browse the repository at this point in the history
Optimize multi-dimensional range set folding algorithm:

* remove the heuristic used when pre-expanding vectors for folding, to
improve reproducibility
* when merging vectors, perform O(n) "folding" passes as long as changes
are detected to reduce the problem size, before attempting a final
O(n^2) pass
* avoid casting strings to integers when sorting (as RangeSet now
uses strings natively)

Closes #485
  • Loading branch information
thiell authored Nov 16, 2022
1 parent 5ac85da commit ec14536
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 102 deletions.
73 changes: 17 additions & 56 deletions lib/ClusterShell/RangeSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,7 +1133,7 @@ def rgveckeyfunc(rgvec):
# (3) lower first index first
# (4) lower last index first
return (-reduce(mul, [len(rg) for rg in rgvec]), \
tuple((-len(rg), int(rg[0]), int(rg[-1])) for rg in rgvec))
tuple((-len(rg), rg[0], rg[-1]) for rg in rgvec))
self._veclist.sort(key=rgveckeyfunc)

@precond_fold()
Expand Down Expand Up @@ -1181,72 +1181,23 @@ def _fold_multivariate(self):
"""Multivariate nD folding"""
# PHASE 1: expand with respect to uniqueness
self._fold_multivariate_expand()
self._sort()
# PHASE 2: merge
self._fold_multivariate_merge()
self._sort()
self._dirty = False

def _fold_multivariate_expand(self):
"""Multivariate nD folding: expand [phase 1]"""
max_length = sum([reduce(mul, [len(rg) for rg in rgvec]) \
for rgvec in self._veclist])
# Simple heuristic to make us faster
if len(self._veclist) * (len(self._veclist) - 1) / 2 > max_length * 10:
# *** nD full expand is preferred ***
self._veclist = [[RangeSet.fromone(i, autostep=self.autostep)
for i in tvec]
for tvec in set(self._iter())]
return

# *** nD compare algorithm is preferred ***
index1, index2 = 0, 1
while (index1 + 1) < len(self._veclist):
# use 2 references on iterator to compare items by couples
item1 = self._veclist[index1]
index2 = index1 + 1
index1 += 1
while index2 < len(self._veclist):
item2 = self._veclist[index2]
index2 += 1
new_item = None
disjoint = False
suppl = []
for pos, (rg1, rg2) in enumerate(zip(item1, item2)):
if not rg1 & rg2:
disjoint = True
break

if new_item is None:
new_item = [None] * len(item1)

if rg1 == rg2:
new_item[pos] = rg1
else:
assert rg1 & rg2
# intersection
new_item[pos] = rg1 & rg2
# create part 1
if rg1 - rg2:
item1_p = item1[0:pos] + [rg1 - rg2] + item1[pos+1:]
suppl.append(item1_p)
# create part 2
if rg2 - rg1:
item2_p = item2[0:pos] + [rg2 - rg1] + item2[pos+1:]
suppl.append(item2_p)
if not disjoint:
assert new_item is not None
assert suppl is not None
item1 = self._veclist[index1 - 1] = new_item
index2 -= 1
self._veclist.pop(index2)
self._veclist += suppl
self._veclist = [[RangeSet.fromone(i, autostep=self.autostep)
for i in tvec]
for tvec in set(self._iter())]

def _fold_multivariate_merge(self):
"""Multivariate nD folding: merge [phase 2]"""
chg = True
full = False # try easy O(n) passes first
chg = True # new pass (eg. after change on veclist)
while chg:
chg = False
self._sort() # sort veclist before new pass
index1, index2 = 0, 1
while (index1 + 1) < len(self._veclist):
# use 2 references on iterator to compare items by couples
Expand Down Expand Up @@ -1288,6 +1239,16 @@ def _fold_multivariate_merge(self):
item1 = self._veclist[index1 - 1] = new_item
index2 -= 1
self._veclist.pop(index2)
elif not full:
# easy pass so break to avoid scanning all
# index2; advance with next index1 for now
break
if not chg and not full:
# if no change was done during the last normal pass, we do a
# full O(n^2) pass. This pass is done only at the end in the
# hope that most vectors have already been merged by easy
# O(n) passes.
chg = full = True

def __or__(self, other):
"""Return the union of two RangeSetNDs as a new RangeSetND.
Expand Down
67 changes: 48 additions & 19 deletions tests/NodeSetTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,13 @@ def test_nd_fold_axis_errors(self):
self.assertRaises(NodeSetParseError, str, n1)
n1.fold_axis = range(2) # ok
self.assertEqual(str(n1), "a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1")
self.assertEqual(n1, NodeSet("a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1"))
n1.fold_axis = RangeSet("0-1") # ok
self.assertEqual(str(n1), "a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1")
self.assertEqual(n1, NodeSet("a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1"))
n1.fold_axis = (0, 1) # ok
self.assertEqual(str(n1), "a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1")
self.assertEqual(n1, NodeSet("a[1,3]b2c0,a[1,3]b2c1,a2b[3-5]c1"))

def testSimpleNodeSetUpdates(self):
"""test NodeSet simple nodeset-based update()"""
Expand Down Expand Up @@ -2093,14 +2096,19 @@ def test_nd_contiguous(self):

def test_nd_fold(self):
ns = NodeSet("da[2-3]c[1-2],da[3-4]c[3-4]")
self.assertEqual(str(ns), "da[2-3]c[1-2],da[3-4]c[3-4]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[3-4]c[3-4]"))
self.assertEqual(str(ns), "da3c[1-4],da2c[1-2],da4c[3-4]")
ns = NodeSet("da[2-3]c[1-2],da[3-4]c[2-3]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[3-4]c[2-3]"))
self.assertEqual(str(ns), "da3c[1-3],da2c[1-2],da4c[2-3]")
ns = NodeSet("da[2-3]c[1-2],da[3-4]c[1-2]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[3-4]c[1-2]"))
self.assertEqual(str(ns), "da[2-4]c[1-2]")
ns = NodeSet("da[2-3]c[1-2]p3,da[3-4]c[1-3]p3")
self.assertEqual(str(ns), "da[2-4]c[1-2]p3,da[3-4]c3p3")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2]p3,da[3-4]c[1-3]p3"))
self.assertEqual(str(ns), "da[3-4]c[1-3]p3,da2c[1-2]p3")
ns = NodeSet("da[2-3]c[1-2],da[2,5]c[2-3]")
self.assertEqual(ns, NodeSet("da[2-3]c[1-2],da[2,5]c[2-3]"))
self.assertEqual(str(ns), "da2c[1-3],da3c[1-2],da5c[2-3]")

def test_nd_issuperset(self):
Expand Down Expand Up @@ -2193,19 +2201,28 @@ def test_nd_intersection(self):
def test_nd_nonoverlap(self):
ns1 = NodeSet("a[0-2]b[1-3]c[4]")
ns1.add("a[0-1]b[2-3]c[4-5]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(ns1, NodeSet("a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(len(ns1), 13)

ns1 = NodeSet("a[0-1]b[2-3]c[4-5]")
ns1.add("a[0-2]b[1-3]c[4]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(ns1, NodeSet("a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(len(ns1), 13)

ns1 = NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c4,a[0-1]b[2-3]c5"))
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(len(ns1), 13)

ns1 = NodeSet("a[0-2]b[1-3]c[4-6],a[0-1]b[2-3]c[4-5]")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4-6],a[0-1]b[2-3]c[4-5]"))
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4-6]"))
self.assertEqual(str(ns1), "a[0-2]b[1-3]c[4-6]")
self.assertEqual(len(ns1), 3*3*3)

Expand All @@ -2223,13 +2240,14 @@ def test_nd_nonoverlap(self):
self.assertEqual(len(ns1), (3*2*3)+(2*1*2))

ns1 = NodeSet("a[0-2]b[2-3]c[4-6],a[0-1]b[1-3]c[4-5],a2b1c[4-6]")
self.assertEqual(str(ns1), "a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-6],a2b[1-3]c[4-6],a[0-1]b1c[4-5]")
self.assertEqual(ns1, NodeSet("a[0-1]b[1-3]c[4-5],a[0-2]b[2-3]c6,a2b[2-3]c[4-5],a2b1c[4-6]"))
self.assertEqual(ns1, NodeSet("a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6]"))
self.assertEqual(len(ns1), (3*3*2)+1+(3*2*1))
ns1.add("a1b1c6")
self.assertEqual(str(ns1), "a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6],a1b1c6")
self.assertEqual(str(ns1), "a[1-2]b[1-3]c[4-6],a0b[2-3]c[4-6],a0b1c[4-5]")
self.assertEqual(ns1, NodeSet("a[0-2]b[2-3]c[4-6],a[0-1]b1c[4-5],a2b1c[4-6],a1b1c6"))
self.assertEqual(ns1, NodeSet("a[1-2]b[1-3]c[4-6],a0b[2-3]c[4-6],a0b1c[4-5]"))
ns1.add("a0b1c6")
self.assertEqual(str(ns1), "a[0-2]b[1-3]c[4-6]")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4-6]"))
Expand All @@ -2254,7 +2272,10 @@ def test_nd_difference(self):
self.assertEqual(len(ns1.difference(ns2)), 6)

ns1 = NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]")
self.assertEqual(str(ns1), "a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4")
self.assertEqual(str(ns1), "a[0-1]b[1-2]c4,a[0-1]b3c[4-5],a2b[1-3]c4,a[0-1]b2c5")
self.assertEqual(ns1, NodeSet("a[0-2]b[1-3]c[4],a[0-1]b[2-3]c[4-5]"))
self.assertEqual(ns1, NodeSet("a[0-1]b[2-3]c[4-5],a[0-2]b1c4,a2b[2-3]c4"))
self.assertEqual(ns1, NodeSet("a2b[1-3]c4,a0b[1-2]c4,a0b3c[4-5],a1b[1-2]c4,a1b3c[4-5],a0b2c5,a1b2c5"))

self.assertEqual(len(ns1), 3*3 + 2*2)
ns2 = NodeSet("a[0-3]b[1]c[4-5]")
Expand All @@ -2267,15 +2288,15 @@ def test_nd_difference(self):

ns1 = NodeSet("a[0-3]b[1-5]c5")
ns2 = NodeSet("a[0-2]b[2-4]c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-3]b[1,5]c5,a3b[2-4]c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-2]b[1,5]c5,a3b[1-5]c5")

ns1 = NodeSet("a[0-3]b2c5")
ns2 = NodeSet("a[0-2]b1c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-3]b2c5")

ns1 = NodeSet("a[0-3]b[1-4]c[5]")
ns2 = NodeSet("a[0-2]b1c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-3]b[2-4]c5,a3b1c5")
self.assertEqual(str(ns1.difference(ns2)), "a[0-2]b[2-4]c5,a3b[1-4]c5")

ns1 = NodeSet("a[0-2]b[1-4]c5")
ns2 = NodeSet("a[0-3]b[2-3]c5")
Expand Down Expand Up @@ -2308,7 +2329,7 @@ def test_nd_difference_test(self):
ns1 = NodeSet("a[1-10]b[1-10]")
ns2 = NodeSet("a[5-20]b[5-20]")
nsdiff = ns1.difference(ns2)
self.assertEqual(str(nsdiff), "a[1-10]b[1-4],a[1-4]b[5-10]")
self.assertEqual(str(nsdiff), "a[1-4]b[1-10],a[5-10]b[1-4]")
self.assertEqual(nsdiff, NodeSet("a[1-4]b[1-10],a[1-10]b[1-4]")) # manually checked with overlap

# node[1-100]x[1-10] -x node4x4
Expand Down Expand Up @@ -2340,7 +2361,7 @@ def test_nd_difference_m(self):
ns1 = NodeSet("a[2-3]b[0,3-4],a[6-10]b[0-2]")
ns2 = NodeSet("a[3-6]b[2-3]")
nsdiff = ns1.difference(ns2)
self.assertEqual(str(nsdiff), "a[7-10]b[0-2],a[2-3]b[0,4],a6b[0-1],a2b3")
self.assertEqual(str(nsdiff), "a[7-10]b[0-2],a2b[0,3-4],a3b[0,4],a6b[0-1]")
self.assertEqual(nsdiff, NodeSet("a[7-10]b[0-2],a[2-3]b[0,4],a6b[0-1],a2b3"))
self.assertEqual(nsdiff, NodeSet("a[2-3,6-10]b0,a[6-10]b1,a[7-10]b2,a2b3,a[2-3]b4")) # manually checked

Expand Down Expand Up @@ -2387,25 +2408,33 @@ def test_nd_xor(self):
first = NodeSet("a[2-3,5]b[1,4],a6b5")
second = NodeSet("a[4-6]b[3-6]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "a[4-6]b[3,6],a[2-3]b[1,4],a4b[4-5],a5b[1,5],a6b4")
self.assertEqual(str(first), "a[2-3]b[1,4],a4b[3-6],a5b[1,3,5-6],a6b[3-4,6]")
self.assertEqual(first, NodeSet("a[2-3]b[1,4],a4b[3-6],a5b[1,3,5-6],a6b[3-4,6]"))
self.assertEqual(first, NodeSet("a[4-6]b[3,6],a[2-3]b[1,4],a4b[4-5],a5b[1,5],a6b4"))

first = NodeSet("a[1-50]b[1-20]")
second = NodeSet("a[40-60]b[10-30]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "a[1-39]b[1-20],a[40-60]b[21-30],a[51-60]b[10-20],a[40-50]b[1-9]")
self.assertEqual(first, NodeSet("a[1-39]b[1-20],a[51-60]b[10-30],a[40-50]b[1-9,21-30]"))
self.assertEqual(str(first), "a[1-39]b[1-20],a[51-60]b[10-30],a[40-50]b[1-9,21-30]")
self.assertEqual(first, NodeSet("a[1-39]b[1-20],a[40-60]b[21-30],a[51-60]b[10-20],a[40-50]b[1-9]"))

first = NodeSet("a[1-2]p[1-2]")
second = NodeSet("a[2-3]p[2-3]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "a1p[1-2],a2p[1,3],a3p[2-3]")
self.assertEqual(first, NodeSet("a1p1,a1p2,a2p1,a2p3,a3p2,a3p3"))

first = NodeSet("artcore[3-999]p[1-99,500-598]")
second = NodeSet("artcore[1-2000]p[40-560]")
first = NodeSet("a[3-29]p[1-9,50-58]")
second = NodeSet("a[1-110]p[4-56]")
first.symmetric_difference_update(second)
self.assertEqual(str(first), "artcore[1-2000]p[100-499],artcore[1-2,1000-2000]p[40-99,500-560],artcore[3-999]p[1-39,561-598]")
self.assertEqual(first, NodeSet("artcore[1-2000]p[100-499],artcore[1-2,1000-2000]p[40-99,500-560],artcore[3-999]p[1-39,561-598]"))
self.assertEqual(str(first), "a[1-2,30-110]p[4-56],a[3-29]p[1-3,10-49,57-58]")
self.assertEqual(first, NodeSet("a[1-2,30-110]p[4-56],a[3-29]p[1-3,10-49,57-58]"))

ns1 = NodeSet("a[1-6]b4")
ns2 = NodeSet("a5b[2-5]")
ns1.symmetric_difference_update(ns2)
self.assertEqual(str(ns1), "a[1-4,6]b4,a5b[2-3,5]")
self.assertEqual(ns1, NodeSet("a[1-4]b4,a5b[2-3,5],a6b4"))
self.assertEqual(ns1, NodeSet("a[1-4,6]b4,a5b[2-3,5]"))

def test_autostep(self):
Expand Down
Loading

0 comments on commit ec14536

Please sign in to comment.