diff --git a/M2/BUILD/frank/Makefile b/M2/BUILD/frank/Makefile index 0ab5caed2f5..3ee18ea8cf2 100644 --- a/M2/BUILD/frank/Makefile +++ b/M2/BUILD/frank/Makefile @@ -17,6 +17,21 @@ cmake-make-appleclang: -DBUILD_DOCS=on \ ../../../.. +## CC="`brew --prefix llvm`/bin/clang" CXX="`brew --prefix llvm`/bin/clang++" cmake \ + +cmake-clang: + echo "git branch is " $(BRANCH) + mkdir -p builds.tmp/cmake-clang + cd builds.tmp/cmake-clang; cmake \ + -GNinja \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DBUILD_TESTING=on \ + -DCMAKE_C_COMPILER="`brew --prefix llvm`/bin/clang" \ + -DCMAKE_CXX_COMPILER="`brew --prefix llvm`/bin/clang++" \ + -DBUILD_DOCS=on \ + -DBUILD_NATIVE=off \ + ../../../.. + cmake-appleclang: echo "git branch is " $(BRANCH) mkdir -p builds.tmp/cmake-appleclang @@ -24,7 +39,7 @@ cmake-appleclang: -GNinja \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DBUILD_TESTING=on \ - -DCMAKE_PREFIX_PATH="`brew --prefix libffi`" \ + -DCMAKE_PREFIX_PATH="`brew --prefix libomp`" \ -DBUILD_DOCS=on \ -DBUILD_NATIVE=off \ ../../../.. diff --git a/M2/Macaulay2/e/CMakeLists.txt b/M2/Macaulay2/e/CMakeLists.txt index f6a0ea96703..72fd6ae7a45 100644 --- a/M2/Macaulay2/e/CMakeLists.txt +++ b/M2/Macaulay2/e/CMakeLists.txt @@ -299,7 +299,6 @@ set(SRCLIST gb-f4/SPairs f4/f4-computation f4/f4-m2-interface - f4/f4-mem f4/f4-monlookup f4/f4-spairs f4/f4 diff --git a/M2/Macaulay2/e/Makefile.files b/M2/Macaulay2/e/Makefile.files index 807e8926a6f..f75b786285a 100644 --- a/M2/Macaulay2/e/Makefile.files +++ b/M2/Macaulay2/e/Makefile.files @@ -62,7 +62,6 @@ INTERFACE = \ gb-f4/PolynomialList \ gb-f4/SPairs \ f4/f4 \ - f4/f4-mem \ f4/f4-monlookup \ f4/f4-computation \ f4/f4-spairs \ diff --git a/M2/Macaulay2/e/VectorArithmetic.hpp b/M2/Macaulay2/e/VectorArithmetic.hpp index e78edaf19fa..36fe3840956 100644 --- a/M2/Macaulay2/e/VectorArithmetic.hpp +++ b/M2/Macaulay2/e/VectorArithmetic.hpp @@ -16,7 +16,6 @@ // #include "aring-glue.hpp" // for ConcreteRing // #include "aring.hpp" // for DummyRing, ring_GFFlintBig, ring_... // #include "buffer.hpp" // for buffer -// #include "f4/f4-mem.hpp" // for F4Vec // #include "ring.hpp" // for Ring // #include "ringelem.hpp" // for ring_elem @@ -42,7 +41,6 @@ #include "aring-glue.hpp" #include #include -#include "f4/f4-mem.hpp" // for F4Mem class Ring; using ComponentIndex = int; @@ -296,8 +294,7 @@ class ConcreteVectorArithmetic ElementArray& sparse, // output value: sets this value int*& comps, int first, - int last, - F4Vec& f4Vec) const + int last) const { auto& dvec = * elementArray(dense); @@ -308,7 +305,8 @@ class ConcreteVectorArithmetic for (int i = first; i >= 0 and i <= last; i++) if (not mRing->is_zero(dvec[i])) len++; - comps = f4Vec.allocate(len); + //comps = f4Vec.allocate(len); + comps = new int[len]; sparse = allocateElementArray(len); auto& svec = * elementArray(sparse); @@ -716,9 +714,8 @@ class VectorArithmetic ElementArray& coeffs, // sets coeffs int*& comps, // sets comps int first, - int last, - F4Vec& f4Vec) const { - std::visit([&](auto& arg) { arg->denseToSparse(dense,coeffs,comps,first,last,f4Vec); }, mConcreteVector); + int last) const { + std::visit([&](auto& arg) { arg->denseToSparse(dense,coeffs,comps,first,last); }, mConcreteVector); } template diff --git a/M2/Macaulay2/e/comp-gb.cpp b/M2/Macaulay2/e/comp-gb.cpp index 10665f46bee..a8fe293aa4f 100644 --- a/M2/Macaulay2/e/comp-gb.cpp +++ b/M2/Macaulay2/e/comp-gb.cpp @@ -20,12 +20,14 @@ GBComputation *createF4GB(const Matrix *m, M2_arrayint gb_weights, int strategy, M2_bool use_max_degree, - int max_degree); + int max_degree, + int numThreads); // Found in "gb-f4/GBF4Interface.hpp" GBComputation *createGBF4Interface(const Matrix *m, const std::vector& weights, - int strategy + int strategy, + int numThreads ); GBComputation::~GBComputation() {} @@ -42,6 +44,7 @@ GBComputation *GBComputation::choose_gb(const Matrix *m, int max_degree, int algorithm, int strategy, + int numThreads, int max_reduction_count) { const Ring *R1 = m->get_ring(); @@ -105,7 +108,8 @@ GBComputation *GBComputation::choose_gb(const Matrix *m, gb_weights, strategy, use_max_degree, - max_degree); + max_degree, + numThreads); break; case 7: result = binomialGB_comp::create(m, @@ -120,10 +124,12 @@ GBComputation *GBComputation::choose_gb(const Matrix *m, ERROR("Algorithm => Test has been removed from M2"); return nullptr; case 9: + // new GBF4 algorithm weights = M2_arrayint_to_stdvector(gb_weights); result = createGBF4Interface(m, weights, - strategy); + strategy, + numThreads); break; default: result = gbA::create(m, diff --git a/M2/Macaulay2/e/comp-gb.hpp b/M2/Macaulay2/e/comp-gb.hpp index 43f429ee32e..f5f39727fc9 100644 --- a/M2/Macaulay2/e/comp-gb.hpp +++ b/M2/Macaulay2/e/comp-gb.hpp @@ -49,6 +49,7 @@ class GBComputation : public Computation int max_degree, int algorithm, int strategy, + int numThreads, int max_reduction_count = 10); // Values for algorithm and strategy are documented in engine.h // Returns NULL if an error occurs diff --git a/M2/Macaulay2/e/f4/f4-computation.cpp b/M2/Macaulay2/e/f4/f4-computation.cpp index ec99c8644e2..f9051cab1b9 100644 --- a/M2/Macaulay2/e/f4/f4-computation.cpp +++ b/M2/Macaulay2/e/f4/f4-computation.cpp @@ -5,7 +5,6 @@ #include "buffer.hpp" // for buffer #include "error.h" // for ERROR #include "f4/f4-m2-interface.hpp" // for F4toM2Interface -#include "f4/f4-mem.hpp" // for F4Mem #include "f4/f4-types.hpp" // for gb_array, gbelem #include "f4/f4.hpp" // for F4GB #include "f4/moninfo.hpp" // for MonomialInfo @@ -28,48 +27,44 @@ GBComputation *createF4GB(const Matrix *m, M2_arrayint gb_weights, int strategy, M2_bool use_max_degree, - int max_degree) + int max_degree, + int numThreads) { const PolynomialRing *R = m->get_ring()->cast_to_PolynomialRing(); const Ring *K = R->getCoefficients(); - F4Mem *Mem = new F4Mem; auto vectorArithmetic = new VectorArithmetic(K); // TODO: code here used to detect whether R, K is a valid ring here GBComputation *G; G = new F4Computation(vectorArithmetic, - Mem, m, collect_syz, n_rows_to_keep, gb_weights, strategy, use_max_degree, - max_degree); + max_degree, + numThreads); return G; } F4Computation::F4Computation(const VectorArithmetic* VA, - F4Mem *Mem0, const Matrix *m, M2_bool collect_syz, int n_rows_to_keep, M2_arrayint gb_weights, int strategy, M2_bool use_max_degree, - int max_degree) + int max_degree, + int numThreads) : mFreeModule(m->rows()), - mVectorArithmetic(VA), - mMemoryBlock(Mem0) + mVectorArithmetic(VA) { - // Note: the F4Mem which K0 uses should be mMemoryBlock. ??TODO: no longer valid, still containing useful info? mOriginalRing = m->get_ring()->cast_to_PolynomialRing(); mMonoid = new MonomialInfo(mOriginalRing->n_vars(), mOriginalRing->getMonoid()->getMonomialOrdering()); - mF4GB = new F4GB(mVectorArithmetic, - Mem0, mMonoid, m->rows(), collect_syz, @@ -77,13 +72,14 @@ F4Computation::F4Computation(const VectorArithmetic* VA, gb_weights, strategy, use_max_degree, - max_degree); + max_degree, + numThreads); F4toM2Interface::from_M2_matrix(mVectorArithmetic, mMonoid, m, gb_weights, mF4GB->get_generators()); mF4GB->new_generators(0, m->n_cols() - 1); } -F4Computation::~F4Computation() { delete mMemoryBlock; } +F4Computation::~F4Computation() = default; /************************* ** Top level interface ** *************************/ @@ -169,7 +165,6 @@ void F4Computation::show() const // debug display stash::stats(o); emit(o.str()); - mMemoryBlock->show(); // f4->show(); } diff --git a/M2/Macaulay2/e/f4/f4-computation.hpp b/M2/Macaulay2/e/f4/f4-computation.hpp index ec3eb4b1cb9..d9e2b5a569c 100644 --- a/M2/Macaulay2/e/f4/f4-computation.hpp +++ b/M2/Macaulay2/e/f4/f4-computation.hpp @@ -9,7 +9,6 @@ #include "interface/computation.h" // for ComputationStatusCode #include "polyring.hpp" // for PolynomialRing class Computation; -class F4Mem; class FreeModule; class Matrix; class MonomialInfo; @@ -29,18 +28,17 @@ class F4Computation : public GBComputation // Also determines degrees of elements in F. const VectorArithmetic* mVectorArithmetic; MonomialInfo *mMonoid; - F4Mem *mMemoryBlock; F4GB *mF4GB; public: F4Computation(const VectorArithmetic* VA, - F4Mem *Mem, const Matrix *m, M2_bool collect_syz, int n_rows_to_keep, M2_arrayint gb_weights, int strategy, M2_bool use_max_degree, - int max_degree); + int max_degree, + int numThreads); ~F4Computation() override; @@ -90,7 +88,8 @@ GBComputation *createF4GB(const Matrix *m, M2_arrayint gb_weights, int strategy, M2_bool use_max_degree, - int max_degree); + int max_degree, + int numThreads); #endif diff --git a/M2/Macaulay2/e/f4/f4-mem.cpp b/M2/Macaulay2/e/f4/f4-mem.cpp deleted file mode 100644 index 05b03cb02ab..00000000000 --- a/M2/Macaulay2/e/f4/f4-mem.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "f4/f4-mem.hpp" -#include - -const unsigned int doublesize[DOUBLESIZE] = { - 12, 24, 48, 96, 192, 384, - 768, 1536, 3072, 6144, 12288, 24576, - 49152, 98304, 196608, 393216, 786432, 1572864, - 3145728, 6291456, 12582912, 25165824, 50331648, 100663296, - 201326592, 402653184, 805306368, 1610612736, 3221225472U}; - -void F4Vec::show() -{ - fprintf(stderr, "memory usage for %s. nreallocs = %d\n", name, nreallocs); - fprintf(stderr, - "index\t\tsize\t\tnalloc\t\tndealloc\t\thighwater\tcurrent\n"); - for (int i = 0; i < DOUBLESIZE; i++) - if (nallocs[i] > 0) - { - fprintf(stderr, - "%4u\t%12u\t%12u\t%12u\t\t%12u\t%12u\n", - i, - doublesize[i], - nallocs[i], - ndeallocs[i], - highwater[i], - current[i]); - } -} - -F4Mem::F4Mem() - : monom_alloc(0), - monom_total(0), - monom_freed(0), - monom_dealloc(0), - monom_highwater(0), - monom_current(0), - components("comps"), - coefficients("coeffs") -{ -} - -void F4Mem::show() -{ - fprintf(stderr, - "class\tnalloc\tndealloc\thighwater\ttotal\tfreed\tcurrent\n"); - fprintf(stderr, - "monom\t%ld\t%ld\t\t%ld\t\t%ld\t%ld\t%ld\n", - monom_alloc, - monom_dealloc, - monom_highwater, - monom_total, - monom_freed, - monom_current); - components.show(); - coefficients.show(); -} - -// Local Variables: -// compile-command: "make -C $M2BUILDDIR/Macaulay2/e " -// indent-tabs-mode: nil -// End: diff --git a/M2/Macaulay2/e/f4/f4-mem.hpp b/M2/Macaulay2/e/f4/f4-mem.hpp deleted file mode 100644 index 9101e415054..00000000000 --- a/M2/Macaulay2/e/f4/f4-mem.hpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2007 Michael E. Stillman - -#ifndef __f4mem_h_ -#define __f4mem_h_ - -#include "newdelete.hpp" -#include "f4/moninfo.hpp" // only for monomial_word - -typedef int *f4vec; - -#define DOUBLESIZE 30 -extern const unsigned int doublesize[DOUBLESIZE]; - -#define SIZE_MASK 0x07ffffff -#define STASH_SHIFT (8 * sizeof(int) - 5) -#define STASH(a) (a >> STASH_SHIFT) - -class F4Vec -{ - // Format for the items pointed to: - // a[-1] = (top 5 bits: which stash this belongs to) - // (rest of the bits: current size of the array) - const char *name; - - int nreallocs; - int nallocs[DOUBLESIZE]; - int highwater[DOUBLESIZE]; - int current[DOUBLESIZE]; - int ndeallocs[DOUBLESIZE]; - - int find_alloc_size(int sz) - { - sz++; - int s = 0; - while (doublesize[s] < sz) s++; - return s; - } - - public: - void reset() - { - nreallocs = 0; - for (int i = 0; i < DOUBLESIZE; i++) nallocs[i] = 0; - for (int i = 0; i < DOUBLESIZE; i++) highwater[i] = 0; - for (int i = 0; i < DOUBLESIZE; i++) current[i] = 0; - for (int i = 0; i < DOUBLESIZE; i++) ndeallocs[i] = 0; - } - - F4Vec(const char *name0) : name(name0) { reset(); } - ~F4Vec() { name = nullptr; } - int size(f4vec a) { return (SIZE_MASK & (a[-1])); } - int alloc_stash(f4vec a) { return (a[-1]) >> STASH_SHIFT; } - f4vec allocate(int alloc_sz) - { - if (alloc_sz == 0) return nullptr; - int s = find_alloc_size(alloc_sz); - int nextlen = doublesize[s]; - nallocs[s]++; - current[s]++; - if (highwater[s] < current[s]) highwater[s] = current[s]; - int *result = newarray_atomic(int, nextlen); - result[0] = (s << STASH_SHIFT) | alloc_sz; - return (result + 1); - } - - void deallocate(f4vec &a) - { - if (a == nullptr) return; - int s = alloc_stash(a); - ndeallocs[s]++; - current[s]--; - freemem(a - 1); - a = nullptr; - } - - void clear(f4vec &a) - { - // Don't change the amount allocated, but set the size back to 0. - // (and zero out the array (or place garbage in there...) - int sz = size(a); - for (int i = sz; i > 0; i--) a[i] = 0x03030303; - a[-1] -= sz; - } - - void reallocate(f4vec &a, int newsize) - { - nreallocs++; - f4vec newa = allocate(newsize); - int oldsize = size(a); - int *oldp = a; - int *p = newa; - for (int i = oldsize; i > 0; i--) *p++ = *oldp++; - newa[-1] += (newsize - oldsize); - deallocate(a); - a = p; - } - - void resize(f4vec &a, int newsize) - { - // grow 'a' to have size 'newsize'. - // If the size becomes too large for a to handle - // then reallocation is done. - int s = alloc_stash(a); - if (doublesize[s] < newsize) - reallocate(a, newsize); - else - a[-1] += (newsize - size(a)); - } - - void grow(f4vec &a, int ntoadd) { resize(a, size(a) + ntoadd); } - void show(); -}; - -class F4Mem : public our_new_delete -{ - long monom_alloc; - long monom_total; // number of ints total asked for - long monom_freed; // number of ints deallocated - long monom_dealloc; - long monom_highwater; - long monom_current; - - public: - F4Vec components; - F4Vec coefficients; - - F4Mem(); - - monomial_word *allocate_monomial_array(int len) - { - if (len == 0) return nullptr; - monom_alloc++; - monom_total += len; - monom_current += len; - if (monom_highwater < monom_current) monom_highwater = monom_current; - monomial_word *result = newarray_atomic(monomial_word, len); - return result; - } - - void deallocate_monomial_array(monomial_word *&a, int len) - { - if (len == 0) return; - monom_dealloc++; - monom_freed += len; - monom_current -= len; - freemem(a); - a = nullptr; - } - - void show(); -}; - -#endif - -// Local Variables: -// compile-command: "make -C $M2BUILDDIR/Macaulay2/e " -// indent-tabs-mode: nil -// End: diff --git a/M2/Macaulay2/e/f4/f4-monlookup.cpp b/M2/Macaulay2/e/f4/f4-monlookup.cpp index 6d4a4cab824..c0d1874d6af 100644 --- a/M2/Macaulay2/e/f4/f4-monlookup.cpp +++ b/M2/Macaulay2/e/f4/f4-monlookup.cpp @@ -4,12 +4,9 @@ #include "buffer.hpp" // for buffer #include "engine-exports.h" // for newline #include "f4/varpower-monomial.hpp" // for varpower_word, const_varpower_mo... -#include "mem.hpp" // for stash -#include "style.hpp" // for INTSIZE #include "text-io.hpp" // for emit, emit_line #include // for assert -#include // for gc_allocator #include // for int32_t #include // for vector, vector<>::iterator #include @@ -20,7 +17,7 @@ F4MonomialLookupTableT::new_mi_node(varpower_word v, varpower_word e, mi_node *d) { - mi_node *p = reinterpret_cast(mi_stash->new_elem()); + mi_node *p = new mi_node; p->var = v; p->exp = e; p->left = nullptr; @@ -37,7 +34,7 @@ F4MonomialLookupTableT::new_mi_node(varpower_word v, varpower_word e, Key k) { - mi_node *p = reinterpret_cast(mi_stash->new_elem()); + mi_node *p = new mi_node; p->var = v; p->exp = e; p->left = nullptr; @@ -57,32 +54,23 @@ void F4MonomialLookupTableT::delete_mi_node(mi_node *p) { if (p->header != p) delete_mi_node(p->down()); } - mi_stash->delete_elem(p); + delete p; } template -F4MonomialLookupTableT::F4MonomialLookupTableT(int nvars, stash *mi_stash0) +F4MonomialLookupTableT::F4MonomialLookupTableT(int nvars) { count = 0; - mi_stash = mi_stash0; - if (mi_stash == nullptr) - { - count = 1; - mi_stash = new stash("mi_node", sizeof(mi_node)); - } - size_of_exp = nvars; - exp0 = newarray_atomic_clear(ntuple_word, size_of_exp); + exp0 = new ntuple_word[size_of_exp]; + std::fill(exp0, exp0 + size_of_exp, 0); } template F4MonomialLookupTableT::~F4MonomialLookupTableT() { - for (typename VECTOR(mi_node *)::const_iterator i = mis.begin(); - i != mis.end(); - i++) - delete_mi_node(*i); - if ((count % 2) == 1) delete mi_stash; + for (auto& i : mis) delete_mi_node(i); + delete [] exp0; } template @@ -90,7 +78,7 @@ void F4MonomialLookupTableT::insert1(mi_node *&top, const_varpower_monomial b, Key k) { - count += 2; + count++; mi_node **p = &top, *up = nullptr; bool one_element = true; @@ -204,8 +192,8 @@ bool F4MonomialLookupTableT::find_one_divisor1(mi_node *mi, template void F4MonomialLookupTableT::find_all_divisors1(mi_node *mi, const_ntuple_monomial exp, - VECTOR(Key) & - result_k) const + std::vector& result_k + ) const { mi_node *p = mi; @@ -244,13 +232,14 @@ void F4MonomialLookupTableT::update_expvector( if (size_of_exp <= nvars) { // Increase size of exponent vector - freemem(exp0); + delete [] exp0; if (nvars > 2 * size_of_exp) size_of_exp = nvars; else size_of_exp *= 2; - exp0 = newarray_atomic_clear(ntuple_word, size_of_exp); + exp0 = new ntuple_word[size_of_exp]; + for (auto i=0; i(*m++); @@ -291,7 +280,7 @@ template void F4MonomialLookupTableT::find_all_divisors_vp( long comp, const_varpower_monomial m, - VECTOR(Key) & result_k) const + std::vector & result_k) const { if (comp >= mis.size()) return; mi_node *mi = mis[comp]; @@ -322,7 +311,7 @@ template void F4MonomialLookupTableT::find_all_divisors_packed( const MonomialInfo *M, const_packed_monomial m, - VECTOR(Key) & result_k) const + std::vector & result_k) const { long comp = M->get_component(m); if (comp >= mis.size()) return; @@ -337,10 +326,11 @@ void F4MonomialLookupTableT::insert_minimal_vp(long comp, const_varpower_monomial m, Key k) { - if (comp >= mis.size()) - { - for (long j = comp - mis.size(); j >= 0; j--) mis.push_back(nullptr); - } + while (comp >= mis.size()) mis.push_back(nullptr); + // if (comp >= mis.size()) + // { + // for (long j = comp - mis.size(); j >= 0; j--) mis.push_back(nullptr); + // } insert1(mis[comp], m, k); } @@ -446,10 +436,8 @@ void F4MonomialLookupTableT::debug_out(int disp) const nnodes = 0; nleaves = 0; ndepth = 0; - for (typename VECTOR(mi_node *)::const_iterator i = mis.begin(); - i != mis.end(); - i++) - if (*i != nullptr) do_tree(*i, 0, 0, disp); + for (auto& i : mis) + if (i != nullptr) do_tree(i, 0, 0, disp); buffer o; o << "list nodes = " << nlists << newline; o << "internal nodes = " << nnodes << newline; @@ -506,26 +494,22 @@ template void F4MonomialLookupTableT::debug_check() const { int nfound = 0; - for (typename VECTOR(mi_node *)::const_iterator i = mis.begin(); - i != mis.end(); - i++) + for (auto& i : mis) { - if (*i != nullptr) nfound += debug_check(*i, nullptr); + if (i != nullptr) nfound += debug_check(i, nullptr); } - assert(count / 2 == nfound); + assert(count == nfound); } template void F4MonomialLookupTableT::text_out(buffer &o) const { o << "F4MonomialLookupTableT ("; - o << count / 2 << " entries)\n"; + o << count << " entries)\n"; int a = 0; - for (typename VECTOR(mi_node *)::const_iterator i = mis.begin(); - i != mis.end(); - i++) + for (auto& i : mis) { - for (mi_node *p = *i; p != nullptr; p = next(p)) + for (mi_node *p = i; p != nullptr; p = next(p)) { if ((++a) % 15 == 0) o << newline; o << p->key() << " "; @@ -533,9 +517,8 @@ void F4MonomialLookupTableT::text_out(buffer &o) const } } -void minimalize_varpower_monomials(const VECTOR(varpower_monomial) & elems, - VECTOR(int) & result_minimals, - stash *mi_stash) +void minimalize_varpower_monomials(const std::vector & elems, + std::vector & result_minimals) { std::vector> degs_and_indices; int count = 0; @@ -547,7 +530,7 @@ void minimalize_varpower_monomials(const VECTOR(varpower_monomial) & elems, } std::stable_sort(degs_and_indices.begin(), degs_and_indices.end()); - F4MonomialLookupTableT M(10, mi_stash); // 10 is simply a suggested value + F4MonomialLookupTableT M(10); // 10 is simply a suggested value for (auto p : degs_and_indices) { int k; // unused: we only care if there is a divisor, not which index it has diff --git a/M2/Macaulay2/e/f4/f4-monlookup.hpp b/M2/Macaulay2/e/f4/f4-monlookup.hpp index 00a665377be..549e1db68a6 100644 --- a/M2/Macaulay2/e/f4/f4-monlookup.hpp +++ b/M2/Macaulay2/e/f4/f4-monlookup.hpp @@ -6,15 +6,13 @@ #include "f4/moninfo.hpp" // for MonomialInfo (ptr only), const_p... #include "f4/ntuple-monomial.hpp" // for const_ntuple_monomial, ntuple_word #include "f4/varpower-monomial.hpp" // for const_varpower_monomial, varpowe... -#include "newdelete.hpp" // for VECTOR, our_new_delete class buffer; -class stash; template -class F4MonomialLookupTableT : public our_new_delete +class F4MonomialLookupTableT { - struct mi_node : public our_new_delete // monomial ideal internal node /// + struct mi_node // monomial ideal internal node /// { varpower_word var; varpower_word exp; @@ -40,8 +38,7 @@ class F4MonomialLookupTableT : public our_new_delete } }; - stash *mi_stash; - VECTOR(mi_node *) mis; + std::vector mis; int count; int size_of_exp; // in ints, size of exp0 @@ -61,12 +58,12 @@ class F4MonomialLookupTableT : public our_new_delete void find_all_divisors1(mi_node *mi, const_ntuple_monomial exp, - VECTOR(Key) & result_k) const; + std::vector& result_k) const; void insert1(mi_node *&p, const_varpower_monomial m, Key k); public: - F4MonomialLookupTableT(int nvars, stash *mi_stash = nullptr); + F4MonomialLookupTableT(int nvars); ~F4MonomialLookupTableT(); // // Should we write these two routines? @@ -98,11 +95,11 @@ class F4MonomialLookupTableT : public our_new_delete void find_all_divisors_vp(long comp, const_varpower_monomial m, - VECTOR(Key) & result_k) const; + std::vector & result_k) const; void find_all_divisors_packed(const MonomialInfo *M, const_packed_monomial m, - VECTOR(Key) & result_k) const; + std::vector & result_k) const; // Search. Return a vector of all keys corresponding to // monomials which divide m. @@ -121,9 +118,8 @@ class F4MonomialLookupTableT : public our_new_delete void debug_check() const; }; -void minimalize_varpower_monomials(const VECTOR(varpower_monomial) & elems, - VECTOR(int) & result_minimals, - stash *mi_stash = nullptr); +void minimalize_varpower_monomials(const std::vector & elems, + std::vector & result_minimals); #endif diff --git a/M2/Macaulay2/e/f4/f4-spairs.cpp b/M2/Macaulay2/e/f4/f4-spairs.cpp index 64dfa6cce4d..7ee53bfefe4 100644 --- a/M2/Macaulay2/e/f4/f4-spairs.cpp +++ b/M2/Macaulay2/e/f4/f4-spairs.cpp @@ -11,15 +11,25 @@ #include // for stable_sort #include // for vector, vector<>::iterator +#if defined(WITH_TBB) +F4SPairSet::F4SPairSet(const MonomialInfo *M0, const gb_array &gb0, mtbb::task_arena& scheduler) +#else F4SPairSet::F4SPairSet(const MonomialInfo *M0, const gb_array &gb0) - : M(M0), gb(gb0), heap(nullptr), this_set(nullptr), nsaved_unneeded(0) +#endif + : M(M0), + gb(gb0), + mSPairCompare(mSPairs), + mSPairQueue(mSPairCompare), + nsaved_unneeded(0), + mMinimizePairsSeconds(0) +#if defined(WITH_TBB) + , mScheduler(scheduler) +#endif { max_varpower_size = 2 * M->n_vars() + 1; - spair *used_to_determine_size = nullptr; - size_t spair_size = - sizeofspair(used_to_determine_size, M->max_monomial_size()); - spair_stash = new stash("F4 spairs", spair_size); + //spair *used_to_determine_size = nullptr; + //mSPairSizeInBytes = sizeofspair(used_to_determine_size, M->max_monomial_size()); } F4SPairSet::~F4SPairSet() @@ -27,20 +37,6 @@ F4SPairSet::~F4SPairSet() // Deleting the stash deletes all memory used here // PS, VP are deleted automatically. M = nullptr; - heap = nullptr; - this_set = nullptr; - delete spair_stash; -} - -spair *F4SPairSet::make_spair(spair_type type, int deg, int i, int j) -{ - spair *result = reinterpret_cast(spair_stash->new_elem()); - result->next = nullptr; - result->type = type; - result->deg = deg; - result->i = i; - result->j = j; - return result; } void F4SPairSet::insert_spair(pre_spair *p, int me) @@ -49,27 +45,43 @@ void F4SPairSet::insert_spair(pre_spair *p, int me) int deg = p->deg1 + gb[me]->deg; // int me_component = M->get_component(gb[me]->f.monoms); - spair *result = make_spair(F4_SPAIR_SPAIR, deg, me, j); - - M->from_varpower_monomial(p->quot, 0, result->lcm); - M->unchecked_mult(result->lcm, gb[me]->f.monoms, result->lcm); - - result->next = heap; - heap = result; + spair result {SPairType::SPair, deg, me, j, nullptr}; + + auto allocRange = mSPairLCMs.allocateArray(M->max_monomial_size()); + result.lcm = allocRange.first; + + M->from_varpower_monomial(p->quot, 0, result.lcm); + M->unchecked_mult(result.lcm, gb[me]->f.monoms, result.lcm); + + mSPairLCMs.shrinkLastAllocate(allocRange.first, + allocRange.second, + allocRange.first + M->monomial_size(result.lcm)); + + auto sPairIndex = mSPairs.size(); + mSPairs.push_back(result); + mSPairQueue.push(sPairIndex); + } -void F4SPairSet::delete_spair(spair *p) { spair_stash->delete_elem(p); } +void F4SPairSet::delete_spair(spair *p) { delete p; } void F4SPairSet::insert_generator(int deg, packed_monomial lcm, int col) { - spair *p = make_spair(F4_SPAIR_GEN, deg, col, -1); - M->copy(lcm, p->lcm); - p->next = heap; - heap = p; + spair result {SPairType::Generator,deg,col,-1,nullptr}; + + auto allocRange = mSPairLCMs.allocateArray(M->monomial_size(lcm)); + result.lcm = allocRange.first; + + M->copy(lcm, result.lcm); + + auto sPairIndex = mSPairs.size(); + mSPairs.push_back(result); + mSPairQueue.push(sPairIndex); } bool F4SPairSet::pair_not_needed(spair *p, gbelem *m) { - if (p->type != F4_SPAIR_SPAIR && p->type != F4_SPAIR_RING) return false; + // if (p->type != SPairType::SPair && p->type != SPairType::Ring) return false; + if (p->type != SPairType::SPair) return false; if (M->get_component(p->lcm) != M->get_component(m->f.monoms)) return false; return M->unnecessary( m->f.monoms, gb[p->i]->f.monoms, gb[p->j]->f.monoms, p->lcm); @@ -81,98 +93,85 @@ int F4SPairSet::remove_unneeded_pairs() // and do so. Return the number removed. // MES: Check the ones in this_set? Probably not needed... - spair head; - spair *p = &head; + + if (gb.size() == 0) return 0; + gbelem *m = gb[gb.size() - 1]; - int nremoved = 0; - - head.next = heap; - while (p->next != nullptr) - if (pair_not_needed(p->next, m)) - { - nremoved++; - spair *tmp = p->next; - p->next = tmp->next; - tmp->next = nullptr; - delete_spair(tmp); - } - else - p = p->next; - heap = head.next; - return nremoved; + //long nremoved = 0; + +#if defined(WITH_TBB) + mScheduler.execute([&] { + mtbb::parallel_for(mtbb::blocked_range{0, INTSIZE(mSPairs)}, + [&](const mtbb::blocked_range& r) + { + for (auto i = r.begin(); i != r.end(); ++i) + { + if (pair_not_needed(&mSPairs[i],m)) + { + mSPairs[i].type = SPairType::Retired; + } + } + }); + }); +#else + for (auto& p : mSPairs) + { + if (pair_not_needed(&p,m)) + { + p.type = SPairType::Retired; + //++nremoved; + } + } +#endif + return 0; + //return nremoved; + } -int F4SPairSet::determine_next_degree(int &result_number) +std::pair F4SPairSet::setThisDegree() { - spair *p; - int nextdeg; - int len = 1; - if (heap == nullptr) + if (mSPairQueue.empty()) return {false, 0}; + + auto queueTop = mSPairQueue.top(); + while (mSPairs[queueTop].type == SPairType::Retired) { - result_number = 0; - return 0; + mSPairQueue.pop(); + if (mSPairQueue.empty()) return {false, 0}; + queueTop = mSPairQueue.top(); } - nextdeg = heap->deg; - for (p = heap->next; p != nullptr; p = p->next) - if (p->deg > nextdeg) - continue; - else if (p->deg < nextdeg) - { - len = 1; - nextdeg = p->deg; - } - else - len++; - result_number = len; - return nextdeg; -} -int F4SPairSet::prepare_next_degree(int max, int &result_number) -// Returns the (sugar) degree being done next, and collects all (or at -// most 'max', if max>0) spairs in this lowest degree. -// Returns the degree, sets result_number. -{ - this_set = nullptr; - int result_degree = determine_next_degree(result_number); - if (result_number == 0) return 0; - if (max > 0 && max < result_number) result_number = max; - int len = result_number; - spair head; - spair *p; - head.next = heap; - p = &head; - while (p->next != nullptr) - if (p->next->deg != result_degree) - p = p->next; - else - { - spair *tmp = p->next; - p->next = tmp->next; - tmp->next = this_set; - this_set = tmp; - len--; - if (len == 0) break; - } - heap = head.next; - return result_degree; + mThisDegree = mSPairs[queueTop].deg; + return {true,mThisDegree}; + } -spair *F4SPairSet::get_next_pair() +//spair *F4SPairSet::get_next_pair() +std::pair F4SPairSet::get_next_pair() // get the next pair in this degree (the one 'prepare_next_degree' set up') -// returns 0 if at the end { - spair *result; - if (!this_set) return nullptr; + if (mSPairQueue.empty()) return {false, {}}; + auto result = mSPairQueue.top(); + if (mSPairs[result].deg != mThisDegree) return {false, {} }; + mSPairQueue.pop(); + return {true,mSPairs[result]}; +} - result = this_set; - this_set = this_set->next; - result->next = nullptr; - return result; +void F4SPairSet::discardSPairsInCurrentDegree() +{ + while (not mSPairQueue.empty()) + { + auto result = mSPairQueue.top(); + if (mSPairs[result].deg != mThisDegree) return; + mSPairs[result].type = SPairType::Retired; + mSPairQueue.pop(); + } } int F4SPairSet::find_new_pairs(bool remove_disjoints) // returns the number of new pairs found { + // this is used for "late" removal of spairs -- will need to be reworked + // in the new priority_queue approach nsaved_unneeded += remove_unneeded_pairs(); int len = construct_pairs(remove_disjoints); return len; @@ -181,7 +180,7 @@ int F4SPairSet::find_new_pairs(bool remove_disjoints) void F4SPairSet::display_spair(spair *p) // A debugging routine which displays an spair { - if (p->type == F4_SPAIR_SPAIR) + if (p->type == SPairType::SPair) { fprintf(stderr, "[%d %d deg %d lcm ", p->i, p->j, p->deg); M->show(p->lcm); @@ -196,7 +195,8 @@ void F4SPairSet::display_spair(spair *p) void F4SPairSet::display() // A debugging routine which displays the spairs in the set { - fprintf(stderr, "spair set\n"); + /* + fprintf(stderr, "spair set\n"); for (spair *p = heap; p != nullptr; p = p->next) { fprintf(stderr, " "); @@ -208,6 +208,7 @@ void F4SPairSet::display() fprintf(stderr, " "); display_spair(p); } + */ } //////////////////////////////// @@ -222,7 +223,7 @@ pre_spair *F4SPairSet::create_pre_spair(int j) pre_spair *result = PS.allocate(); result->quot = VP.reserve(max_varpower_size); result->j = j; - result->type = F4_SPAIR_SPAIR; + result->type = SPairType::SPair;; M->quotient_as_vp(gb[j]->f.monoms, gb[gb.size() - 1]->f.monoms, result->quot, @@ -233,7 +234,7 @@ pre_spair *F4SPairSet::create_pre_spair(int j) return result; } -void insert_pre_spair(VECTOR(VECTOR(pre_spair *)) & bins, pre_spair *p) +void insert_pre_spair(std::vector> & bins, pre_spair *p) { int d = p->deg1; if (d >= bins.size()) bins.resize(d + 1); @@ -251,10 +252,10 @@ int F4SPairSet::construct_pairs(bool remove_disjoints) gbelem *me = gb[gb.size() - 1]; int me_component = static_cast(M->get_component(me->f.monoms)); - typedef VECTOR(pre_spair *) spairs; - - VECTOR(VECTOR(pre_spair *)) bins; + std::vector> bins; + mtbb::tick_count t0 {mtbb::tick_count::now()}; + // Loop through each element of gb, and create the pre_spair for (int i = 0; i < gb.size() - 1; i++) { @@ -264,6 +265,9 @@ int F4SPairSet::construct_pairs(bool remove_disjoints) insert_pre_spair(bins, p); } + mtbb::tick_count t1 {mtbb::tick_count::now()}; + mPrePairsSeconds += (t1-t0).seconds(); + //////////////////////////// // Now minimalize the set // //////////////////////////// @@ -279,9 +283,9 @@ int F4SPairSet::construct_pairs(bool remove_disjoints) std::stable_sort(bins[i].begin(), bins[i].end(), C); // Loop through each degree and potentially insert... - spairs::iterator first = bins[i].begin(); - spairs::iterator next = first; - spairs::iterator end = bins[i].end(); + auto first = bins[i].begin(); + auto next = first; + auto end = bins[i].end(); for (; first != end; first = next) { next = first + 1; @@ -310,10 +314,82 @@ int F4SPairSet::construct_pairs(bool remove_disjoints) } } delete montab; + mtbb::tick_count t2 {mtbb::tick_count::now()}; + mMinimizePairsSeconds += (t2-t1).seconds(); return n_new_pairs; } +#if 0 +// testing mathic and mathicgb routines... +class TestPairQueueConfiguration +{ +private: + // What should be here? + +public: + TestPairQueueConfiguration(const gb_array& gb, + XXX + ); + using PairData = MonomialInfo::OrderedMonomial; + void computePairData( + size_t col, + size_t row, + PairData & m, // allocated space? + ) const; + + using CompareResult = bool; + bool compare( + size_t colA, + size_t rowA, + MonomialInfo::ConstOrderedMonomial a, + size_t colB, + size_t rowB, + MonomialInfo::ConstOrderedMonomial b) const + { + // What to change this test code to? + const auto cmp = orderMonoid().compare(*a, *b); + if (cmp == GT) + return true; + if (cmp == LT) + return false; + + const bool aRetired = mBasis.retired(rowA) || mBasis.retired(colA); + const bool bRetired = mBasis.retired(rowB) || mBasis.retired(colB); + if (aRetired || bRetired) + return !bRetired; + + if (mPreferSparseSPairs) { + const auto termCountA = + mBasis.basisElement(colA).termCount() + + mBasis.basisElement(rowA).termCount(); + const auto termCountB = + mBasis.basisElement(colB).termCount() + + mBasis.basisElement(rowB).termCount(); + if (termCountA > termCountB) + return true; + if (termCountA < termCountB) + return false; + } + return colA + rowA > colB + rowB; + } + + bool cmpLessThan(bool v) const {return v;} +}; + +class TestSPairs +{ +private: + mathic::PairQueue mPairQueue; + +public: + TestSPairs(gb_poly& currentGroebnerBasis); + + ~TestSPairs() {} // anything here? + +}; +#endif + // Local Variables: // compile-command: "make -C $M2BUILDDIR/Macaulay2/e " // indent-tabs-mode: nil diff --git a/M2/Macaulay2/e/f4/f4-spairs.hpp b/M2/Macaulay2/e/f4/f4-spairs.hpp index 1855828ec9d..5104edc6e79 100644 --- a/M2/Macaulay2/e/f4/f4-spairs.hpp +++ b/M2/Macaulay2/e/f4/f4-spairs.hpp @@ -1,21 +1,18 @@ // Copyright 2004-2021 Michael E. Stillman -#ifndef __f4spairs_h_ -#define __f4spairs_h_ +#pragma once +#include // for std::priority_queue +#include "MemoryBlock.hpp" // for MemoryBlock #include "f4/f4-types.hpp" // for spair (ptr only), gb_array, pre_... #include "f4/memblock.hpp" // for F4MemoryBlock #include "f4/moninfo.hpp" // for MonomialInfo (ptr only), packed_... #include "f4/varpower-monomial.hpp" // for varpower_word -#include "newdelete.hpp" // for our_new_delete -class stash; -class F4SPairSet : public our_new_delete +class F4SPairSet { private: - int determine_next_degree(int &result_number); - - spair *make_spair(spair_type type, + spair *make_spair(SPairType type, int deg, int i, int j); // CAUTION: lcm is allocated correctly, but is @@ -31,8 +28,12 @@ class F4SPairSet : public our_new_delete int construct_pairs(bool remove_disjoints); public: - F4SPairSet(const MonomialInfo *MI0, const gb_array &gb0); - +#if defined(WITH_TBB) + F4SPairSet(const MonomialInfo *MI0, const gb_array &gb0,mtbb::task_arena& scheduler); +#else + F4SPairSet(const MonomialInfo *MI0, const gb_array &gb0); +#endif + ~F4SPairSet(); void insert_generator(int deg, packed_monomial lcm, int column); @@ -45,16 +46,20 @@ class F4SPairSet : public our_new_delete int find_new_pairs(bool remove_disjoints); // returns the number of new pairs found, using the last element on this list - int prepare_next_degree(int max, int &result_number); + std::pair setThisDegree(); + + // int prepare_next_degree(int max, int &result_number); // Returns the (sugar) degree being done next, and collects all (or at // most 'max', if max>0) spairs in this lowest degree. // Returns the degree, sets result_number. // These spairs are not sorted in any way. Should they be? - spair *get_next_pair(); + std::pair get_next_pair(); // get the next pair in this degree (the one 'prepare_next_degree' set up') - // returns 0 if at the end + // discard all of the spairs in the current set degree + void discardSPairsInCurrentDegree(); + void display_spair(spair *p); // A debugging routine which displays an spair @@ -65,22 +70,37 @@ class F4SPairSet : public our_new_delete // Returns how many pairs were created, then later removed due to // spair criteria. + size_t numberOfSPairs() const { return mSPairs.size(); } + + double secondsToMinimizePairs() const { return mMinimizePairsSeconds; } + double secondsToCreatePrePairs() const { return mPrePairsSeconds; } private: F4MemoryBlock PS; // passed to constructor routine F4MemoryBlock VP; // used for constructing new pairs + MemoryBlock mSPairLCMs; // used for spair lcms + int max_varpower_size; const MonomialInfo *M; const gb_array &gb; - spair *heap; // list of pairs - spair *this_set; - stash *spair_stash; + + std::vector mSPairs; + SPairCompare mSPairCompare; + std::priority_queue,SPairCompare> mSPairQueue; + + long mThisDegree; // stats long nsaved_unneeded; -}; + double mMinimizePairsSeconds; + double mPrePairsSeconds; +#if defined (WITH_TBB) + mtbb::task_arena& mScheduler; #endif + +}; + // Local Variables: // compile-command: "make -C $M2BUILDDIR/Macaulay2/e " diff --git a/M2/Macaulay2/e/f4/f4-types.hpp b/M2/Macaulay2/e/f4/f4-types.hpp index aa923ebbdea..aa6acb38326 100644 --- a/M2/Macaulay2/e/f4/f4-types.hpp +++ b/M2/Macaulay2/e/f4/f4-types.hpp @@ -4,6 +4,7 @@ #define _F4types_h_ +#include // for INT_MIN #include "VectorArithmetic.hpp" // for ElementArray #include "f4/f4-monlookup.hpp" // for F4MonomialLookupTableT #include "f4/moninfo.hpp" // for MonomialInfo, monomial_word, pac... @@ -24,41 +25,66 @@ enum gbelem_type { }; enum spair_type { - F4_SPAIR_SPAIR, - F4_SPAIR_GCD_ZZ, - F4_SPAIR_RING, - F4_SPAIR_SKEW, - F4_SPAIR_GEN, - F4_SPAIR_ELEM + F4_SPAIR_SPAIR, // arising from an honest spair + F4_SPAIR_GCD_ZZ, // for gbs over the integers + F4_SPAIR_RING, // an spair between a generator and a gen of the defining ideal + F4_SPAIR_SKEW, // from exterior variables times a monomial + F4_SPAIR_GEN, // a generator of the defining ideal + F4_SPAIR_ELEM // }; -struct GBF4Polynomial : public our_new_delete +enum class SPairType { + SPair, + Generator, + Retired + // later we would also like GCDZZ, Ring, Skew to handle those cases as well +}; + +struct GBF4Polynomial { int len; ElementArray coeffs; monomial_word *monoms; // This is all of the monomials written contiguously }; -struct pre_spair : public our_new_delete +struct pre_spair { - enum spair_type type; + //enum spair_type type; + SPairType type; int deg1; // simple degree of quot varpower_monomial quot; int j; bool are_disjoint; }; -struct spair : public our_new_delete +// old spair type (linked list-style container) +//struct spair +//{ +// spair *next; +// spair_type type; +// int deg; /* sugar degree of this spair */ +// int i; +// int j; +// monomial_word lcm[1]; // packed_monomial +//}; + +struct spair { - spair *next; - spair_type type; +public: + // spair *next; + SPairType type; int deg; /* sugar degree of this spair */ int i; int j; - monomial_word lcm[1]; // packed_monomial + // monomial_word lcm[1]; // packed_monomial + monomial_word* lcm; // pointer to a monomial space + + spair() : type(SPairType::Retired),deg(INT_MIN),i(-1),j(-1),lcm(nullptr) {} + spair(SPairType t, int deg0, int i0, int j0, monomial_word* lcm0) : + type(t), deg(deg0), i(i0), j(j0), lcm(lcm0) {} }; -struct gbelem : public our_new_delete +struct gbelem { GBF4Polynomial f; int deg; @@ -66,16 +92,17 @@ struct gbelem : public our_new_delete gbelem_type minlevel; }; -typedef VECTOR(gbelem *) gb_array; +// typedef VECTOR(gbelem *) gb_array; +typedef std::vector gb_array; -struct sparse_row : public our_new_delete +struct sparse_row { int len; ElementArray coeffs; int *comps; // of length len, allocated in a memory block. }; -struct row_elem : public our_new_delete +struct row_elem { // Header information packed_monomial monom; // pointer, allocated monomial in a memory block @@ -84,20 +111,22 @@ struct row_elem : public our_new_delete // The polynomial itself int len; ElementArray coeffs; - int *comps; // of length len, allocated in a memory block. + int *comps; // of length len, no longer allocated in a memory block, but with new[] }; -struct column_elem : public our_new_delete +struct column_elem { packed_monomial monom; // pointer, allocated monomial in a memory block int head; // which row is being used as a pivot for this column. // -1 means none, -2 means not set yet }; -struct coefficient_matrix : public our_new_delete +struct coefficient_matrix { - typedef VECTOR(row_elem) row_array; - typedef VECTOR(column_elem) column_array; + //typedef VECTOR(row_elem) row_array; + //typedef VECTOR(column_elem) column_array; + typedef std::vector row_array; + typedef std::vector column_array; row_array rows; column_array columns; @@ -187,6 +216,26 @@ class PreSPairSorter ~PreSPairSorter() {} }; +class SPairCompare +{ +public: + SPairCompare(const std::vector &spairs) : mSPairs(spairs) { } + +public: + bool operator()(size_t s, size_t t) const + { + const spair& a = mSPairs[s]; + const spair& b = mSPairs[t]; + if (a.deg > b.deg) return true; + if (a.deg < b.deg) return false; + if (a.i > b.i) return true; + return false; + } + +private: + const std::vector& mSPairs; +}; + typedef F4MonomialLookupTableT MonomialLookupTable; #endif diff --git a/M2/Macaulay2/e/f4/f4.cpp b/M2/Macaulay2/e/f4/f4.cpp index a29744fd9f7..113b556c6a4 100644 --- a/M2/Macaulay2/e/f4/f4.cpp +++ b/M2/Macaulay2/e/f4/f4.cpp @@ -7,7 +7,6 @@ #include "buffer.hpp" // for buffer #include "error.h" // for error #include "f4/f4-m2-interface.hpp" // for F4toM2Interface -#include "f4/f4-mem.hpp" // for F4Mem, F4Vec #include "f4/f4-spairs.hpp" // for F4SPairSet #include "f4/f4-types.hpp" // for row_elem, coefficient_matrix #include "f4/hilb-fcn.hpp" // for HilbertController @@ -26,11 +25,13 @@ #include // for fprintf, stderr #include // for stable_sort #include // for swap, vector +#include // for atomic ints in gauss_reduce +#include +#include class RingElement; F4GB::F4GB(const VectorArithmetic* VA, - F4Mem *Mem0, const MonomialInfo *M0, const FreeModule *F0, M2_bool collect_syz, @@ -38,10 +39,11 @@ F4GB::F4GB(const VectorArithmetic* VA, M2_arrayint weights0, int strategy, M2_bool use_max_degree, - int max_degree) + int max_degree, + int numThreads) : mVectorArithmetic(VA), - M(M0), - F(F0), + mMonomialInfo(M0), + mFreeModule(F0), weights(weights0), component_degrees(nullptr), // need to put this in n_pairs_computed(0), @@ -52,31 +54,42 @@ F4GB::F4GB(const VectorArithmetic* VA, this_degree(), is_ideal(F0->rank() == 1), hilbert(nullptr), - gens(), - gb(), - lookup(nullptr), - S(nullptr), + mGenerators(), + mGroebnerBasis(), + mLookupTable(mMonomialInfo->n_vars()), next_col_to_process(0), mat(nullptr), - H(M0, 17), - B(), + mMonomialHashTable(M0, 17), + mMonomialMemoryBlock(), next_monom(), - Mem(Mem0), clock_sort_columns(0), clock_gauss(0), - clock_make_matrix(0) + mGaussTime(0), + mParallelGaussTime(0), + mSerialGaussTime(0), + mTailReduceTime(0), + mNewSPairTime(0), + mInsertGBTime(0), + clock_make_matrix(0), + mNumThreads(mtbb::numThreads(numThreads)), +#if defined(WITH_TBB) + mScheduler(mNumThreads), + mSPairSet(mMonomialInfo, mGroebnerBasis, mScheduler) +#else + mSPairSet(mMonomialInfo, mGroebnerBasis) +#endif { - lookup = new MonomialLookupTable(M->n_vars()); - S = new F4SPairSet(M, gb); + // mLookupTable = new MonomialLookupTable(mMonomialInfo->n_vars()); + // mSPairSet = new F4SPairSet(mMonomialInfo, mGroebnerBasis); mat = new coefficient_matrix; // TODO: set status? - if (M2_gbTrace >= 2) M->show(); + if (M2_gbTrace >= 3) mMonomialInfo->show(); } void F4GB::set_hilbert_function(const RingElement *hf) { - hilbert = new HilbertController(F, hf); // TODO MES: GC problem? + hilbert = new HilbertController(mFreeModule, hf); // TODO MES: GC problem? } void F4GB::delete_gb_array(gb_array &g) @@ -92,22 +105,22 @@ void F4GB::delete_gb_array(gb_array &g) F4GB::~F4GB() { - delete S; - delete lookup; + // delete mSPairSet; + // delete mLookupTable; delete mat; // Now delete the gens, gb arrays. - delete_gb_array(gens); - delete_gb_array(gb); + delete_gb_array(mGenerators); + delete_gb_array(mGroebnerBasis); } void F4GB::new_generators(int lo, int hi) { for (int i = lo; i <= hi; i++) { - gbelem *g = gens[i]; + gbelem *g = mGenerators[i]; if (g->f.len == 0) continue; - S->insert_generator(g->deg, g->f.monoms, i); + mSPairSet.insert_generator(g->deg, g->f.monoms, i); } } @@ -129,11 +142,11 @@ int F4GB::new_column(packed_monomial m) int F4GB::find_or_append_column(packed_monomial m) { packed_monomial new_m; - if (H.find_or_insert(m, new_m)) return static_cast(new_m[-1]); + if (mMonomialHashTable.find_or_insert(m, new_m)) return static_cast(new_m[-1]); // At this point, m is a new monomial to be placed as a column m = next_monom; - B.intern(1 + M->monomial_size(m)); - next_monom = B.reserve(1 + M->max_monomial_size()); + mMonomialMemoryBlock.intern(1 + mMonomialInfo->monomial_size(m)); + next_monom = mMonomialMemoryBlock.reserve(1 + mMonomialInfo->max_monomial_size()); next_monom++; return new_column(m); } @@ -146,34 +159,36 @@ int F4GB::mult_monomials(packed_monomial m, packed_monomial n) // If it is there, return its column // If not, increment our memory block, and insert a new column. packed_monomial new_m; - M->unchecked_mult(m, n, next_monom); - if (H.find_or_insert(next_monom, new_m)) + mMonomialInfo->unchecked_mult(m, n, next_monom); + if (mMonomialHashTable.find_or_insert(next_monom, new_m)) return static_cast( new_m[-1]); // monom exists, don't save monomial space m = next_monom; - B.intern(1 + M->monomial_size(m)); - next_monom = B.reserve(1 + M->max_monomial_size()); + mMonomialMemoryBlock.intern(1 + mMonomialInfo->monomial_size(m)); + next_monom = mMonomialMemoryBlock.reserve(1 + mMonomialInfo->max_monomial_size()); next_monom++; return new_column(m); } void F4GB::load_gen(int which) { - GBF4Polynomial &g = gens[which]->f; + GBF4Polynomial &g = mGenerators[which]->f; row_elem r{}; r.monom = nullptr; // This says that this element corresponds to a generator r.elem = which; r.len = g.len; - r.comps = Mem->components.allocate(g.len); + // r.comps = Mem->components.allocate(g.len); + r.comps = new int[g.len]; // r.coeffs is already initialized to [nullptr]. + // TODO: this iterator requires knowledge about memory layout of monomials monomial_word *w = g.monoms; for (int i = 0; i < g.len; i++) { - M->copy(w, next_monom); + mMonomialInfo->copy(w, next_monom); r.comps[i] = find_or_append_column(next_monom); - w += M->monomial_size(w); + w += mMonomialInfo->monomial_size(w); } mat->rows.push_back(r); @@ -181,20 +196,22 @@ void F4GB::load_gen(int which) void F4GB::load_row(packed_monomial monom, int which) { - GBF4Polynomial &g = gb[which]->f; + GBF4Polynomial &g = mGroebnerBasis[which]->f; row_elem r{}; r.monom = monom; r.elem = which; r.len = g.len; - r.comps = Mem->components.allocate(g.len); + // r.comps = Mem->components.allocate(g.len); + r.comps = new int[g.len]; // r.coeffs is already initialized to [nullptr]. - + + // TODO: this iterator requires knowledge about memory layout of monomials monomial_word *w = g.monoms; for (int i = 0; i < g.len; i++) { r.comps[i] = mult_monomials(monom, w); - w += M->monomial_size(w); + w += mMonomialInfo->monomial_size(w); } mat->rows.push_back(r); @@ -211,13 +228,13 @@ void F4GB::process_column(int c) column_elem &ce = mat->columns[c]; if (ce.head >= -1) return; int32_t which; - bool found = lookup->find_one_divisor_packed(M, ce.monom, which); + bool found = mLookupTable.find_one_divisor_packed(mMonomialInfo, ce.monom, which); if (found) { packed_monomial n = next_monom; - M->unchecked_divide(ce.monom, gb[which]->f.monoms, n); - B.intern(1 + M->monomial_size(n)); - next_monom = B.reserve(1 + M->max_monomial_size()); + mMonomialInfo->unchecked_divide(ce.monom, mGroebnerBasis[which]->f.monoms, n); + mMonomialMemoryBlock.intern(1 + mMonomialInfo->monomial_size(n)); + next_monom = mMonomialMemoryBlock.reserve(1 + mMonomialInfo->max_monomial_size()); next_monom++; // M->set_component(which, n); ce.head = INTSIZE(mat->rows); @@ -227,21 +244,35 @@ void F4GB::process_column(int c) ce.head = -1; } +void F4GB::loadSPairRow(spair *p) +{ + packed_monomial n = next_monom; + mMonomialInfo->unchecked_divide(p->lcm, mGroebnerBasis[p->i]->f.monoms, n); + mMonomialMemoryBlock.intern(1 + mMonomialInfo->monomial_size(n)); + next_monom = mMonomialMemoryBlock.reserve(1 + mMonomialInfo->max_monomial_size()); + next_monom++; + load_row(n, p->i); +} + +void F4GB::loadReducerRow(spair *p) +{ + packed_monomial n = next_monom; + mMonomialInfo->unchecked_divide(p->lcm, mGroebnerBasis[p->j]->f.monoms, n); + mMonomialMemoryBlock.intern(1 + mMonomialInfo->monomial_size(n)); + next_monom = mMonomialMemoryBlock.reserve(1 + mMonomialInfo->max_monomial_size()); + next_monom++; + load_row(n, p->j); +} + void F4GB::process_s_pair(spair *p) { int c; switch (p->type) { - case F4_SPAIR_SPAIR: + case SPairType::SPair: { - packed_monomial n = next_monom; - M->unchecked_divide(p->lcm, gb[p->i]->f.monoms, n); - B.intern(1 + M->monomial_size(n)); - next_monom = B.reserve(1 + M->max_monomial_size()); - next_monom++; - - load_row(n, p->i); + loadSPairRow(p); c = mat->rows[mat->rows.size() - 1].comps[0]; if (mat->columns[c].head >= -1) @@ -249,21 +280,15 @@ void F4GB::process_s_pair(spair *p) else { // In this situation, we load the other half as a reducer - n = next_monom; - M->unchecked_divide(p->lcm, gb[p->j]->f.monoms, n); - B.intern(1 + M->monomial_size(n)); - next_monom = B.reserve(1 + M->max_monomial_size()); - next_monom++; - load_row(n, p->j); - + loadReducerRow(p); mat->columns[c].head = INTSIZE(mat->rows) - 1; } break; } - case F4_SPAIR_GEN: + case SPairType::Generator: load_gen(p->i); break; - default: + case SPairType::Retired: break; } } @@ -284,10 +309,12 @@ void F4GB::reorder_columns() // sort the columns - int *column_order = Mem->components.allocate(ncols); - int *ord = Mem->components.allocate(ncols); + //int *column_order = Mem->components.allocate(ncols); + //int *ord = Mem->components.allocate(ncols); + int *column_order = new int[ncols]; + int *ord = new int[ncols]; - ColumnsSorter C(M, mat); + ColumnsSorter C(mMonomialInfo, mat); // Actual sort of columns ///////////////// @@ -346,8 +373,10 @@ void F4GB::reorder_columns() } std::swap(mat->columns, newcols); - Mem->components.deallocate(column_order); - Mem->components.deallocate(ord); + delete [] column_order; + delete [] ord; + //Mem->components.deallocate(column_order); + //Mem->components.deallocate(ord); } void F4GB::reorder_rows() @@ -355,7 +384,7 @@ void F4GB::reorder_rows() int nrows = INTSIZE(mat->rows); int ncols = INTSIZE(mat->columns); coefficient_matrix::row_array newrows; - VECTOR(long) rowlocs; // 0..nrows-1 of where row as been placed + std::vector rowlocs; // 0..nrows-1 of where row as been placed newrows.reserve(nrows); rowlocs.reserve(nrows); @@ -383,7 +412,7 @@ void F4GB::reset_matrix() { // mat next_col_to_process = 0; - next_monom = B.reserve(1 + M->max_monomial_size()); + next_monom = mMonomialMemoryBlock.reserve(1 + mMonomialInfo->max_monomial_size()); next_monom++; } @@ -392,23 +421,22 @@ void F4GB::clear_matrix() // Clear the rows first for (auto & r : mat->rows) { + // std::cout << "at A" << std::endl; if (not r.coeffs.isNull()) mVectorArithmetic->deallocateElementArray(r.coeffs); - Mem->components.deallocate(r.comps); + // Mem->components.deallocate(r.comps); + // std::cout << "at B" << std::endl; + if (r.comps != nullptr) + delete [] r.comps; + // std::cout << "at C" << std::endl; r.len = 0; r.elem = -1; r.monom = nullptr; } mat->rows.clear(); mat->columns.clear(); - H.reset(); - B.reset(); - if (M2_gbTrace >= 4) - { - Mem->components.show(); - Mem->coefficients.show(); - Mem->show(); - } + mMonomialHashTable.reset(); + mMonomialMemoryBlock.reset(); } void F4GB::make_matrix() @@ -419,10 +447,13 @@ void F4GB::make_matrix() Is this the best order to do it in? Maybe not... */ - spair *p; - while ((p = S->get_next_pair())) + //spair *p; + std::pair p; + p = mSPairSet.get_next_pair(); + while (p.first) { - process_s_pair(p); + process_s_pair(&p.second); + p = mSPairSet.get_next_pair(); } while (next_col_to_process < mat->columns.size()) @@ -444,8 +475,83 @@ void F4GB::make_matrix() reorder_columns(); // reorder_rows(); // This is only here so we can see what we are doing...? + + // For debugging: let's compute the number of non-zero elements above the diagonal in A matrix. + // Also, let's compute the amount of space used for: + // column info + // row info + // sum of values in each row + // Maybe so A B C D matrices separately. + macaulayMatrixStats(); +} + +auto F4GB::macaulayMatrixStats() const -> MacaulayMatrixStats +{ + // Want: + // sizes of A, B, C, D. + // number of elements in each part. + // loop through the rows, and for each row, determine: in A or C? + // loop through elements in a row: for each, determine: in A/C or B/D + + MacaulayMatrixStats stats; + + // look at 'mat', determine number of rows, cols, etc. + long nrows = mat->rows.size(); + long ncols = mat->columns.size(); + for (long i=0; irows[i]; + for (long j=0; j < r.len; ++j) + { + auto c = r.comps[j]; + bool is_left = mat->columns[c].head >= 0; + if (is_pivot and is_left) ++stats.mAEntries; + if (is_pivot and not is_left) ++stats.mBEntries; + if (not is_pivot and is_left) ++stats.mCEntries; + if (not is_pivot and not is_left) ++stats.mDEntries; + } + } + + stats.mAEntries -= stats.mTopAndLeft; + stats.mBottom = nrows - stats.mTopAndLeft; + stats.mRight = ncols - stats.mTopAndLeft; + + stats.display(); + return stats; +} + +void F4GB::MacaulayMatrixStats::display(buffer& o) +{ + std::ostringstream s; + + s << "Quad matrix sizes" << std::endl; + s << "sizes of quad matrix" << std::endl; + s << std::setw(11) << " " << std::setw(11) << mTopAndLeft << std::setw(11) << mRight << std::endl; + s << std::setw(11) << mTopAndLeft << std::setw(11) << "A" << std::setw(11) << "B" << std::endl; + s << std::setw(11) << mBottom << std::setw(11) << "C" << std::setw(11) << "D" << std::endl; + s << std::endl; + + s << "Quad matrix entries (no diagona on A)" << std::endl; + s << std::setw(11) << " " << std::setw(11) << mTopAndLeft << std::setw(11) << mRight << std::endl; + s << std::setw(11) << mTopAndLeft << std::setw(11) << mAEntries << std::setw(11) << mBEntries << std::endl; + s << std::setw(11) << mBottom << std::setw(11) << mCEntries << std::setw(11) << mDEntries << std::endl; + s << std::endl; + + o << s.str().c_str(); } +void F4GB::MacaulayMatrixStats::display() +{ + buffer o; + display(o); + emit(o.str()); +} + + /////////////////////////////////////////////////// // Gaussian elimination /////////////////////////// /////////////////////////////////////////////////// @@ -457,8 +563,8 @@ const ElementArray& F4GB::get_coeffs_array(row_elem &r) // At this point, we must go find the coeff array if (r.monom == nullptr) // i.e. a generator - return gens[r.elem]->f.coeffs; - return gb[r.elem]->f.coeffs; + return mGenerators[r.elem]->f.coeffs; + return mGroebnerBasis[r.elem]->f.coeffs; } bool F4GB::is_new_GB_row(int row) const @@ -482,27 +588,138 @@ void F4GB::gauss_reduce(bool diagonalize) int ncols = INTSIZE(mat->columns); int n_newpivots = -1; // the number of new GB elements in this degree - int n_zero_reductions = 0; + std::atomic n_zero_reductions = 0; + + mtbb::tick_count t0; + mtbb::tick_count t1; + if (hilbert) { n_newpivots = hilbert->nRemainingExpected(); - if (n_newpivots == 0) return; + if (n_newpivots == 0) + { + std::cout << "Oh crap, our logic is wrong: should not get here if no new GB elements expected in this degree" << std::endl; + return; + } } +#if defined(WITH_TBB) +//#if 0 + + std::vector spair_rows; + for (int i = 0; i < nrows; ++i) + { + if (not is_pivot_row(i)) + spair_rows.push_back(i); + } + + std::mutex cout_guard; + t0 = mtbb::tick_count::now(); + using threadLocalDense_t = mtbb::enumerable_thread_specific; + // create a dense array for each thread + threadLocalDense_t threadLocalDense([&]() { + return mVectorArithmetic->allocateElementArray(ncols); + }); + std::cout << "mNumThreads: " << mNumThreads << std::endl; + mScheduler.execute([&] { + mtbb::parallel_for(mtbb::blocked_range{0, INTSIZE(spair_rows)}, + [&](const mtbb::blocked_range& r) + { + // long actual_reductions = 0; + // for (auto i = r.begin(); i != r.end(); ++i) + // { + // if (not is_pivot_row(i)) + // ++ actual_reductions; + // } + + // cout_guard.lock(); + // std::cout << "#reductions to do: " << actual_reductions << std::endl; + // cout_guard.unlock(); + + threadLocalDense_t::reference my_dense = threadLocalDense.local(); + for (auto i = r.begin(); i != r.end(); ++i) + { + // these lines are commented out to avoid the hilbert hint for now... + //if ((not hilbert) or (n_newpivots > 0)) + //{ + bool newNonzeroReduction = gauss_reduce_row(spair_rows[i], my_dense); + if (not newNonzeroReduction) ++n_zero_reductions; + // if (hilbert && newNonzeroReduction) + // --n_newpivots; + //} + } + }); + }); + for (auto tlDense : threadLocalDense) + mVectorArithmetic->deallocateElementArray(tlDense); + t1 = mtbb::tick_count::now(); + mParallelGaussTime += (t1-t0).seconds(); +#endif + + std::cout << "About to do serial loop, n_newpivots = " << n_newpivots << std::endl; + t0 = mtbb::tick_count::now(); ElementArray gauss_row { mVectorArithmetic->allocateElementArray(ncols) }; - for (int i = 0; i < nrows; i++) + for (auto i = 0; i < spair_rows.size(); i++) { - row_elem &r = mat->rows[i]; - if (r.len == 0) continue; // could happen once we include syzygies... + auto this_row = spair_rows[i]; + if ((not hilbert) or (n_newpivots > 0)) + { + bool newNonzeroReduction = gauss_reduce_row(this_row, gauss_row); + if (newNonzeroReduction) + { + row_elem &r = mat->rows[this_row]; + mVectorArithmetic->makeMonic(r.coeffs); + mat->columns[r.comps[0]].head = this_row; + if (hilbert) --n_newpivots; + } + else + ++n_zero_reductions; + } else if (hilbert) + { + // Inform code that we don't want a new GB element from this row. + // The row will be deleted with clear_matrix. + row_elem &r = mat->rows[this_row]; + r.len = 0; + } + } + mVectorArithmetic->deallocateElementArray(gauss_row); + t1 = mtbb::tick_count::now(); + mSerialGaussTime += (t1-t0).seconds(); + + if (M2_gbTrace >= 3) + fprintf(stderr, "-- #zeroreductions %ld\n", n_zero_reductions.load()); + + t0 = mtbb::tick_count::now(); + if (diagonalize) tail_reduce(); + t1 = mtbb::tick_count::now(); + mTailReduceTime += (t1-t0).seconds(); +} + +bool F4GB::is_pivot_row(int index) const +{ + row_elem &r = mat->rows[index]; + int pivotcol = r.comps[0]; + int pivotrow = mat->columns[pivotcol].head; + return (pivotrow == index); +} + +bool F4GB::gauss_reduce_row(int index, + ElementArray& gauss_row) +{ + // returns true if the row reduces to a "new" nonzero row + // returns false otherwise + row_elem &r = mat->rows[index]; + if (r.len == 0) return false; // could happen once we include syzygies... + if (is_pivot_row(index)) return false; + int pivotcol = r.comps[0]; int pivotrow = mat->columns[pivotcol].head; - if (pivotrow == i) continue; // this is a pivot row, so leave it alone const ElementArray& rcoeffs = get_coeffs_array(r); n_pairs_computed++; mVectorArithmetic->fillDenseArray(gauss_row, rcoeffs, Range(r.comps, r.comps + r.len)); - int firstnonzero = ncols; + int firstnonzero = -1; int first = r.comps[0]; int last = r.comps[r.len - 1]; do @@ -521,18 +738,23 @@ void F4GB::gauss_reduce(bool diagonalize) int last1 = pivot_rowelem.comps[pivot_rowelem.len - 1]; if (last1 > last) last = last1; } - else if (firstnonzero == ncols) + else if (firstnonzero == -1) firstnonzero = first; first = mVectorArithmetic->denseNextNonzero(gauss_row, first+1, last); } - while (first <= last); + while (first <= last); // end do + if (not r.coeffs.isNull()) mVectorArithmetic->deallocateElementArray(r.coeffs); - Mem->components.deallocate(r.comps); + //Mem->components.deallocate(r.comps); + delete [] r.comps; + r.comps = nullptr; r.len = 0; //Range monomRange; //mVectorArithmetic->denseToSparse(gauss_row, r.coeffs, monomRange, firstnonzero, last, mComponentSpace); - mVectorArithmetic->denseToSparse(gauss_row, r.coeffs, r.comps, firstnonzero, last, Mem->components); + //mVectorArithmetic->denseToSparse(gauss_row, r.coeffs, r.comps, firstnonzero, last, Mem->components); + mVectorArithmetic->denseToSparse(gauss_row, r.coeffs, r.comps, firstnonzero, last); + // TODO set r.comps from monomRange. Question: who has allocated this space?? // Maybe for now it is just the following line: r.len = mVectorArithmetic->size(r.coeffs); @@ -545,22 +767,9 @@ void F4GB::gauss_reduce(bool diagonalize) // the above line leaves gauss_row zero, and also handles the case when // r.len is 0 (TODO: check this!!) // it also potentially frees the old r.coeffs and r.comps (TODO: ?? r.comps??) - if (r.len > 0) - { - mVectorArithmetic->makeMonic(r.coeffs); - mat->columns[r.comps[0]].head = i; - if (--n_newpivots == 0) break; - } - else - n_zero_reductions++; - } - mVectorArithmetic->deallocateElementArray(gauss_row); - - if (M2_gbTrace >= 3) - fprintf(stderr, "-- #zeroreductions %d\n", n_zero_reductions); - - if (diagonalize) tail_reduce(); + return (r.len > 0); } + void F4GB::tail_reduce() { @@ -601,7 +810,8 @@ void F4GB::tail_reduce() }; if (anychange) { - Mem->components.deallocate(r.comps); + //Mem->components.deallocate(r.comps); + delete [] r.comps; mVectorArithmetic->deallocateElementArray(r.coeffs); r.len = 0; //Range monomRange; @@ -610,11 +820,15 @@ void F4GB::tail_reduce() // monomRange, // firstnonzero, last, // mComponentSpace); + // mVectorArithmetic->denseToSparse(gauss_row, + // r.coeffs, + // r.comps, + // firstnonzero, last, + // Mem->components); mVectorArithmetic->denseToSparse(gauss_row, r.coeffs, r.comps, - firstnonzero, last, - Mem->components); + firstnonzero, last); r.len = mVectorArithmetic->size(r.coeffs); //r.len = monomRange.size(); //r.comps = Mem->components.allocate(r.len); @@ -648,7 +862,7 @@ void F4GB::insert_gb_element(row_elem &r) // insert the monomial into the lookup table // find new pairs associated to this new element - int nslots = M->max_monomial_size(); + int nslots = mMonomialInfo->max_monomial_size(); int nlongs = r.len * nslots; gbelem *result = new gbelem; @@ -670,39 +884,50 @@ void F4GB::insert_gb_element(row_elem &r) { result->f.coeffs.swap(r.coeffs); } - result->f.monoms = Mem->allocate_monomial_array(nlongs); + + //result->f.monoms = Mem->allocate_monomial_array(nlongs); + result->f.monoms = new monomial_word[nlongs]; monomial_word *nextmonom = result->f.monoms; for (int i = 0; i < r.len; i++) { - M->copy(mat->columns[r.comps[i]].monom, nextmonom); + mMonomialInfo->copy(mat->columns[r.comps[i]].monom, nextmonom); nextmonom += nslots; } - Mem->components.deallocate(r.comps); + // Mem->components.deallocate(r.comps); + delete [] r.comps; + r.comps = nullptr; r.len = 0; result->deg = this_degree; - result->alpha = static_cast(M->last_exponent(result->f.monoms)); + result->alpha = static_cast(mMonomialInfo->last_exponent(result->f.monoms)); result->minlevel = ELEM_MIN_GB; // MES: How do // we distinguish between ELEM_MIN_GB, ELEM_POSSIBLE_MINGEN? - int which = INTSIZE(gb); - gb.push_back(result); + int which = INTSIZE(mGroebnerBasis); + mGroebnerBasis.push_back(result); if (hilbert) { int x; - int *exp = newarray_atomic(int, M->n_vars()); - M->to_intstar_vector(result->f.monoms, exp, x); + //int *exp = newarray_atomic(int, M->n_vars()); + int *exp = new int[mMonomialInfo->n_vars()]; + mMonomialInfo->to_intstar_vector(result->f.monoms, exp, x); hilbert->addMonomial(exp, x + 1); - freemem(exp); + delete [] exp; + //freemem(exp); } // now insert the lead monomial into the lookup table - varpower_monomial vp = newarray_atomic(varpower_word, 2 * M->n_vars() + 1); - M->to_varpower_monomial(result->f.monoms, vp); - lookup->insert_minimal_vp(M->get_component(result->f.monoms), vp, which); - freemem(vp); + //varpower_monomial vp = newarray_atomic(varpower_word, 2 * M->n_vars() + 1); + varpower_monomial vp = new varpower_word[2 * mMonomialInfo->n_vars() + 1]; + mMonomialInfo->to_varpower_monomial(result->f.monoms, vp); + mLookupTable.insert_minimal_vp(mMonomialInfo->get_component(result->f.monoms), vp, which); + delete [] vp; + //freemem(vp); // now go forth and find those new pairs - S->find_new_pairs(is_ideal); + mtbb::tick_count t0 = mtbb::tick_count::now(); + mSPairSet.find_new_pairs(is_ideal); + mtbb::tick_count t1 = mtbb::tick_count::now(); + mNewSPairTime += (t1-t0).seconds(); } void F4GB::new_GB_elements() @@ -721,6 +946,7 @@ void F4GB::new_GB_elements() then we don't need to loop through all of these */ + mtbb::tick_count t0 = mtbb::tick_count::now(); for (int r = 0; r < mat->rows.size(); r++) { if (is_new_GB_row(r)) @@ -728,6 +954,8 @@ void F4GB::new_GB_elements() insert_gb_element(mat->rows[r]); } } + mtbb::tick_count t1 = mtbb::tick_count::now(); + mInsertGBTime += (t1-t0).seconds(); } /////////////////////////////////////////////////// @@ -738,6 +966,7 @@ void F4GB::do_spairs() { if (hilbert && hilbert->nRemainingExpected() == 0) { + mSPairSet.discardSPairsInCurrentDegree(); if (M2_gbTrace >= 1) fprintf(stderr, "-- skipping degree...no elements expected in this degree\n"); @@ -749,7 +978,7 @@ void F4GB::do_spairs() n_lcmdups = 0; make_matrix(); - if (M2_gbTrace >= 5) + if (M2_gbTrace >= 6) { fprintf(stderr, "---------\n"); show_matrix(); @@ -762,7 +991,13 @@ void F4GB::do_spairs() nsecs /= CLOCKS_PER_SEC; if (M2_gbTrace >= 2) fprintf(stderr, " make matrix time = %f\n", nsecs); - if (M2_gbTrace >= 2) H.dump(); + if (M2_gbTrace >= 3) mMonomialHashTable.dump(); + + double oldParallelGauss = mParallelGaussTime; + double oldSerialGauss = mSerialGaussTime; + double oldTailReduce = mTailReduceTime; + double oldNewSPair = mNewSPairTime; + double oldInsertNewGB = mInsertGBTime; begin_time = clock(); gauss_reduce(true); @@ -778,9 +1013,12 @@ void F4GB::do_spairs() if (M2_gbTrace >= 2) { fprintf(stderr, " gauss time = %f\n", nsecs); + fprintf(stderr, " parallel gauss time = %g\n", mParallelGaussTime - oldParallelGauss); + fprintf(stderr, " serial gauss time = %g\n", mSerialGaussTime - oldSerialGauss); + fprintf(stderr, " tail reduce time = %g\n", mTailReduceTime - oldTailReduce); fprintf(stderr, " lcm dups = %ld\n", n_lcmdups); - if (M2_gbTrace >= 5) + if (M2_gbTrace >= 6) { fprintf(stderr, "---------\n"); show_matrix(); @@ -788,11 +1026,18 @@ void F4GB::do_spairs() } } new_GB_elements(); - int ngb = INTSIZE(gb); + fprintf(stderr, " finding new spair time = %g\n", mNewSPairTime - oldNewSPair); + fprintf(stderr, " number of spairs in queue = %zu\n", mSPairSet.numberOfSPairs()); + fprintf(stderr, " insert new gb time = %g\n", mInsertGBTime - oldInsertNewGB - (mNewSPairTime - oldNewSPair)); + + int ngb = INTSIZE(mGroebnerBasis); if (M2_gbTrace >= 1) { fprintf(stderr, " # GB elements = %d\n", ngb); - if (M2_gbTrace >= 5) show_gb_array(gb); + if (M2_gbTrace >= 5) { + show_gb_array(mGroebnerBasis); + mSPairSet.display(); + } } clear_matrix(); @@ -801,7 +1046,7 @@ void F4GB::do_spairs() enum ComputationStatusCode F4GB::computation_is_complete(StopConditions &stop_) { // This handles everything but stop_.always, stop_.degree_limit - if (stop_.basis_element_limit > 0 && gb.size() >= stop_.basis_element_limit) + if (stop_.basis_element_limit > 0 && mGroebnerBasis.size() >= stop_.basis_element_limit) return COMP_DONE_GB_LIMIT; if (stop_.pair_limit > 0 && n_pairs_computed >= stop_.pair_limit) return COMP_DONE_PAIR_LIMIT; @@ -826,13 +1071,13 @@ void F4GB::test_spair_code() // gb matrix, and then calling find_new_pairs after each one. // It displays the list of spairs after each generator. - gb.push_back(gens[0]); - for (int i = 1; i < gens.size(); i++) + mGroebnerBasis.push_back(mGenerators[0]); + for (int i = 1; i < mGenerators.size(); i++) { - gb.push_back(gens[i]); - S->find_new_pairs(false); + mGroebnerBasis.push_back(mGenerators[i]); + mSPairSet.find_new_pairs(false); fprintf(stderr, "---Just inserted element %d---\n", i); - S->display(); + mSPairSet.display(); } } @@ -841,7 +1086,7 @@ enum ComputationStatusCode F4GB::start_computation(StopConditions &stop_) clock_sort_columns = 0; clock_gauss = 0; clock_make_matrix = 0; - int npairs; + int npairs = 0; // test_spair_code(); @@ -858,13 +1103,17 @@ enum ComputationStatusCode F4GB::start_computation(StopConditions &stop_) is_done = computation_is_complete(stop_); if (is_done != COMP_COMPUTING) break; - this_degree = S->prepare_next_degree(-1, npairs); - - if (npairs == 0) + //this_degree = mSPairSet.prepare_next_degree(-1, npairs); + auto thisDegInfo = mSPairSet.setThisDegree(); + + if (not thisDegInfo.first) { is_done = COMP_DONE; break; } + + this_degree = thisDegInfo.second; + if (stop_.stop_after_degree && this_degree > stop_.degree_limit->array[0]) { is_done = COMP_DONE_DEGREE_LIMIT; @@ -914,18 +1163,39 @@ enum ComputationStatusCode F4GB::start_computation(StopConditions &stop_) fprintf(stderr, "total time for gauss: %f\n", ((double)clock_gauss) / CLOCKS_PER_SEC); + fprintf(stderr, + "parallel tbb time for gauss: %g\n", + mParallelGaussTime); + fprintf(stderr, + "serial tbb time for gauss: %g\n", + mSerialGaussTime); + fprintf(stderr, + "total time for finding new spairs: %g\n", + mNewSPairTime); + fprintf(stderr, + "total time for creating pre spairs: %g\n", + mSPairSet.secondsToCreatePrePairs()); + fprintf(stderr, + "total time for minimizing new spairs: %g\n", + mSPairSet.secondsToMinimizePairs()); + fprintf(stderr, + "total time for inserting new gb elements: %g\n", + mInsertGBTime); fprintf(stderr, "number of spairs computed : %ld\n", n_pairs_computed); fprintf(stderr, "number of reduction steps : %ld\n", n_reduction_steps); + if (M2_gbTrace >= 3) + { + fprintf(stderr, + "number of spairs removed by criterion = %ld\n", + mSPairSet.n_unneeded_pairs()); + mMonomialInfo->show(); + } } - fprintf(stderr, - "number of spairs removed by criterion = %ld\n", - S->n_unneeded_pairs()); - M->show(); return is_done; } @@ -942,10 +1212,10 @@ void F4GB::show_gb_array(const gb_array &g) const for (int i = 0; i < g.size(); i++) { vec v = F4toM2Interface::to_M2_vec( - mVectorArithmetic, M, g[i]->f, F); + mVectorArithmetic, mMonomialInfo, g[i]->f, mFreeModule); o << "element " << i << " degree " << g[i]->deg << " alpha " << g[i]->alpha << newline << " "; - F->get_ring()->vec_text_out(o, v); + mFreeModule->get_ring()->vec_text_out(o, v); o << newline; } emit(o.str()); @@ -960,7 +1230,7 @@ void F4GB::show_row_info() const if (mat->rows[i].monom == nullptr) fprintf(stderr, "generator"); else - M->show(mat->rows[i].monom); + mMonomialInfo->show(mat->rows[i].monom); fprintf(stderr, "\n"); } } @@ -971,7 +1241,7 @@ void F4GB::show_column_info() const for (int i = 0; i < mat->columns.size(); i++) { fprintf(stderr, "head %4d monomial ", mat->columns[i].head); - M->show(mat->columns[i].monom); + mMonomialInfo->show(mat->columns[i].monom); fprintf(stderr, "\n"); } } @@ -980,7 +1250,7 @@ void F4GB::show_matrix() { // Debugging routine MutableMatrix *q = F4toM2Interface::to_M2_MutableMatrix( - mVectorArithmetic, mat, gens, gb); + mVectorArithmetic, mat, mGenerators, mGroebnerBasis); buffer o; q->text_out(o); emit(o.str()); diff --git a/M2/Macaulay2/e/f4/f4.hpp b/M2/Macaulay2/e/f4/f4.hpp index 761bc28740e..e73cce057da 100644 --- a/M2/Macaulay2/e/f4/f4.hpp +++ b/M2/Macaulay2/e/f4/f4.hpp @@ -71,15 +71,15 @@ #include "engine-exports.h" // for M2_arrayint, M2_bool #include "f4-types.hpp" // for gb_array, MonomialLookupTable #include "f4/moninfo.hpp" // for packed_monomial, MonomialInfo +#include "f4/f4-spairs.hpp" // For F4SPairSet #include "interface/computation.h" // for ComputationStatusCode, StopCondit... +#include "m2tbb.hpp" // for TBB #include "memblock.hpp" // for F4MemoryBlock #include "monhashtable.hpp" // for MonomialHashTable #include "newdelete.hpp" // for our_new_delete #include "MemoryBlock.hpp" // for MemoryBlock #include // for clock, CLOCKS_PER_SEC, clock_t -class F4Mem; -class F4SPairSet; class FreeModule; class HilbertController; class RingElement; @@ -91,8 +91,8 @@ class F4GB : public our_new_delete { // Basic required information const VectorArithmetic* mVectorArithmetic; - const MonomialInfo *M; - const FreeModule *F; + const MonomialInfo *mMonomialInfo; + const FreeModule *mFreeModule; M2_arrayint weights; // The length of this is the number of variables, each // entry is positive. M2_arrayint component_degrees; // Degree of each free module element. @@ -114,27 +114,65 @@ class F4GB : public our_new_delete // Monomial order information. Should this be in M? // The main players in the computation - gb_array gens; - gb_array gb; - MonomialLookupTable *lookup; // (monom,comp) --> index into gb - F4SPairSet *S; + gb_array mGenerators; + gb_array mGroebnerBasis; + MonomialLookupTable mLookupTable; // (monom,comp) --> index into mGroebnerBasis // The matrix and its construction int next_col_to_process; coefficient_matrix *mat; - MonomialHashTable H; - F4MemoryBlock B; + MonomialHashTable mMonomialHashTable; + F4MemoryBlock mMonomialMemoryBlock; monomial_word *next_monom; // valid while creating the matrix - F4Mem *Mem; // Used to allocate and deallocate arrays used in the matrix - MemoryBlock mComponentSpace; // stop-gap for use with VectorArithmetic and Mem. + MemoryBlock mComponentSpace; // stop-gap for use with VectorArithmetic and Mem. (TODO: what does this comment mean?) // cumulative timing info double clock_sort_columns; clock_t clock_gauss; + double mGaussTime; + double mParallelGaussTime; + double mSerialGaussTime; + double mTailReduceTime; + double mNewSPairTime; + double mInsertGBTime; clock_t clock_make_matrix; - private: + struct MacaulayMatrixStats + { + public: + // #rows/cols + long mTopAndLeft = 0; + long mBottom = 0; + long mRight = 0; + + // #entries + long mAEntries = 0; // but not the diagonals? + long mBEntries = 0; + long mCEntries = 0; + long mDEntries = 0; + + // memory usage: column info, row info, all the entries, hash table? + long mColumnInfo = 0; + long mRowInfo = 0; + long mEntries = 0; + + void display(buffer& o); + void display(); + }; + + MacaulayMatrixStats macaulayMatrixStats() const; // For debugging info + +#if defined (WITH_TBB) + int mNumThreads; + mtbb::task_arena mScheduler; + + mtbb::task_arena& getScheduler() { return mScheduler; } +#endif + + F4SPairSet mSPairSet; + +private: //////////////////////////////////////////////////////////////////// void delete_gb_array(gb_array &g); @@ -154,7 +192,11 @@ class F4GB : public our_new_delete void load_gen(int which); void load_row(packed_monomial monom, int which); void process_column(int c); + + void loadSPairRow(spair *p); + void loadReducerRow(spair *p); void process_s_pair(spair *p); + void reorder_columns(); void reorder_rows(); @@ -168,7 +210,10 @@ class F4GB : public our_new_delete // place // and also to determine if an element (row) needs to be tail reduced + bool is_pivot_row(int index) const; + void gauss_reduce(bool diagonalize); + bool gauss_reduce_row(int index, ElementArray& gauss_row); void tail_reduce(); void row_to_dense_row(int r, int &first, int &last); @@ -182,7 +227,6 @@ class F4GB : public our_new_delete public: F4GB(const VectorArithmetic* VA, - F4Mem *Mem0, const MonomialInfo *MI, const FreeModule *F, // used for debugging only... M2_bool collect_syz, @@ -190,7 +234,8 @@ class F4GB : public our_new_delete M2_arrayint gb_weights, int strategy, M2_bool use_max_degree, - int max_degree); + int max_degree, + int numThreads); ~F4GB(); @@ -199,10 +244,10 @@ class F4GB : public our_new_delete void new_generators(int lo, int hi); - const gb_array &get_generators() const { return gens; } - gb_array &get_generators() { return gens; } - const gb_array &get_gb() const { return gb; } - gb_array &get_gb() { return gb; } + const gb_array &get_generators() const { return mGenerators; } + gb_array &get_generators() { return mGenerators; } + const gb_array &get_gb() const { return mGroebnerBasis; } + gb_array &get_gb() { return mGroebnerBasis; } void set_hilbert_function(const RingElement *hf); enum ComputationStatusCode start_computation(StopConditions &stop_); diff --git a/M2/Macaulay2/e/f4/hilb-fcn.hpp b/M2/Macaulay2/e/f4/hilb-fcn.hpp index 183c29522c5..b045ea1bee5 100644 --- a/M2/Macaulay2/e/f4/hilb-fcn.hpp +++ b/M2/Macaulay2/e/f4/hilb-fcn.hpp @@ -12,7 +12,7 @@ class MatrixConstructor; class PolynomialRing; class RingElement; -class HilbertController +class HilbertController : public our_new_delete { public: HilbertController(const FreeModule *F0, const RingElement *hf); diff --git a/M2/Macaulay2/e/f4/memblock.hpp b/M2/Macaulay2/e/f4/memblock.hpp index f834837855a..2c9a7dfefac 100644 --- a/M2/Macaulay2/e/f4/memblock.hpp +++ b/M2/Macaulay2/e/f4/memblock.hpp @@ -71,7 +71,7 @@ template typename F4MemoryBlock::slab *F4MemoryBlock::new_slab() { slab *result = new slab; - result->next = 0; + result->next = nullptr; return result; } @@ -87,9 +87,9 @@ T *F4MemoryBlock::reserve(int len) { if (next_free + len > current_slab->block + NSLAB) { - if (current_slab->next == 0) + if (current_slab->next == nullptr) { - last_slab->next = new slab; + last_slab->next = new_slab(); last_slab = last_slab->next; current_slab = last_slab; } diff --git a/M2/Macaulay2/e/f4/moninfo.cpp b/M2/Macaulay2/e/f4/moninfo.cpp index e3ab48ebaca..b79baefd0ce 100644 --- a/M2/Macaulay2/e/f4/moninfo.cpp +++ b/M2/Macaulay2/e/f4/moninfo.cpp @@ -10,7 +10,7 @@ MonomialInfo::MonomialInfo(int nvars0, const MonomialOrdering *mo) { nvars = nvars0; - hashfcn = newarray_atomic(monomial_word, nvars); + hashfcn = new monomial_word[nvars]; for (int i = 0; i < nvars; i++) hashfcn[i] = rand(); mask = 0x10000000; @@ -55,7 +55,12 @@ MonomialInfo::MonomialInfo(int nvars0, const MonomialOrdering *mo) firstvar = 2 + nweights; } -MonomialInfo::~MonomialInfo() { freemem(hashfcn); } +MonomialInfo::~MonomialInfo() +{ + delete [] hashfcn; + //freemem(hashfcn); +} + monomial_word MonomialInfo::monomial_weight(const_packed_monomial m, const M2_arrayint wts) const { diff --git a/M2/Macaulay2/e/gb-f4/GBF4Interface.cpp b/M2/Macaulay2/e/gb-f4/GBF4Interface.cpp index 46a1ccbc145..2cf0ee6fbeb 100644 --- a/M2/Macaulay2/e/gb-f4/GBF4Interface.cpp +++ b/M2/Macaulay2/e/gb-f4/GBF4Interface.cpp @@ -9,17 +9,19 @@ // TODO: Fix int/Strategy discrepancy. Make a ComputationStrategy type in util.hpp or comp-gb.hpp? auto createGBF4Interface(const Matrix *inputMatrix, const std::vector& variableWeights, // what is this, do we need it? - int strategy // do we need this? + int strategy, // do we need this? + int numThreads ) -> GBComputation* { - return newf4::createGBF4Interface(inputMatrix, variableWeights, newf4::Strategy::Normal); + return newf4::createGBF4Interface(inputMatrix, variableWeights, newf4::Strategy::Normal, numThreads); } namespace newf4 { auto createGBF4Interface(const Matrix *inputMatrix, const std::vector& variableWeights, // what is this, do we need it? - Strategy strategy // do we need this? + Strategy strategy, // do we need this? + int numThreads ) -> GBComputation* { const PolynomialRing* R = inputMatrix->get_ring()->cast_to_PolynomialRing(); @@ -29,7 +31,8 @@ auto createGBF4Interface(const Matrix *inputMatrix, auto C = new GBF4Interface(R, inputMatrix, variableWeights, - strategy); + strategy, + numThreads); return C; } @@ -37,7 +40,8 @@ auto createGBF4Interface(const Matrix *inputMatrix, GBF4Interface::GBF4Interface(const PolynomialRing* originalRing, const Matrix* inputMatrix, const std::vector& variableWeights, - Strategy strategy + Strategy strategy, + int numThreads ) : mOriginalRing(originalRing), mFreeModule(inputMatrix->rows()), @@ -56,7 +60,8 @@ GBF4Interface::GBF4Interface(const PolynomialRing* originalRing, const FreeModule* freeModule, const BasicPolyList& basicPolyList, const std::vector& variableWeights, - Strategy strategy + Strategy strategy, + int numThreads ) : mOriginalRing(originalRing), mFreeModule(freeModule), diff --git a/M2/Macaulay2/e/gb-f4/GBF4Interface.hpp b/M2/Macaulay2/e/gb-f4/GBF4Interface.hpp index 7d624164489..be55b99a304 100644 --- a/M2/Macaulay2/e/gb-f4/GBF4Interface.hpp +++ b/M2/Macaulay2/e/gb-f4/GBF4Interface.hpp @@ -10,7 +10,8 @@ class Matrix; auto createGBF4Interface(const Matrix *inputMatrix, const std::vector& variableWeights, // what is this, do we need it? - int strategy + int strategy, + int numThreads ) -> GBComputation*; namespace newf4 { @@ -21,7 +22,8 @@ enum class Strategy { Normal }; auto createGBF4Interface(const Matrix *inputMatrix, const std::vector& variableWeights, // what is this, do we need it? - Strategy strategy // do we need this? + Strategy strategy, // do we need this? + int numThreads ) -> GBComputation*; void populateComputation(const Matrix* M, GBF4Computation& C); @@ -43,14 +45,16 @@ class GBF4Interface : public GBComputation GBF4Interface(const PolynomialRing* originalRing, const Matrix* inputMatrix, const std::vector& variableWeights, - Strategy strategy + Strategy strategy, + int numThreads ); GBF4Interface(const PolynomialRing* originalRing, const FreeModule* freeModule, const BasicPolyList& basicPolyList, const std::vector& variableWeights, - Strategy strategy + Strategy strategy, + int numThreads ); ~GBF4Interface() override; diff --git a/M2/Macaulay2/e/gb-walk.cpp b/M2/Macaulay2/e/gb-walk.cpp index e0dd383a4e5..25696952039 100644 --- a/M2/Macaulay2/e/gb-walk.cpp +++ b/M2/Macaulay2/e/gb-walk.cpp @@ -94,7 +94,8 @@ GBComputation *GBWalker::make_gb(const Matrix *M) const false, -1, 0, - 0 + 0, + 0 // TBB numThreads /* , max_reduction_count */ ); G0->set_stop_conditions(false, diff --git a/M2/Macaulay2/e/interface/groebner.cpp b/M2/Macaulay2/e/interface/groebner.cpp index e8eda3aca38..276cb58719c 100644 --- a/M2/Macaulay2/e/interface/groebner.cpp +++ b/M2/Macaulay2/e/interface/groebner.cpp @@ -103,6 +103,7 @@ Computation /* or null */ *IM2_GB_make( { test_over_RR_or_CC(m->get_ring()); clear_emit_size(); + int numThreads = M2_numTBBThreads; // settable from front end. return GBComputation::choose_gb(m, collect_syz, n_rows_to_keep, @@ -111,6 +112,7 @@ Computation /* or null */ *IM2_GB_make( max_degree, algorithm, strategy, + numThreads, max_reduction_count); } catch (const exc::engine_error& e) { diff --git a/M2/Macaulay2/e/polyquotient.cpp b/M2/Macaulay2/e/polyquotient.cpp index a9952decd37..9f1e2019eda 100644 --- a/M2/Macaulay2/e/polyquotient.cpp +++ b/M2/Macaulay2/e/polyquotient.cpp @@ -220,7 +220,8 @@ GBComputation *PolyRingQuotient::make_gb(const ring_elem g) const false, -1, 0, - 0 + 0, + 0 // TBB numThreads /* , max_reduction_count */ ); G->set_stop_conditions(false, @@ -395,7 +396,8 @@ void PolyRingQuotient::syzygy(const ring_elem a, false, -1, 0, - 0 + 0, + 0 // TBB numThreads /* , max_reduction_count */ ); G->set_stop_conditions(false, diff --git a/M2/Macaulay2/e/schreyer-resolution/res-f4.cpp b/M2/Macaulay2/e/schreyer-resolution/res-f4.cpp index 135b8452e56..292290156b5 100644 --- a/M2/Macaulay2/e/schreyer-resolution/res-f4.cpp +++ b/M2/Macaulay2/e/schreyer-resolution/res-f4.cpp @@ -581,15 +581,6 @@ void F4Res::gaussReduce() bool onlyConstantMaps = false; std::vector track(mReducers.size()); -#if defined(WITH_TBB) - //std::cout << "about to do parallel gauss_reduce" << std::endl; - using threadLocalDense_t = mtbb::enumerable_thread_specific; - // create a dense array for each thread - threadLocalDense_t threadLocalDense([&]() { - return mRing.vectorArithmetic().allocateElementArray(static_cast(mColumns.size())); - }); -#endif - if (onlyConstantMaps) // and not exterior algebra? { for (auto i = 0; i < mReducers.size(); i++) @@ -601,6 +592,14 @@ void F4Res::gaussReduce() } } +#if defined(WITH_TBB) + //std::cout << "about to do parallel gauss_reduce" << std::endl; + using threadLocalDense_t = mtbb::enumerable_thread_specific; + // create a dense array for each thread + threadLocalDense_t threadLocalDense([&]() { + return mRing.vectorArithmetic().allocateElementArray(static_cast(mColumns.size())); + }); + // Reduce to zero every spair. Recording creates the // corresponding syzygy, which is auto-reduced and correctly ordered. @@ -609,7 +608,6 @@ void F4Res::gaussReduce() // std::cout << "gauss_row size: " << mRing.vectorArithmetic().size(gauss_row) << // std::endl; -#if defined(WITH_TBB) //size_t chunk_size = std::max(mSPairs.size() / (100*mFrame.getNumThreads()), (size_t) 1); //mFrame.getScheduler().execute([this,&chunk_size,&onlyConstantMaps,&track,&threadLocalDense] { mFrame.getScheduler().execute([this,&onlyConstantMaps,&track,&threadLocalDense] { diff --git a/M2/Macaulay2/packages/undistributed-packages/ExampleFreeResolutions.m2 b/M2/Macaulay2/packages/undistributed-packages/ExampleFreeResolutions.m2 index e802f81557e..52805b5520b 100644 --- a/M2/Macaulay2/packages/undistributed-packages/ExampleFreeResolutions.m2 +++ b/M2/Macaulay2/packages/undistributed-packages/ExampleFreeResolutions.m2 @@ -1018,7 +1018,7 @@ peek M.cache#(ResolutionContext{}).Result.Resolution.RawComputation -- Some benchmark examples for testing parallelism of Schreyer res code. -- I = Grassmannian(2, 5, CoefficientRing => ZZ/101) - -- I = Grassmannian(1, 6, CoefficientRing => ZZ/101) + I = Grassmannian(1, 6, CoefficientRing => ZZ/101) I = Grassmannian(1, 7, CoefficientRing => ZZ/101) debug Core @@ -1048,3 +1048,15 @@ peek M.cache#(ResolutionContext{}).Result.Resolution.RawComputation elapsedTime freeResolution(ideal I_*, Strategy => Nonminimal) elapsedTime C = freeResolution(ideal I_*, Strategy => Nonminimal, ParallelizeByDegree => true) + restart + I = Grassmannian(2, 6, CoefficientRing => ZZ/101) + poincI = poincare ideal I_* + R = ring I + elapsedTime gens gb(ideal I_*, Algorithm => LinearAlgebra, Hilbert => poincI); + + restart + R = ZZ/101[a..g] + I = ideal random(R^1,R^{4:-4}); + poincI = poincare ideal (a^4, b^4, c^4, d^4); + elapsedTime Igb = gens gb(ideal I_*, Algorithm => LinearAlgebra); + elapsedTime gens gb(ideal I_*, Algorithm => LinearAlgebra, Hilbert => poincI); diff --git a/M2/Macaulay2/packages/undistributed-packages/ExampleIdeals/gb-ideals.m2 b/M2/Macaulay2/packages/undistributed-packages/ExampleIdeals/gb-ideals.m2 index 32deb634f3c..4e3ccd359e3 100644 --- a/M2/Macaulay2/packages/undistributed-packages/ExampleIdeals/gb-ideals.m2 +++ b/M2/Macaulay2/packages/undistributed-packages/ExampleIdeals/gb-ideals.m2 @@ -40,7 +40,8 @@ J1=ideal"dgjm-chjm-dfkm+bhkm+cflm-bglm-dgin+chin+dekn-ahkn-celn+agln+dfio-bhio-d JMPS-INPS-JLQS+HNQS+ILRS-HMRS-JMOT+INOT+JKQT-GNQT-IKRT+GMRT+JLOU-HNOU-JKPU+GNPU+HKRU-GLRU-ILOV+HMOV+IKPV-GMPV-HKQV+GLQV"; J1 -* - time gb(ideal J1_*, Algorithm=>LinearAlgebra); -- 69.9 sec + gbTrace = 2 + elapsedTime gb(ideal J1_*, Algorithm=>LinearAlgebra); -- 69.9 sec -- Mike M1 max, std::variant VectorArithmetic: 18.0 sec, 18.46 sec -- Mike M1 max, virtual VectorArithmetic: 17.88 sec, 19.06 sec @@ -65,7 +66,7 @@ R1 = kk[a..g, MonomialSize=>8]; J1 = ideal random(R1^1, R1^{-5,-5,-5,-6}); J1 -* - gbTrace=1 + gbTrace=2 time gb(ideal J1_*, Algorithm=>LinearAlgebra); -- 54.1 sec (53.25 sec, 58.7 sec, MBP 7/17/09) -- Mike M1 max, std::variant VectorArithmetic: 14.29 sec, 14.3 sec -- Mike M1 max, virtual VectorArithmetic: 14.43 sec, 14.28 sec @@ -639,3 +640,14 @@ I = sub(ideal last coefficients F, R) gbTrace=3 time gens gb(I, DegreeLimit=>m*n); ---------------------------------------------- + +--random5566 +restart +kk = ZZ/101; +R1 = kk[a..g, MonomialSize=>8]; +J1 = ideal random(R1^1, R1^{-5,-5,-6,-6}); +-* + gbTrace=2 + elapsedTime gens gb(ideal J1_*, Algorithm=>LinearAlgebra); -- 26.3 M1 Mac Mini + Zoom; 16s gauss + elapsedTime groebnerBasis(ideal J1_*, Strategy=>"F4"); -- 6.5s M1 Mac Mini + Zoom +*-