diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9dfadb29..699da1a7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,7 +31,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install wheel setuptools - python -m pip install numpy scipy matplotlib bumps periodictable scikit-learn pytest pytest-cov + python -m pip install numpy scipy matplotlib bumps periodictable scikit-learn pytest pytest-cov numba python setup.py build_ext --inplace python setup.py build mkdir release @@ -97,8 +97,6 @@ jobs: echo "SRC_DIST=$(ls dist/*.tar.gz)" >> $GITHUB_ENV echo "WINDOWS_WHL=$(ls dist/*win*.whl)" >> $GITHUB_ENV echo "MACOS_WHL=$(ls dist/*macos*.whl)" >> $GITHUB_ENV - echo "WINDOWS_LATEST_WHL=$(ls unstable/*win*.whl)" >> $GITHUB_ENV - echo "MACOS_LATEST_WHL=$(ls unstable/*macos*.whl)" >> $GITHUB_ENV - name: Update unstable release uses: johnwbyrd/update-release@v1.0.0 @@ -106,7 +104,7 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} release: unstable tag: sid - files: unstable/Refl1D-windows-exe-latest.zip ${{ env.WINDOWS_LATEST_WHL }} ${{ env.MACOS_LATEST_WHL }} + files: unstable/Refl1D-windows-exe-latest.zip - name: Update current release if: startsWith(github.ref, 'refs/tags') diff --git a/refl1d/lib/calc_g_zs_cex.c b/refl1d/lib/calc_g_zs_cex.c deleted file mode 100644 index d6e63e4c..00000000 --- a/refl1d/lib/calc_g_zs_cex.c +++ /dev/null @@ -1,256 +0,0 @@ -// Can't use Py_LIMITED_API with numpy arrays -//#define Py_LIMITED_API 0x03020000 -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include -#include - - -PyObject *Pcalc_g_zs(PyObject *self, PyObject *args) -{ - PyArrayObject *g_z_pao, *c_i_pao, *g_zs_pao; - double lambda_0, lambda_1, cval; - double *g_z, *c_i, *g_zs, *g_zs_next; - Py_ssize_t segments, layers, r, z, i; - - if (!PyArg_ParseTuple(args, "O!O!O!ddnn", - &PyArray_Type,&g_z_pao, - &PyArray_Type,&c_i_pao, - &PyArray_Type,&g_zs_pao, - &lambda_0, - &lambda_1, - &layers, - &segments)) - {return NULL;} - - g_z=(double *) PyArray_DATA(g_z_pao); - c_i=(double *) PyArray_DATA(c_i_pao); - g_zs=(double *) PyArray_DATA(g_zs_pao); - g_zs_next = g_zs + layers; - - for (r=1; r= 3 - - DLL_EXPORT PyMODINIT_FUNC MODULE_INIT3(void) - { - PyObject* blah; - static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - MODULE_NAME, /* m_name */ - MODULE_DOC, /* m_doc */ - -1, /* m_size */ - MODULE_METHODS, /* m_methods */ - NULL, /* m_reload */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL, /* m_free */ - }; - blah = PyModule_Create(&moduledef); - import_array(); - return blah; - } - -#else /* PY_MAJOR_VERSION >= 3 */ - - DLL_EXPORT PyMODINIT_FUNC MODULE_INIT2(void) - { - Py_InitModule4(MODULE_NAME, - MODULE_METHODS, - MODULE_DOC, - 0, - PYTHON_API_VERSION - ); - import_array(); - } - -#endif /* PY_MAJOR_VERSION >= 3 */ diff --git a/refl1d/lib/methods.h b/refl1d/lib/methods.h index 877aaaa9..19c62801 100755 --- a/refl1d/lib/methods.h +++ b/refl1d/lib/methods.h @@ -6,92 +6,66 @@ // TODO: can we free the view before using the pointer with writable vectors? /* *** // Vector binding glue -#if (PY_VERSION_HEX > 0x03000000) && !defined(Py_LIMITED_API) - // Assuming that a view into a writable vector points to a - // non-changing pointer for the duration of the C call, capture - // the view pointer and immediately free the view. - #define VECTOR(VEC_obj, VEC_buf, VEC_len) do { \ - Py_buffer VEC_view; \ - int VEC_err = PyObject_GetBuffer(VEC_obj, &VEC_view, PyBUF_WRITABLE|PyBUF_FORMAT); \ - if (VEC_err < 0 || sizeof(*VEC_buf) != VEC_view.itemsize) return NULL; \ - VEC_buf = (typeof(VEC_buf))VEC_view.buf; \ - VEC_len = VEC_view.len/sizeof(*VEC_buf); \ - PyBuffer_Release(&VEC_view); \ - } while (0) -#else - #define VECTOR(VEC_obj, VEC_buf, VEC_len) do { \ - int VEC_err = PyObject_AsWriteBuffer(VEC_obj, (void **)(&VEC_buf), &VEC_len); \ - if (VEC_err < 0) return NULL; \ - VEC_len /= sizeof(*VEC_buf); \ - } while (0) -#endif +// Assuming that a view into a writable vector points to a +// non-changing pointer for the duration of the C call, capture +// the view pointer and immediately free the view. +#define VECTOR(VEC_obj, VEC_buf, VEC_len) do { \ + Py_buffer VEC_view; \ + int VEC_err = PyObject_GetBuffer(VEC_obj, &VEC_view, PyBUF_WRITABLE|PyBUF_FORMAT); \ + if (VEC_err < 0 || sizeof(*VEC_buf) != VEC_view.itemsize) return NULL; \ + VEC_buf = (typeof(VEC_buf))VEC_view.buf; \ + VEC_len = VEC_view.len/sizeof(*VEC_buf); \ + PyBuffer_Release(&VEC_view); \ +} while (0) *** */ // Vector binding glue -#if 0 && (PY_VERSION_HEX > 0x03000000) && !defined(Py_LIMITED_API) - - // Buffer protocol for python 3 requires freeing each view - // that is retrieved, so need to leave space to retrieve - // the buffer view for each vector and free them after they - // have been used. This is particularly problematic if there - // is an error discovered while interpreting the inputs, since - // we may be freeing a subset. - #define DECLARE_VECTORS(VEC_n) \ - Py_buffer VEC_views[VEC_n]; \ - int VEC_current = 0; \ - const int VEC_maximum = VEC_n; - - #define FREE_VECTORS() \ - do { \ - int VEC_k = 0; \ - while (VEC_k < VEC_current) PyBuffer_Release(&VEC_views[VEC_k++]); \ - } while (0) - - #define _VECTOR(VEC_obj, VEC_buf, VEC_len, VEC_flags) \ - do { \ - if (VEC_current >= VEC_maximum) { \ - PyErr_SetString(PyExc_TypeError, "not enough vectors declared"); \ - FREE_VECTORS(); \ - return NULL; \ - } else { \ - Py_buffer *VEC_view = &VEC_views[VEC_current]; \ - int VEC_err = PyObject_GetBuffer(VEC_obj, VEC_view, VEC_flags); \ - if (VEC_err < 0) { FREE_VECTORS(); return NULL; } \ - VEC_current++; \ - if (sizeof(*VEC_buf) != VEC_view->itemsize) { \ - PyErr_SetString(PyExc_TypeError, "wrong numeric type for vector"); \ - FREE_VECTORS(); \ - return NULL; \ - } \ - VEC_buf = (typeof(VEC_buf))VEC_view->buf; \ - VEC_len = VEC_view->len/sizeof(*VEC_buf); \ - } \ - } while (0) - - #define INVECTOR(obj, buf, len) _VECTOR(obj, buf, len, PyBUF_SIMPLE|PyBUF_FORMAT) - #define OUTVECTOR(obj, buf, len) _VECTOR(obj, buf, len, PyBUF_WRITABLE|PyBUF_FORMAT) +// Buffer protocol for python 3 requires freeing each view +// that is retrieved, so need to leave space to retrieve +// the buffer view for each vector and free them after they +// have been used. This is particularly problematic if there +// is an error discovered while interpreting the inputs, since +// we may be freeing a subset. +#define DECLARE_VECTORS(VEC_n) \ + Py_buffer VEC_views[VEC_n]; \ + int VEC_current = 0; \ + const int VEC_maximum = VEC_n; + +#define FREE_VECTORS() \ + do { \ + int VEC_k = 0; \ + while (VEC_k < VEC_current) PyBuffer_Release(&VEC_views[VEC_k++]); \ + } while (0) +#ifdef __cplusplus +# define CAST_ASSIGN(to_y, from_x) to_y = (decltype(to_y))(from_x) #else - - #define DECLARE_VECTORS(n) do {} while(0) - #define FREE_VECTORS() do {} while(0) - - #define INVECTOR(obj,buf,len) \ - do { \ - int err = PyObject_AsReadBuffer(obj, (const void **)(&buf), &len); \ - if (err < 0) return NULL; \ - len /= sizeof(*buf); \ - } while (0) - - #define OUTVECTOR(obj,buf,len) \ - do { \ - int err = PyObject_AsWriteBuffer(obj, (void **)(&buf), &len); \ - if (err < 0) return NULL; \ - len /= sizeof(*buf); \ - } while (0) - +# define CAST_ASSIGN(to_y, from_x) to_y = (typeof(to_y))(from_x) #endif +#define _VECTOR(VEC_obj, VEC_buf, VEC_len, VEC_flags) \ + do { \ + if (VEC_current >= VEC_maximum) { \ + PyErr_SetString(PyExc_TypeError, "not enough vectors declared"); \ + FREE_VECTORS(); \ + return NULL; \ + } else { \ + Py_buffer *VEC_view = &VEC_views[VEC_current]; \ + int VEC_err = PyObject_GetBuffer(VEC_obj, VEC_view, VEC_flags); \ + if (VEC_err < 0) { FREE_VECTORS(); return NULL; } \ + VEC_current++; \ + if (sizeof(*VEC_buf) != VEC_view->itemsize) { \ + PyErr_SetString(PyExc_TypeError, "wrong numeric type for vector"); \ + FREE_VECTORS(); \ + return NULL; \ + } \ + CAST_ASSIGN(VEC_buf, VEC_view->buf); \ + VEC_len = VEC_view->len/sizeof(*VEC_buf); \ + } \ + } while (0) + +#define INVECTOR(obj, buf, len) _VECTOR(obj, buf, len, PyBUF_SIMPLE|PyBUF_FORMAT) +#define OUTVECTOR(obj, buf, len) _VECTOR(obj, buf, len, PyBUF_WRITABLE|PyBUF_FORMAT) #define SCALAR(obj) PyFloat_AsDouble(obj) diff --git a/refl1d/polymer.py b/refl1d/polymer.py index e391d9bc..ee55f56a 100644 --- a/refl1d/polymer.py +++ b/refl1d/polymer.py @@ -52,29 +52,21 @@ "VolumeProfile", "layer_thickness"] import inspect - -import numpy as np - -from bumps.parameter import Parameter, to_dict -from .model import Layer -from . import util from time import time +from collections import OrderedDict -try: - from collections import OrderedDict -except ImportError: - class OrderedDict(dict): - def popitem(self, *args, **kw): - return dict.popitem(self, *args) - +import numpy as np from numpy import real, imag, exp, log, sqrt, pi, hstack, ones_like +from numpy.core.multiarray import correlate as old_correlate +from bumps.parameter import Parameter, to_dict -# This is okay to use as long as LAMBDA_ARRAY is symmetric, -# otherwise a slice LAMBDA_ARRAY[::-1] is necessary -from numpy.core.multiarray import correlate +from . import util +from .model import Layer LAMBDA_1 = 1.0/6.0 #always assume cubic lattice (1/6) for now LAMBDA_0 = 1.0 - 2.0*LAMBDA_1 +# Use reverse order for LAMBDA_ARRAY if it is asymmetric since we are using +# it with correlate(). LAMBDA_ARRAY = np.array([LAMBDA_1, LAMBDA_0, LAMBDA_1]) MINLAT = 25 MINBULK = 5 @@ -1004,7 +996,7 @@ def SCFeqns(phi_z, chi, chi_s, sigma, n_avg, p_i, phi_b=0): # calculate new g_z (Boltzmann weighting factors) u_prime = log((1.0 - phi_z) / (1.0 - phi_b)) - u_int = 2 * chi * (correlate(phi_z, LAMBDA_ARRAY, 1) - phi_b) + u_int = 2 * chi * (old_correlate(phi_z, LAMBDA_ARRAY, 1) - phi_b) u_int[0] += chi_s u_z = u_prime + u_int g_z = exp(u_z) @@ -1072,31 +1064,15 @@ def calc_phi_z(g_z, n_avg, sigma, phi_b, u_z_avg=0, p_i=None): return phi_z_ta + phi_z_free - def compose(g_zs, g_zs_ngts, g_z): prod = g_zs * np.fliplr(g_zs_ngts) prod[np.isnan(prod)] = 0 return np.sum(prod, axis=1) / g_z - class Propagator(object): - cex = None - def __init__(self, g_z, segments): self.g_z = g_z self.shape = int(g_z.size), int(segments) - # keep all future instances from retrying this test - if self.cex is not None: - return - - try: - import refl1d.calc_g_zs_cex as cex - Propagator.cex = cex - except ImportError: - import warnings - warnings.warn('Could not load C extension for EndTetheredPolymer. Continuing with slower NumPy code.\n' - 'Try rebuilding refl1d to remove this warning and speed things up!') - Propagator.cex = False def ta(self): # terminally attached beginnings @@ -1105,7 +1081,7 @@ def ta(self): g_zs = self._new() g_zs[:, 0] = 0.0 g_zs[0, 0] = self.g_z[0] - self._calc_g_zs_uniform(self.g_z, g_zs) + _calc_g_zs_uniform(self.g_z, g_zs, LAMBDA_0, LAMBDA_1) return g_zs def free(self): @@ -1114,7 +1090,7 @@ def free(self): g_zs = self._new() g_zs[:, 0] = self.g_z - self._calc_g_zs_uniform(self.g_z, g_zs) + _calc_g_zs_uniform(self.g_z, g_zs, LAMBDA_0, LAMBDA_1) return g_zs def ngts_u(self, c): @@ -1123,7 +1099,7 @@ def ngts_u(self, c): g_zs = self._new() g_zs[:, 0] = c * self.g_z - self._calc_g_zs_uniform(self.g_z, g_zs) + _calc_g_zs_uniform(self.g_z, g_zs, LAMBDA_0, LAMBDA_1) return g_zs def ngts(self, c_i): @@ -1132,27 +1108,56 @@ def ngts(self, c_i): g_zs = self._new() g_zs[:, 0] = c_i[-1] * self.g_z - self._calc_g_zs(self.g_z, c_i, g_zs) + _calc_g_zs(self.g_z, c_i, g_zs, LAMBDA_0, LAMBDA_1) return g_zs def _new(self): return np.empty(self.shape, order='F') - def _calc_g_zs(self, g_z, c_i, g_zs): - if self.cex: - self.cex._calc_g_zs(g_z, c_i, g_zs, LAMBDA_0, LAMBDA_1, *self.shape) - else: - pg_zs = g_zs[:, 0] - segment_iterator = enumerate(c_i[::-1]) - next(segment_iterator) - for r, c in segment_iterator: - g_zs[:, r] = pg_zs = (correlate(pg_zs, LAMBDA_ARRAY, 1) + c) * g_z - - def _calc_g_zs_uniform(self, g_z, g_zs): - if self.cex: - self.cex._calc_g_zs_uniform(g_z, g_zs, LAMBDA_0, LAMBDA_1, *self.shape) - else: - segments = g_zs.shape[1] - pg_zs = g_zs[:, 0] - for r in range(1, segments): - g_zs[:, r] = pg_zs = correlate(pg_zs, LAMBDA_ARRAY, 1) * g_z +try: + from numba import njit + USE_NUMBA = True +except ImportError: + USE_NUMBA = False + +#USE_NUMBA = False # Uncomment when doing timing tests + +if USE_NUMBA: + @njit('(f8[:], f8[:, :], f8, f8)', cache=True) + def _calc_g_zs_uniform(g_z, g_zs, f0, f1): + points, segments = g_zs.shape + for r in range(segments-1): + g_zs[0, r+1] = (g_zs[0, r]*f0 + g_zs[1, r]*f1)*g_z[0] + #g_zs[1:-1, r+1] = np.correlate(g_zs[:, r], [f1, f0, f1], 'valid')*g_z[1:-1] + for k in range(1, points-1): + g_zs[k, r+1] = ( + g_zs[k, r]*f0 + (g_zs[k-1, r] + g_zs[k+1, r])*f1)*g_z[k] + g_zs[-1, r+1] = (g_zs[-2, r]*f1 + g_zs[-1, r]*f0)*g_z[-1] + + @njit('(f8[:], f8[:], f8[:, :], f8, f8)', cache=True) + def _calc_g_zs(g_z, c_i, g_zs, f0, f1): + points, segments = g_zs.shape + for r in range(segments-1): + c_ir = c_i[segments-(r+1)-1] + g_zs[0, r+1] = (g_zs[0, r]*f0 + g_zs[1, r]*f1 + c_ir)*g_z[0] + #g_zs[1:-1, r+1] = (np.correlate(g_zs[:, r], fir, 'valid') + c_ir)*g_z[1:-1] + for k in range(1, points-1): + g_zs[k, r+1] = ( + g_zs[k, r]*f0 + (g_zs[k-1, r] + g_zs[k+1, r])*f1 + c_ir)*g_z[k] + g_zs[-1, r+1] = (g_zs[-2, r]*f1 + g_zs[-1, r]*f0 + c_ir)*g_z[-1] + +else: + def _calc_g_zs(g_z, c_i, g_zs, f0, f1): + coeff = np.array([f1, f0, f1]) + pg_zs = g_zs[:, 0] + segment_iterator = enumerate(c_i[::-1]) + next(segment_iterator) + for r, c in segment_iterator: + g_zs[:, r] = pg_zs = (old_correlate(pg_zs, coeff, 1) + c) * g_z + + def _calc_g_zs_uniform(g_z, g_zs, f0, f1): + coeff = np.array([f1, f0, f1]) + segments = g_zs.shape[1] + pg_zs = g_zs[:, 0] + for r in range(1, segments): + g_zs[:, r] = pg_zs = old_correlate(pg_zs, coeff, 1) * g_z diff --git a/setup.py b/setup.py index e6a16f3b..12cf59bb 100755 --- a/setup.py +++ b/setup.py @@ -67,16 +67,6 @@ def reflmodule_config(): py_limited_api="cp32", ) -# SCF extension -def SCFmodule_config(): - from numpy import get_include - return Extension('refl1d.calc_g_zs_cex', - sources=[os.path.join('refl1d', 'lib', 'calc_g_zs_cex.c')], - include_dirs=[get_include()], - #language="c++", - py_limited_api="cp32", - ) - #TODO: write a proper dependency checker for packages which cannot be # installed by easy_install #dependency_check('numpy>=1.0', 'scipy>=0.6', 'matplotlib>=1.0', 'wx>=2.8.9') @@ -107,9 +97,9 @@ def SCFmodule_config(): 'console_scripts': ['refl1d=refl1d.main:cli'], 'gui_scripts': ['refl1d_gui=refl1d.main:gui'] }, - ext_modules=[reflmodule_config(), SCFmodule_config()], + ext_modules=[reflmodule_config()], install_requires=['bumps>=0.7.16', 'numpy', 'scipy', 'matplotlib', 'periodictable'], - extras_require={'full': ['wxpython']}, + extras_require={'full': ['wxpython', 'ipython', 'numba']}, cmdclass={'build_ext': build_ext_subclass}, )