From 2591d1862eefd9b5173ffe525c4027fb6c6beb42 Mon Sep 17 00:00:00 2001 From: Michael Ringgaard Date: Thu, 9 May 2019 18:09:06 +0200 Subject: [PATCH] Python 3 support for SLING API (#366) --- .travis.yml | 7 +- doc/guide/install.md | 37 +++--- doc/guide/myelin.md | 12 +- doc/guide/pyapi.md | 57 ++++---- doc/guide/training.md | 2 +- python/__init__.py | 1 + python/flags.py | 2 +- python/log.py | 2 +- python/myelin/__init__.py | 4 +- python/myelin/builder.py | 9 +- python/myelin/flow.py | 39 +++--- python/myelin/lexical_encoder.py | 4 +- python/nlp/document.py | 9 +- python/task/__init__.py | 2 +- python/task/download.py | 9 +- python/task/embedding.py | 6 +- python/task/entity.py | 4 +- python/task/wiki.py | 4 +- python/task/workflow.py | 23 ++-- run.sh | 2 +- setup.sh | 68 ++++++++++ sling/pyapi/BUILD | 3 +- sling/pyapi/pyapi.cc | 30 +++-- sling/pyapi/pyarray.cc | 33 ++++- sling/pyapi/pyarray.h | 5 +- sling/pyapi/pybase.h | 16 ++- sling/pyapi/pydate.cc | 13 +- sling/pyapi/pyframe.cc | 42 ++++-- sling/pyapi/pyframe.h | 3 + sling/pyapi/pymisc.cc | 20 +-- sling/pyapi/pymyelin.cc | 69 +++++----- sling/pyapi/pyparser.cc | 2 +- sling/pyapi/pyphrase.cc | 16 +-- sling/pyapi/pyrecordio.cc | 47 +++++-- sling/pyapi/pystore.cc | 217 +++++++++++++------------------ sling/pyapi/pystore.h | 10 +- sling/pyapi/pytask.cc | 31 ++--- sling/pyapi/pywiki.cc | 4 +- tools/build-wheel.py | 36 +++-- tools/docv1to2.py | 4 +- tools/optohdr.py | 16 +-- 41 files changed, 522 insertions(+), 398 deletions(-) create mode 100755 setup.sh diff --git a/.travis.yml b/.travis.yml index 09f4331f..2191a104 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ language: - cpp - python compiler: gcc -python: "2.7" +python: "3.5" addons: apt: @@ -13,10 +13,11 @@ addons: - wget - pkg-config - g++-4.8 + - python3.5-dev before_install: - - wget https://github.com/bazelbuild/bazel/releases/download/0.8.0/bazel_0.8.0-linux-x86_64.deb - - sudo dpkg -i bazel_0.8.0-linux-x86_64.deb + - wget 
https://github.com/bazelbuild/bazel/releases/download/0.13.0/bazel_0.13.0-linux-x86_64.deb + - sudo dpkg -i bazel_0.13.0-linux-x86_64.deb script: - tools/buildall.sh diff --git a/doc/guide/install.md b/doc/guide/install.md index 89a2f462..1f489b07 100644 --- a/doc/guide/install.md +++ b/doc/guide/install.md @@ -4,10 +4,10 @@ If you just want to try out the parser on a pre-trained model, you can install the wheel with pip and download a pre-trained parser model. On a Linux machine -with Python 2.7 you can install a pre-built wheel: +with Python 3.5 you can install a pre-built wheel: ``` -sudo pip install http://www.jbox.dk/sling/sling-2.0.0-cp27-none-linux_x86_64.whl +sudo pip3 install http://www.jbox.dk/sling/sling-2.0.0-cp35-none-linux_x86_64.whl ``` and download the pre-trained model: ``` @@ -38,28 +38,30 @@ git clone https://github.com/google/sling.git cd sling ``` -SLING uses [Bazel](https://bazel.build/) as the build system, so you need to -[install Bazel](https://docs.bazel.build/versions/master/install.html) in order -to build the SLING parser. - +Next, run the `setup.sh` script to set up the SLING development environment +and build the code: ```shell -sudo apt-get install pkg-config zip g++ zlib1g-dev unzip python2.7 python2.7-dev -wget -P /tmp https://github.com/bazelbuild/bazel/releases/download/0.13.0/bazel-0.13.0-installer-linux-x86_64.sh -chmod +x /tmp/bazel-0.13.0-installer-linux-x86_64.sh -sudo /tmp/bazel-0.13.0-installer-linux-x86_64.sh +./setup.sh ``` -The parser trainer uses Python v2.7 and PyTorch for training, so they need to be -installed. +This will perform the following steps: +* Install missing package dependencies, notably GCC and Python 3. +* Install [Bazel](https://bazel.build/) which is used as the build system for + SLING. +* Build SLING from source. +* Remove the Python 2.7 SLING pip package if it is installed. +* Set up link to the SLING development environment for SLING Python 3 API. 
+ +The parser trainer uses PyTorch for training, so it also needs to be installed: ```shell -sudo pip install http://download.pytorch.org/whl/cpu/torch-0.3.1-cp27-cp27mu-linux_x86_64.whl +sudo pip3 install http://download.pytorch.org/whl/cpu/torch-0.3.1-cp35-cp35m-linux_x86_64.whl ``` ## Building Operating system: Linux
-Languages: C++, Python 2.7, assembler
+Languages: C++ (gcc or clang), Python 3.5+, assembler
CPU: Intel x64 or compatible
Build system: Bazel
@@ -69,11 +71,12 @@ You can use the `buildall.sh` script to build all the source code: tools/buildall.sh ``` -You then need to link the sling Python module directly to the Python source -directory to use it in "developer mode": +If you haven't run the `setup.sh` script already, you then need to link the +sling Python module directly to the Python source directory to use it in +"developer mode": ```shell -sudo ln -s $(realpath python) /usr/lib/python2.7/dist-packages/sling +sudo ln -s $(realpath python) /usr/lib/python3/dist-packages/sling ``` **NOTE:** diff --git a/doc/guide/myelin.md b/doc/guide/myelin.md index a382970b..a75c8707 100644 --- a/doc/guide/myelin.md +++ b/doc/guide/myelin.md @@ -152,15 +152,15 @@ data = cell.instance() # Set input. xdata = data[x] -for i in xrange(64): xdata[0, i] = 5 +for i in range(64): xdata[0, i] = 5 # Run computation for data instance. data.compute() # Print result. ydata = data[y] -print "y", ydata -print "argmax", np.asarray(ydata).argmax() +print("y", ydata) +print("argmax", np.asarray(ydata).argmax()) ``` The index operator on the cell object (e.g. `data[x]`) returns a _tensor_ object @@ -217,15 +217,15 @@ data = cell.instance() # Set input. xdata = data[x] -for i in xrange(64): xdata[0, i] = 5 +for i in range(64): xdata[0, i] = 5 # Run computation for data instance. data.compute() # Print result. ydata = data[y] -print "y", ydata -print "argmax", np.asarray(ydata).argmax() +print("y", ydata) +print("argmax", np.asarray(ydata).argmax()) ``` ## Creating a flow file from a Tensorflow graph diff --git a/doc/guide/pyapi.md b/doc/guide/pyapi.md index 6a12484b..acae6669 100644 --- a/doc/guide/pyapi.md +++ b/doc/guide/pyapi.md @@ -3,14 +3,9 @@ A number of components in SLING can be accessed through the Python SLING API. You can install the SLING Python wheel using pip: ``` -sudo pip install http://www.jbox.dk/sling/sling-2.0.0-cp27-none-linux_x86_64.whl -``` -or you can [clone the repo and build SLING from sources](install.md). 
You can -then link the `sling` Python module directly to the Python source directory to -use it in "developer mode": -``` -sudo ln -s $(realpath python) /usr/lib/python2.7/dist-packages/sling +sudo pip3 install http://www.jbox.dk/sling/sling-2.0.0-cp35-none-linux_x86_64.whl ``` +or you can [clone the repo and build SLING from sources](install.md). # Table of contents @@ -61,25 +56,25 @@ doc = store['document'] ``` Role values for frames can be accessed as attributes: ``` -print doc.name +print(doc.name) ``` or using indexing: ``` -print doc['name'] +print(doc['name']) ``` You can also use a frame value to access roles: ``` -print doc[name] +print(doc[name]) ``` You can test if a frame has a role: ``` -if 'name' in doc: print "doc has 'name'" -if name in doc: print "doc has name" +if 'name' in doc: print("doc has 'name'") +if name in doc: print("doc has name") ``` You can iterate over all the named frames (i.e. frames with an `id:` slot) in a store: ``` -for f in store: print f.id +for f in store: print(f.id) ``` The `parse()` method can be used for adding new frames to the store: ``` @@ -121,24 +116,24 @@ f.extend([('foo', 10), ('bar': 20)]) All the slots in a frame can be iterated: ``` for name, value in f: - print "slot", name,"=", value + print("slot", name,"=", value) ``` or just the roles with a particular name: ``` for r in doc('role'): - print "doc role", r + print("doc role", r) ``` Frames can be encoded in text format with the `data()` method: ``` -print f.data() +print(f.data()) ``` and with indentation: ``` -print f.data(pretty=True) +print(f.data(pretty=True)) ``` or with binary encoding: ``` -print len(f.data(binary=True)) +print(len(f.data(binary=True))) ``` Arrays can be created with the `array()` method: ``` @@ -154,9 +149,9 @@ a[2] = 3 SLING arrays work much in the same way as Python lists except that they have a fixed size: ``` -print len(a) -print a[1] -for item in a: print item +print(len(a)) +print(a[1]) +for item in a: print(item) ``` Finally, 
a store can be save to a file in textual encoding: ``` @@ -181,7 +176,7 @@ import sling recin = sling.RecordReader("test.rec") for key,value in recin: - print key, value + print(key, value) recin.close() ``` The `RecordReader` class has the following methods: @@ -236,7 +231,7 @@ writer.close() # Look up each record in record database. db = sling.RecordDatabase("/tmp/test.rec") for i in range(N): - print db.lookup(str(i)) + print(db.lookup(str(i))) db.close() ``` @@ -310,7 +305,7 @@ for _,rec in corpus: num_docs += 1 num_tokens += len(doc.tokens) -print "docs:", num_docs, "tokens:", num_tokens +print("docs:", num_docs, "tokens:", num_tokens) ``` Example: read text from a file and create a corpus of tokenized documents: @@ -464,7 +459,7 @@ The `Corpus` class can be used for iterating over a corpus of documents stored i record files: ``` for document in sling.Corpus("local/data/e/wiki/en/documents@10.rec"): - print document.text + print(document.text) ``` This will create a global store with the document schema symbols and create a local store for each document. If you have a global store you can use this @@ -474,7 +469,7 @@ kb = sling.Store() corpus = sling.Corpus("local/data/e/wiki/en/documents@10.rec", commons=kb) kb.freeze() for document in corpus: - print document.text + print(document.text) ``` ### LEX format @@ -560,11 +555,11 @@ kb.freeze() # Lookup entities with name 'Annette Stroyberg'. for entity in names.lookup("Annette Stroyberg"): - print entity.id, entity.name + print(entity.id, entity.name) # Query all entities named 'Funen' with frequency counts. 
for m in names.query("Funen"): - print m.count(), m.id(), m.item().name, "(", m.item().description, ")" + print(m.count(), m.id(), m.item().name, "(", m.item().description, ")") ``` The `lookup()` and `query()` methods return the matches in decreasing @@ -593,7 +588,7 @@ for Annette Stroyberg ([Q2534120](https://www.wikidata.org/wiki/Q2534120)): ``` entity = kb["Q2534120"] dob = sling.Date(entity["P569"]) -print dob.year, dob.month, dob.day +print(dob.year, dob.month, dob.day) ``` The `Date` class has the following properties and methods: @@ -688,7 +683,7 @@ The `flags.define()` function takes the same arguments as the standard Python method. You can then access the flags as variables in the flags module, e.g.: ``` if flags.verbose: - print "verbose output..." + print("verbose output...") ``` The flags parser must be initialized in the main method of your Python program: @@ -712,5 +707,5 @@ url = "https://www.wikidata.org/wiki/Special:EntityData/" + qid + ".json" json = urllib2.urlopen(url).read()[len(qid) + 16:-2] item = wikiconv.convert_wikidata(store, json) -print item.data(pretty=True) +print(item.data(pretty=True)) ``` diff --git a/doc/guide/training.md b/doc/guide/training.md index 988e8a5f..2bbbe80b 100644 --- a/doc/guide/training.md +++ b/doc/guide/training.md @@ -195,7 +195,7 @@ the same script to create the commons store behind the scenes. But we mention this here in case one wishes to inspect the automatically created commons. 
```shell -python sling/nlp/parser/tools/commons_from_corpora.py \ +python3 sling/nlp/parser/tools/commons_from_corpora.py \ --input=,, \ --output= ``` diff --git a/python/__init__.py b/python/__init__.py index 45f58d70..e4435512 100644 --- a/python/__init__.py +++ b/python/__init__.py @@ -1,4 +1,5 @@ import sling.pysling as api + from sling.log import * from sling.nlp.document import * from sling.nlp.parser import * diff --git a/python/flags.py b/python/flags.py index eaabb7fc..7338f429 100644 --- a/python/flags.py +++ b/python/flags.py @@ -15,7 +15,7 @@ """Command-line flags""" import argparse -import pysling as api +import sling.pysling as api # Command line flag arguments. arg = argparse.Namespace() diff --git a/python/log.py b/python/log.py index c684f9b5..43940e33 100644 --- a/python/log.py +++ b/python/log.py @@ -16,7 +16,7 @@ import inspect import os -import pysling as api +import sling.pysling as api INFO = 0 WARNING = 1 diff --git a/python/myelin/__init__.py b/python/myelin/__init__.py index d0922075..8e9180fa 100644 --- a/python/myelin/__init__.py +++ b/python/myelin/__init__.py @@ -1,7 +1,7 @@ import sling.pysling as api -from builder import * -from flow import * +from .builder import * +from .flow import * Compiler=api.Compiler diff --git a/python/myelin/builder.py b/python/myelin/builder.py index 6c4395c8..20b3da67 100644 --- a/python/myelin/builder.py +++ b/python/myelin/builder.py @@ -15,10 +15,7 @@ """Myelin function builder and expression evaluator.""" -import flow -from flow import Variable -from flow import Function -from flow import Flow +from .flow import set_builder_factory, Variable DT_FLOAT32 = "float32" DT_FLOAT64 = "float64" @@ -160,7 +157,7 @@ def split(self, x, splits, axis=0, name=None): shape = x.shape[:] shape[axis] = x.shape[axis] / splits results = [] - for n in xrange(splits): + for n in range(splits): o = self.var(op.name + ":" + str(n), x.type, shape) op.add_output(o) results.append(o) @@ -379,5 +376,5 @@ def rank(self, x, 
name=None): def builder_factory(flow, name): return Builder(flow, name) -flow.builder_factory = builder_factory +set_builder_factory(builder_factory) diff --git a/python/myelin/flow.py b/python/myelin/flow.py index 87cc3781..f8a80375 100644 --- a/python/myelin/flow.py +++ b/python/myelin/flow.py @@ -16,15 +16,16 @@ """Myelin computation flows.""" import os -from struct import calcsize -from struct import pack -from struct import unpack -from struct import unpack_from +from struct import calcsize, pack, unpack, unpack_from def dummy_factory_builder(flow, name): raise Exception("No flow builder defined") -builder_factory = dummy_factory_builder +builder_factory_method = dummy_factory_builder + +def set_builder_factory(factory): + global builder_factory_method + builder_factory_method = factory class FileWriter: """Flow file writer.""" @@ -430,7 +431,7 @@ def blob(self, name): def define(self, name): """Create a builder for a new funtion.""" - return builder_factory(self, name) + return builder_factory_method(self, name) def rename_prefix(self, prefix, replacement): """Replace prefix in all names.""" @@ -623,13 +624,13 @@ def load(self, filename): if version >= 5: self.flags = f.read_int() num_vars = f.read_int() - for _ in xrange(num_vars): + for _ in range(num_vars): flags = 0 if version >= 5: flags = f.read_int() name = f.read_string() num_aliases = f.read_int() aliases = [] - for i in xrange(num_aliases): + for i in range(num_aliases): aliases.append(f.read_string()) t = f.read_string() if t[0] == '&': @@ -637,7 +638,7 @@ def load(self, filename): t = t[1:] shape_size = f.read_int() shape = [] - for _ in xrange(shape_size): + for _ in range(shape_size): shape.append(f.read_int()) var = self.var(name, type=t, shape=shape) var.flags = flags @@ -646,7 +647,7 @@ def load(self, filename): var.data = f.slice(size) # avoid creating a copy num_ops = f.read_int() - for _ in xrange(num_ops): + for _ in range(num_ops): flags = 0 if version >= 5: flags = f.read_int() name = 
f.read_string() @@ -655,43 +656,43 @@ def load(self, filename): op.type = f.read_string() num_in = f.read_int() - for _ in xrange(num_in): + for _ in range(num_in): op.add_input(self.var(name=f.read_string())) num_out = f.read_int() - for _ in xrange(num_out): + for _ in range(num_out): op.add_output(self.var(name=f.read_string())) num_attr = f.read_int() - for _ in xrange(num_attr): + for _ in range(num_attr): attr_name = f.read_string() attr_val = f.read_string() op.add_attr(attr_name, attr_val) num_funcs = f.read_int() - for _ in xrange(num_funcs): + for _ in range(num_funcs): flags = 0 if version >= 5: flags = f.read_int() name = f.read_string() func = self.func(name) func.flags = flags n = f.read_int() - for _ in xrange(n): + for _ in range(n): func.add(self.op(f.read_string())) num_cnxs = f.read_int() - for _ in xrange(num_cnxs): + for _ in range(num_cnxs): flags = 0 if version >= 5: flags = f.read_int() name = f.read_string() cnx = self.cnx(name) cnx.flags = flags n = f.read_int() - for _ in xrange(n): + for _ in range(n): cnx.add(self.var(f.read_string())) num_blobs = f.read_int() - for _ in xrange(num_blobs): + for _ in range(num_blobs): flags = 0 if version >= 5: flags = f.read_int() name = f.read_string() @@ -699,7 +700,7 @@ def load(self, filename): blob.flags = flags blob.type = f.read_string() n = f.read_int() - for _ in xrange(n): + for _ in range(n): name = f.read_string() val = f.read_string() blob.add_attr(name, val) diff --git a/python/myelin/lexical_encoder.py b/python/myelin/lexical_encoder.py index ef5dc4fa..5db73989 100644 --- a/python/myelin/lexical_encoder.py +++ b/python/myelin/lexical_encoder.py @@ -15,8 +15,8 @@ import os import tempfile -import builder -import nn +import sling.myelin.flow as flow +import sling.myelin.nn as nn # Adds a lexical encoder to 'flow', as per 'spec' (which is a Spec object). # 'lstm_feature_embeddings' is a list of embeddings data (e.g. 
numpy arrays), diff --git a/python/nlp/document.py b/python/nlp/document.py index ec859588..100a8627 100644 --- a/python/nlp/document.py +++ b/python/nlp/document.py @@ -65,7 +65,7 @@ def word(self): if start != None: size = self.frame[self.schema.token_size] if size == None: size = 1 - text = self.document._text[start : start + size] + text = self.document._text[start : start + size].decode() return text @word.setter @@ -159,7 +159,7 @@ def __init__(self, frame=None, store=None, schema=None): # Initialize document from frame. self.frame = frame self.schema = schema - self._text = frame[schema.document_text] + self._text = frame.get(schema.document_text, binary=True) self.tokens = [] self.mentions = [] self.themes = [] @@ -252,6 +252,7 @@ def text(self): @text.setter def text(self, value): + if isinstance(value, str): value = value.encode() self._text = value self.frame[self.schema.document_text] = value @@ -322,8 +323,8 @@ def __getitem__(self, key): def __iter__(self): return self - def next(self): - _, data = self.input.next() + def __next__(self): + _, data = self.input.__next__() f = sling.Store(self.commons).parse(data) return sling.Document(f, schema=self.docschema) diff --git a/python/task/__init__.py b/python/task/__init__.py index 7ae5dd25..3ba48e84 100644 --- a/python/task/__init__.py +++ b/python/task/__init__.py @@ -1,2 +1,2 @@ -from workflow import * +from sling.task.workflow import * diff --git a/python/task/download.py b/python/task/download.py index 8fda5edf..27ecfc6d 100644 --- a/python/task/download.py +++ b/python/task/download.py @@ -15,14 +15,15 @@ """Workflow builder for downloading wiki dumps""" import os -import urllib2 +from urllib.request import urlopen import _strptime import time -from workflow import * -import corpora +import sling +import sling.task.corpora as corpora import sling.flags as flags import sling.log as log +from sling.task.workflow import * # Number of concurrent downloads. 
download_concurrency = 0 @@ -59,7 +60,7 @@ def run(self, task): # Download from url to file. if ratelimit > 0: log.info("Start download of " + url) - conn = urllib2.urlopen(url) + conn = urlopen(url) last_modified = time.mktime(time.strptime(conn.headers['last-modified'], "%a, %d %b %Y %H:%M:%S GMT")) total_bytes = "bytes_downloaded" diff --git a/python/task/embedding.py b/python/task/embedding.py index d7d9977b..66521840 100644 --- a/python/task/embedding.py +++ b/python/task/embedding.py @@ -14,10 +14,10 @@ """Workflow builder for embedding processing""" -from workflow import * -from wiki import WikiWorkflow import sling.flags as flags -import corpora +import sling.task.corpora as corpora +from sling.task.workflow import * +from sling.task.wiki import WikiWorkflow class EmbeddingWorkflow: def __init__(self, name=None, wf=None): diff --git a/python/task/entity.py b/python/task/entity.py index bfb4a968..827b46f9 100644 --- a/python/task/entity.py +++ b/python/task/entity.py @@ -14,9 +14,9 @@ """Workflow builder for named entity recognition""" -from workflow import * -from wiki import WikiWorkflow import sling.flags as flags +from sling.task import * +from sling.task.wiki import WikiWorkflow class EntityWorkflow: def __init__(self, name=None, wf=None): diff --git a/python/task/wiki.py b/python/task/wiki.py index d02a15f8..c799d66c 100644 --- a/python/task/wiki.py +++ b/python/task/wiki.py @@ -14,9 +14,9 @@ """Workflow builder for Wikidata and Wikipedia processing""" -from workflow import * -import corpora import sling.flags as flags +from sling.task import * +import sling.task.corpora as corpora flags.define("--index", help="index wiki data sets", diff --git a/python/task/workflow.py b/python/task/workflow.py index 5c91d3ae..0071d18b 100644 --- a/python/task/workflow.py +++ b/python/task/workflow.py @@ -19,6 +19,7 @@ import os import re import time + import sling import sling.pysling as api import sling.flags as flags @@ -232,7 +233,7 @@ def add_param(self, name, 
value): def add_params(self, params): """Add configuration parameters to task.""" if params != None: - for name, value in params.iteritems(): + for name, value in params.items(): self.add_param(name, value) def __repr__(self): @@ -351,7 +352,7 @@ def resource(self, file, dir=None, shards=None, ext=None, format=None): prefix = m.group(1) shards = int(m.group(2)) suffix = m.group(3) - for shard in xrange(shards): + for shard in range(shards): fn = "%s-%05d-of-%05d%s" % (prefix, shard, shards, suffix) filenames.append(fn) else: @@ -373,7 +374,7 @@ def resource(self, file, dir=None, shards=None, ext=None, format=None): else: filenames.sort() resources = [] - for shard in xrange(n): + for shard in range(n): key = (filenames[shard], str(Shard(shard, n)), str(format)) r = self.resource_map.get(key) if r == None: @@ -392,7 +393,7 @@ def channel(self, producer, name="output", shards=None, format=None): channels = [] for p in producer: if shards != None: - for shard in xrange(shards): + for shard in range(shards): ch = Channel(format, Port(p, name, Shard(shard, shards)), None) p.connect_sink(ch) channels.append(ch) @@ -405,7 +406,7 @@ def channel(self, producer, name="output", shards=None, format=None): return channels elif shards != None: channels = [] - for shard in xrange(shards): + for shard in range(shards): sink = Port(producer, name, Shard(shard, shards)) ch = Channel(format, sink, None) producer.connect_sink(ch) @@ -430,7 +431,7 @@ def connect(self, channel, consumer, sharding=None, name="input"): elif multi_channel and not multi_task: # Connect multiple channels to single task. shards = len(channel) - for shard in xrange(shards): + for shard in range(shards): if channel[shard].consumer != None: raise Exception("already connected") port = Port(consumer, name, Shard(shard, shards)) channel[shard].consumer = port @@ -439,7 +440,7 @@ def connect(self, channel, consumer, sharding=None, name="input"): # Connect multiple channels to multiple tasks. 
shards = len(channel) if len(consumer) != shards: raise Exception("size mismatch") - for shard in xrange(shards): + for shard in range(shards): if channel[shard].consumer != None: raise Exception("already connected") port = Port(consumer[shard], name, None) channel[shard].consumer = port @@ -461,7 +462,7 @@ def read(self, input, name=None, params=None): if isinstance(input, list): outputs = [] shards = len(input) - for shard in xrange(shards): + for shard in range(shards): format = input[shard].format if type(format) == str: format = Format(format) if format == None: format = Format("text") @@ -513,7 +514,7 @@ def write(self, producer, output, sharding=None, name=None, params=None): # Create writer tasks for writing to output. writer_tasks = [] - for shard in xrange(fanout): + for shard in range(fanout): format = output[shard].format if type(format) == str: format = Format(format) if format == None: format = Format("text") @@ -592,7 +593,7 @@ def shuffle(self, input, shards=None): # Pipe outputs from sharder to sorters. sorters = [] - for i in xrange(shards): + for i in range(shards): sorter = self.task("sorter", shard=Shard(i, shards)) self.connect(pipes[i], sorter) sorters.append(sorter) @@ -740,7 +741,7 @@ def save_workflow_log(path): def run(wf): # In dryrun mode the workflow is just dumped without running it. if flags.arg.dryrun: - print wf.dump() + print(wf.dump()) return # Start workflow. diff --git a/run.sh b/run.sh index 37556be1..c2f35f6e 100755 --- a/run.sh +++ b/run.sh @@ -1,4 +1,4 @@ #!/bin/sh -python python/run.py $* +python3 python/run.py $* diff --git a/setup.sh b/setup.sh new file mode 100755 index 00000000..b83c089c --- /dev/null +++ b/setup.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# Exit on errors. +set -e + +echo "=========================================================================" +echo "Set up SLING development environment" +echo "=========================================================================" + +# Install packages. 
+echo +echo "=== Install SLING dependencies" +PYVER=3.5 +PYPKGS="python${PYVER} python${PYVER}-dev python3-pip" +PKGS="pkg-config zip g++ zlib1g-dev unzip ${PYPKGS}" +sudo apt-get install ${PKGS} + +# Install bazel (the installer is downloaded to /tmp and removed afterwards). +BAZELVER=0.13.0 +BAZELSH=bazel-${BAZELVER}-installer-linux-x86_64.sh +BAZELREPO=https://github.com/bazelbuild/bazel +BAZELURL=${BAZELREPO}/releases/download/${BAZELVER}/${BAZELSH} +if ! which bazel > /dev/null; then + echo + echo "=== Install Bazel build system" + wget -P /tmp ${BAZELURL} + chmod +x /tmp/${BAZELSH} + sudo /tmp/${BAZELSH} + rm /tmp/${BAZELSH} +fi + +# Build SLING. +echo +echo "=== Build SLING" +tools/buildall.sh + +# Install SLING Python API. +echo +echo "=== Set up SLING Python API" +SLINGPKG=/usr/lib/python3/dist-packages/sling + +PIP="sudo -H pip3 --disable-pip-version-check" + +if [[ -L "/usr/lib/python2.7/dist-packages/sling" ]]; then + echo "Removing deprecated SLING Python 2.7 package" + sudo rm /usr/lib/python2.7/dist-packages/sling +fi +if [[ -L "/usr/local/lib/python2.7/dist-packages/sling" ]]; then + echo "Removing deprecated SLING Python 2.7 local package" + sudo rm /usr/local/lib/python2.7/dist-packages/sling +fi + +if [[ $(${PIP} freeze | grep "sling==") ]]; then + echo "Removing existing SLING pip package" + ${PIP} uninstall sling +fi + +if [[ -x "${SLINGPKG}" ]]; then + echo "SLING Python package already installed" +else + echo "Adding link for SLING Python package" + sudo ln -s $(realpath python) ${SLINGPKG} +fi + +# Done. +echo +echo "=== SLING is now set up." 
+ diff --git a/sling/pyapi/BUILD b/sling/pyapi/BUILD index 2e983b15..2d2d7759 100644 --- a/sling/pyapi/BUILD +++ b/sling/pyapi/BUILD @@ -59,13 +59,12 @@ cc_library( "//sling/task:process", "//sling/stream:file", "//sling/stream:memory", - "//sling/stream:unix-file", "//sling/string:text", ], copts = [ - "-Wno-pmf-conversions", "-Wno-write-strings", "-Wno-invalid-offsetof", + "-DPYVER=35" ], ) diff --git a/sling/pyapi/pyapi.cc b/sling/pyapi/pyapi.cc index 6d65e337..f9c25e37 100644 --- a/sling/pyapi/pyapi.cc +++ b/sling/pyapi/pyapi.cc @@ -12,14 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef SLING_GOOGLE3 -#include -#else -#include -#endif - #include "sling/base/init.h" + #include "sling/pyapi/pyarray.h" +#include "sling/pyapi/pybase.h" #include "sling/pyapi/pydate.h" #include "sling/pyapi/pyframe.h" #include "sling/pyapi/pymyelin.h" @@ -50,8 +46,20 @@ static PyMethodDef py_funcs[] = { {nullptr, nullptr, 0, nullptr} }; -static void RegisterPythonModule() { - PyObject *module = Py_InitModule3("pysling", py_funcs, "SLING"); +static struct PyModuleDef py_module = { + PyModuleDef_HEAD_INIT, + "pysling", + nullptr, + 0, + py_funcs, + nullptr, + nullptr, + nullptr, + nullptr +}; + +static PyObject *RegisterPythonModule() { + PyObject *module = PyModule_Create(&py_module); PyStore::Define(module); PySymbols::Define(module); @@ -89,14 +97,16 @@ static void RegisterPythonModule() { PyResource::Define(module); PyTask::Define(module); #endif + + return module; } } // namespace sling -extern "C" void initpysling() { +PyMODINIT_FUNC PyInit_pysling() { #ifndef SLING_GOOGLE3 sling::InitSharedLibrary(); #endif - sling::RegisterPythonModule(); + return sling::RegisterPythonModule(); } diff --git a/sling/pyapi/pyarray.cc b/sling/pyapi/pyarray.cc index 05dd7507..623190cb 100644 --- a/sling/pyapi/pyarray.cc +++ b/sling/pyapi/pyarray.cc @@ -43,6 +43,7 @@ void PyArray::Define(PyObject *module) { 
sequence.sq_ass_item = method_cast(&PyArray::SetItem); sequence.sq_contains = method_cast(&PyArray::Contains); + methods.Add("get", &PyArray::Get); methods.Add("store", &PyArray::GetStore); methods.Add("data", &PyArray::Data); type.tp_methods = methods.table(); @@ -89,15 +90,33 @@ PyObject *PyArray::GetItem(Py_ssize_t index) { return pystore->PyValue(arr->get(pos(index))); } +PyObject *PyArray::Get(PyObject *args, PyObject *kw) { + static const char *kwlist[] = {"index", "binary", nullptr}; + int index = 0; + bool binary = false; + if (!PyArg_ParseTupleAndKeywords(args, kw, "i|b", + const_cast(kwlist), &index, &binary)) return nullptr; + + // Check array bounds. + ArrayDatum *arr = array(); + if (index < 0) index = length() + index; + if (index < 0 || index >= length()) { + PyErr_SetString(PyExc_IndexError, "Array index out of bounds"); + return nullptr; + } + + // Return array element. + return pystore->PyValue(arr->get(pos(index)), binary); +} + PyObject *PyArray::GetItems(PyObject *key) { - if (PyInt_Check(key)) { + if (PyLong_Check(key)) { // Simple integer index. - return GetItem(PyInt_AS_LONG(key)); + return GetItem(PyLong_AS_LONG(key)); } else if (PySlice_Check(key)) { // Get index slice. 
- PySliceObject *pyslice = reinterpret_cast(key); Slice *subset = new Slice(); - if (subset->Init(pyslice, length()) == -1) { + if (subset->Init(key, length()) == -1) { delete subset; return nullptr; } @@ -215,7 +234,7 @@ PyObject *PyArray::Str() { StringPrinter printer(pystore->store); printer.Print(h); const string &text = printer.text(); - return PyString_FromStringAndSize(text.data(), text.size()); + return PyUnicode_FromStringAndSize(text.data(), text.size()); } PyObject *PyArray::Data(PyObject *args, PyObject *kw) { @@ -231,13 +250,13 @@ PyObject *PyArray::Data(PyObject *args, PyObject *kw) { flags.InitEncoder(encoder.encoder()); encoder.Encode(h); const string &buffer = encoder.buffer(); - return PyString_FromStringAndSize(buffer.data(), buffer.size()); + return PyUnicode_FromStringAndSize(buffer.data(), buffer.size()); } else { StringPrinter printer(pystore->store); flags.InitPrinter(printer.printer()); printer.Print(h); const string &text = printer.text(); - return PyString_FromStringAndSize(text.data(), text.size()); + return PyUnicode_FromStringAndSize(text.data(), text.size()); } } diff --git a/sling/pyapi/pyarray.h b/sling/pyapi/pyarray.h index 1d1d534a..161929e3 100644 --- a/sling/pyapi/pyarray.h +++ b/sling/pyapi/pyarray.h @@ -26,7 +26,7 @@ struct PyArray : public PyBase, public Root { // A slice represents a subset of the elements in an array. struct Slice { // Initialize slice from Python slice object. - int Init(PySliceObject *slice, Py_ssize_t size) { + int Init(PyObject *slice, Py_ssize_t size) { int rc = PySlice_GetIndicesEx(slice, size, &start, &stop, &step, &length); stop = start + step * length; return rc; @@ -66,6 +66,9 @@ struct PyArray : public PyBase, public Root { // Get element from array. PyObject *GetItem(Py_ssize_t index); + // Get element from array with options. + PyObject *Get(PyObject *args, PyObject *kw); + // Get elements from array. 
PyObject *GetItems(PyObject *key); diff --git a/sling/pyapi/pybase.h b/sling/pyapi/pybase.h index 2a76cb3d..e242d8ca 100644 --- a/sling/pyapi/pybase.h +++ b/sling/pyapi/pybase.h @@ -18,10 +18,20 @@ #ifdef SLING_GOOGLE3 #include #include +#elif PYVER==35 +#include +#include +#elif PYVER==36 +#include +#include +#elif PYVER==37 +#include +#include #else -#include -#include +#include +#include #endif + #include #include "sling/string/text.h" @@ -112,7 +122,7 @@ struct PyBase : public PyVarObject { // Allocate string. static PyObject *AllocateString(Text text) { - return PyString_FromStringAndSize(text.data(), text.size()); + return PyUnicode_FromStringAndSize(text.data(), text.size()); } // Type checking. diff --git a/sling/pyapi/pydate.cc b/sling/pyapi/pydate.cc index 54aa8549..809fc154 100644 --- a/sling/pyapi/pydate.cc +++ b/sling/pyapi/pydate.cc @@ -79,15 +79,14 @@ int PyDate::Init(PyObject *args, PyObject *kwds) { PyFrame *frame = reinterpret_cast(time); Store *store = frame->pystore->store; date.Init(Object(store, store->Resolve(frame->handle()))); - } else if (PyString_Check(time)) { + } else if (PyUnicode_Check(time)) { // Parse date from string. - char *data; Py_ssize_t length; - PyString_AsStringAndSize(time, &data, &length);\ + const char *data = PyUnicode_AsUTF8AndSize(time, &length); date.ParseFromString(Text(data, length)); - } else if (PyInt_Check(time)) { + } else if (PyLong_Check(time)) { // Parse date from number. 
- date.ParseFromNumber(PyInt_AsLong(time)); + date.ParseFromNumber(PyLong_AsLong(time)); } else { PyErr_SetString(PyExc_ValueError, "Cannot create date from value"); return -1; @@ -107,7 +106,7 @@ PyObject *PyDate::Str() { PyObject *PyDate::Value() { int number = date.AsNumber(); - if (number != -1) return PyInt_FromLong(number); + if (number != -1) return PyLong_FromLong(number); return AllocateString(date.AsString()); } @@ -120,6 +119,7 @@ void PyCalendar::Define(PyObject *module) { type.tp_init = method_cast(&PyCalendar::Init); type.tp_dealloc = method_cast(&PyCalendar::Dealloc); + methods.AddO("str", &PyCalendar::Str); methods.AddO("day", &PyCalendar::Day); methods.AddO("month", &PyCalendar::Month); methods.AddO("year", &PyCalendar::Year); @@ -154,7 +154,6 @@ void PyCalendar::Dealloc() { PyObject *PyCalendar::Str(PyObject *obj) { PyDate *pydate = GetDate(obj); if (pydate == nullptr) return nullptr; - return AllocateString(calendar->DateAsString(pydate->date)); } diff --git a/sling/pyapi/pyframe.cc b/sling/pyapi/pyframe.cc index 58b944cd..42533c40 100644 --- a/sling/pyapi/pyframe.cc +++ b/sling/pyapi/pyframe.cc @@ -46,6 +46,7 @@ void PyFrame::Define(PyObject *module) { type.tp_as_sequence = &sequence; sequence.sq_contains = method_cast(&PyFrame::Contains); + methods.Add("get", &PyFrame::Get); methods.Add("data", &PyFrame::Data); methods.Add("append", &PyFrame::Append); methods.AddO("extend", &PyFrame::Extend); @@ -132,6 +133,25 @@ PyObject *PyFrame::Lookup(PyObject *key) { return pystore->PyValue(value); } +PyObject *PyFrame::Get(PyObject *args, PyObject *kw) { + static const char *kwlist[] = {"role", "binary", nullptr}; + PyObject *key = nullptr; + bool binary = false; + if (!PyArg_ParseTupleAndKeywords(args, kw, "O|b", + const_cast(kwlist), &key, &binary)) return nullptr; + + // Look up role. + Handle role = pystore->RoleValue(key, true); + if (role.IsError()) return nullptr; + + // Return None if the role name does not exist. 
+ if (role.IsNil()) Py_RETURN_NONE; + + // Look up (first) value for role. + Handle value = frame()->get(role); + return pystore->PyValue(value, binary); +} + int PyFrame::Assign(PyObject *key, PyObject *v) { // Check that frame is writable. if (!Writable()) return -1; @@ -177,15 +197,15 @@ int PyFrame::Contains(PyObject *key) { } PyObject *PyFrame::GetAttr(PyObject *key) { - // Get attribute name. - char *name = PyString_AsString(key); - if (name == nullptr) return nullptr; - // Resolve methods. - PyObject *method = Py_FindMethod(methods.table(), AsObject(), name); + PyObject *method = PyObject_GenericGetAttr(AsObject(), key); if (method != nullptr) return method; PyErr_Clear(); + // Get attribute name. + const char *name = PyUnicode_AsUTF8(key); + if (name == nullptr) return nullptr; + // Lookup role. Handle role = pystore->store->LookupExisting(name); if (role.IsNil()) Py_RETURN_NONE; @@ -200,7 +220,7 @@ int PyFrame::SetAttr(PyObject *key, PyObject *v) { if (!Writable()) return -1; // Get role name. - char *name = PyString_AsString(key); + const char *name = PyUnicode_AsUTF8(key); if (name == nullptr) return -1; // Lookup role. @@ -298,13 +318,13 @@ PyObject *PyFrame::Str() { Handle id = f->get(Handle::id()); SymbolDatum *symbol = pystore->store->Deref(id)->AsSymbol(); StringDatum *name = pystore->store->GetString(symbol->name); - return PyString_FromStringAndSize(name->data(), name->size()); + return PyUnicode_FromStringAndSize(name->data(), name->size()); } else { // Return frame as text. 
StringPrinter printer(pystore->store); printer.Print(handle()); const string &text = printer.text(); - return PyString_FromStringAndSize(text.data(), text.size()); + return PyUnicode_FromStringAndSize(text.data(), text.size()); } } @@ -319,7 +339,7 @@ PyObject *PyFrame::Data(PyObject *args, PyObject *kw) { flags.InitEncoder(encoder.encoder()); encoder.Encode(handle()); const string &buffer = encoder.buffer(); - return PyString_FromStringAndSize(buffer.data(), buffer.size()); + return PyBytes_FromStringAndSize(buffer.data(), buffer.size()); } else if (flags.json) { string json; StringOutputStream stream(&json); @@ -331,13 +351,13 @@ PyObject *PyFrame::Data(PyObject *args, PyObject *kw) { writer.set_byref(flags.byref); writer.Write(handle()); output.Flush(); - return PyString_FromStringAndSize(json.data(), json.size()); + return PyUnicode_FromStringAndSize(json.data(), json.size()); } else { StringPrinter printer(pystore->store); flags.InitPrinter(printer.printer()); printer.Print(handle()); const string &text = printer.text(); - return PyString_FromStringAndSize(text.data(), text.size()); + return PyUnicode_FromStringAndSize(text.data(), text.size()); } } diff --git a/sling/pyapi/pyframe.h b/sling/pyapi/pyframe.h index bb94f11d..00e1620b 100644 --- a/sling/pyapi/pyframe.h +++ b/sling/pyapi/pyframe.h @@ -35,6 +35,9 @@ struct PyFrame : public PyBase, public Root { // Look up role value for frame. PyObject *Lookup(PyObject *key); + // Get role value for frame with options. + PyObject *Get(PyObject *args, PyObject *kw); + // Assign value to slot. int Assign(PyObject *key, PyObject *v); diff --git a/sling/pyapi/pymisc.cc b/sling/pyapi/pymisc.cc index 2a6acea3..b2a9a2c7 100644 --- a/sling/pyapi/pymisc.cc +++ b/sling/pyapi/pymisc.cc @@ -36,8 +36,8 @@ PyObject *PyGetFlags() { Flag *flag = flags[i]; // Get name and help string. 
- PyObject *name = PyString_FromString(flag->name); - PyObject *help = PyString_FromString(flag->help); + PyObject *name = PyUnicode_FromString(flag->name); + PyObject *help = PyUnicode_FromString(flag->help); // Get default flag value. PyObject *defval = nullptr; @@ -46,22 +46,22 @@ PyObject *PyGetFlags() { defval = PyBool_FromLong(flag->value()); break; case Flag::INT32: - defval = PyInt_FromLong(flag->value()); + defval = PyLong_FromLong(flag->value()); break; case Flag::UINT32: - defval = PyInt_FromLong(flag->value()); + defval = PyLong_FromLong(flag->value()); break; case Flag::INT64: - defval = PyLong_FromLong(flag->value()); + defval = PyLong_FromLongLong(flag->value()); break; case Flag::UINT64: - defval = PyLong_FromUnsignedLong(flag->value()); + defval = PyLong_FromUnsignedLongLong(flag->value()); break; case Flag::DOUBLE: defval = PyFloat_FromDouble(flag->value()); break; case Flag::STRING: - defval = PyString_FromStringAndSize( + defval = PyUnicode_FromStringAndSize( flag->value().data(), flag->value().size()); break; } @@ -92,10 +92,10 @@ PyObject *PySetFlag(PyObject *self, PyObject *args) { flag->value() = (value == Py_True); break; case Flag::INT32: - flag->value() = PyInt_AsLong(value); + flag->value() = PyLong_AsLong(value); break; case Flag::UINT32: - flag->value() = PyInt_AsUnsignedLongMask(value); + flag->value() = PyLong_AsLong(value); break; case Flag::INT64: flag->value() = PyLong_AsLongLong(value); @@ -107,7 +107,7 @@ PyObject *PySetFlag(PyObject *self, PyObject *args) { flag->value() = PyFloat_AsDouble(value); break; case Flag::STRING: - flag->value() = PyString_AsString(value); + flag->value() = PyUnicode_AsUTF8(value); break; } diff --git a/sling/pyapi/pymyelin.cc b/sling/pyapi/pymyelin.cc index 6e86d1ae..f3518d96 100644 --- a/sling/pyapi/pymyelin.cc +++ b/sling/pyapi/pymyelin.cc @@ -62,25 +62,25 @@ static int AssignElement(char *ptr, Type type, PyObject *value) { break; } case DT_INT32: { - int v = PyInt_AsLong(value); + int v = 
PyLong_AsLong(value); if (v == -1 && PyErr_Occurred()) return -1; *reinterpret_cast(ptr) = v; break; } case DT_UINT8: { - int v = PyInt_AsLong(value); + int v = PyLong_AsUnsignedLong(value); if (v == -1 && PyErr_Occurred()) return -1; *reinterpret_cast(ptr) = v; break; } case DT_INT16: { - int v = PyInt_AsLong(value); + int v = PyLong_AsLong(value); if (v == -1 && PyErr_Occurred()) return -1; *reinterpret_cast(ptr) = v; break; } case DT_INT8: { - int v = PyInt_AsLong(value); + int v = PyLong_AsLong(value); if (v == -1 && PyErr_Occurred()) return -1; *reinterpret_cast(ptr) = v; break; @@ -113,13 +113,13 @@ static PyObject *RetrieveElement(char *ptr, Type type) { case DT_DOUBLE: return PyFloat_FromDouble(*reinterpret_cast(ptr)); case DT_INT32: - return PyInt_FromLong(*reinterpret_cast(ptr)); + return PyLong_FromLong(*reinterpret_cast(ptr)); case DT_UINT8: - return PyInt_FromLong(*reinterpret_cast(ptr)); + return PyLong_FromUnsignedLong(*reinterpret_cast(ptr)); case DT_INT16: - return PyInt_FromLong(*reinterpret_cast(ptr)); + return PyLong_FromLong(*reinterpret_cast(ptr)); case DT_INT8: - return PyInt_FromLong(*reinterpret_cast(ptr)); + return PyLong_FromLong(*reinterpret_cast(ptr)); case DT_INT64: return PyLong_FromLongLong(*reinterpret_cast(ptr)); case DT_BOOL: @@ -238,7 +238,7 @@ bool PyCompiler::ImportFlow(PyObject *pyflow, Flow *flow, PyBuffers *buffers) { PyObject *pyshape = PyAttr(pyvar, "shape"); Shape shape; for (int i = 0; i < PyList_Size(pyshape); ++i) { - int dim = PyInt_AsLong(PyList_GetItem(pyshape, i)); + int dim = PyLong_AsLong(PyList_GetItem(pyshape, i)); if (dim == -1) dim = 1; shape.add(dim); } @@ -361,9 +361,9 @@ bool PyCompiler::ImportAttributes(PyObject *obj, Attributes *attrs) { PyObject *pyname; PyObject *pyvalue; while (PyDict_Next(pyattrs, &pos, &pyname, &pyvalue)) { - const char *name = PyString_AsString(pyname); + const char *name = PyUnicode_AsUTF8(pyname); if (name == nullptr) return false; - const char *value = 
PyString_AsString(pyvalue); + const char *value = PyUnicode_AsUTF8(pyvalue); if (value == nullptr) return false; attrs->SetAttr(name, value); } @@ -373,7 +373,7 @@ bool PyCompiler::ImportAttributes(PyObject *obj, Attributes *attrs) { const char *PyCompiler::PyStrAttr(PyObject *obj, const char *name) { PyObject *attr = PyAttr(obj, name); - const char *str = attr == Py_None ? "" : PyString_AsString(attr); + const char *str = attr == Py_None ? "" : PyUnicode_AsUTF8(attr); CHECK(str != nullptr) << name; Py_DECREF(attr); return str; @@ -510,7 +510,7 @@ int PyNetwork::SetTensor(PyObject *key, PyObject *value) { PyObject *PyNetwork::LookupCell(PyObject *key) { // Get cell name. - const char *name = PyString_AsString(key); + const char *name = PyUnicode_AsUTF8(key); if (name == nullptr) return nullptr; // Look up cell in network. @@ -536,22 +536,22 @@ Tensor *PyNetwork::FindTensor(PyObject *key, const Cell *cell) { // parameter array of the network. Otherwise, Otherwise, the repr() method // is used for computing the name of the tensor. 
Tensor *tensor; - if (PyInt_Check(key)) { - int index = PyInt_AsLong(key); + if (PyLong_Check(key)) { + int index = PyLong_AsLong(key); auto &params = net->parameters(); if (index < 0 || index >= params.size()) { PyErr_SetString(PyExc_IndexError, "Invalid parameter tensor index"); return nullptr; } tensor = params[index]; - } else if (PyString_Check(key)) { - const char *name = PyString_AsString(key); + } else if (PyUnicode_Check(key)) { + const char *name = PyUnicode_AsUTF8(key); if (name == nullptr) return nullptr; tensor = net->LookupParameter(name); } else { PyObject *repr = PyObject_Repr(key); if (repr == nullptr) return nullptr; - const char *name = PyString_AsString(repr); + const char *name = PyUnicode_AsUTF8(repr); if (name == nullptr) { Py_DECREF(repr); return nullptr; @@ -641,7 +641,7 @@ PyObject *PyCell::Index(PyObject *key) { break; } } - return PyInt_FromLong(index); + return PyLong_FromLong(index); } int PyCell::Contains(PyObject *key) { @@ -868,7 +868,7 @@ Py_ssize_t PyChannel::Size() { PyObject *PyChannel::Lookup(PyObject *key) { // Get index. 
- int index = PyInt_AsLong(key); + int index = PyLong_AsLong(key); if (index == -1 && PyErr_Occurred()) return nullptr; if (index < 0 || index >= channel->size()) { PyErr_SetString(PyExc_IndexError, "Invalid channel element index"); @@ -910,7 +910,6 @@ void PyTensor::Define(PyObject *module) { mapping.mp_ass_subscript = method_cast(&PyTensor::SetElement); type.tp_as_buffer = &buffer; - type.tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER; buffer.bf_getbuffer = method_cast(&PyTensor::GetBuffer); buffer.bf_releasebuffer = @@ -947,7 +946,7 @@ PyObject *PyTensor::Name() { } PyObject *PyTensor::Rank() { - return PyInt_FromLong(format->rank()); + return PyLong_FromLong(format->rank()); } Py_ssize_t PyTensor::Size() { @@ -957,7 +956,7 @@ Py_ssize_t PyTensor::Size() { PyObject *PyTensor::Shape() { PyObject *dims = PyList_New(format->rank()); for (int d = 0; d < format->rank(); ++d) { - PyList_SetItem(dims, d, PyInt_FromLong(format->dim(d))); + PyList_SetItem(dims, d, PyLong_FromLong(format->dim(d))); } return dims; } @@ -1001,7 +1000,7 @@ char *PyTensor::GetAddress(PyObject *index) { return data; } else if (rank == 1) { // Get single-dimensional index. - int idx = PyInt_AsLong(index); + int idx = PyLong_AsLong(index); if (idx == -1 && PyErr_Occurred()) return nullptr; if (idx < 0) idx += format->dim(0); if (idx < 0 || idx >= format->dim(0)) { @@ -1018,7 +1017,7 @@ char *PyTensor::GetAddress(PyObject *index) { } size_t ofs = 0; for (int d = 0; d < rank; ++d) { - int idx = PyInt_AsLong(PyTuple_GetItem(index, d)); + int idx = PyLong_AsLong(PyTuple_GetItem(index, d)); if (idx == -1 && PyErr_Occurred()) return nullptr; if (idx < 0) idx += format->dim(d); if (idx < 0 || idx >= format->dim(d)) { @@ -1030,7 +1029,7 @@ char *PyTensor::GetAddress(PyObject *index) { return data + ofs; } else { // Linear indexing into multi-dimensional tensor. 
- int idx = PyInt_AsLong(index); + int idx = PyLong_AsLong(index); if (idx == -1 && PyErr_Occurred()) return nullptr; if (idx < 0 || idx >= format->elements()) { PyErr_SetString(PyExc_IndexError, "Invalid tensor index"); @@ -1126,12 +1125,12 @@ char *PyBuffers::GetData(PyObject *obj, Type type, size_t *size) { return static_cast(view->buf); } - // Try to get buffer from string. - if (PyString_Check(obj)) { - // Get string buffer. + // Try to get buffer from bytes. + if (PyBytes_Check(obj)) { + // Get byte buffer. char *data; Py_ssize_t length; - if (PyString_AsStringAndSize(obj, &data, &length) == -1) return nullptr; + if (PyBytes_AsStringAndSize(obj, &data, &length) == -1) return nullptr; Py_INCREF(obj); refs_.push_back(obj); *size = length; @@ -1142,7 +1141,7 @@ char *PyBuffers::GetData(PyObject *obj, Type type, size_t *size) { if (type == DT_INVALID) { if (PyFloat_Check(obj)) { type = DT_FLOAT; - } else if (PyInt_Check(obj)) { + } else if (PyLong_Check(obj)) { type = DT_INT32; } } @@ -1160,22 +1159,22 @@ char *PyBuffers::GetData(PyObject *obj, Type type, size_t *size) { return flow_->AllocateMemory(&v, sizeof(double)); } case DT_INT32: { - int v = PyInt_AsLong(obj); + int v = PyLong_AsLong(obj); *size = sizeof(int); return flow_->AllocateMemory(&v, sizeof(int)); } case DT_INT64: { - int64 v = PyLong_AsLong(obj); + int64 v = PyLong_AsLongLong(obj); *size = sizeof(int64); return flow_->AllocateMemory(&v, sizeof(int64)); } case DT_INT16: { - int16 v = PyInt_AsLong(obj); + int16 v = PyLong_AsLong(obj); *size = sizeof(int16); return flow_->AllocateMemory(&v, sizeof(int16)); } case DT_INT8: { - int8 v = PyInt_AsLong(obj); + int8 v = PyLong_AsLong(obj); *size = sizeof(int8); return flow_->AllocateMemory(&v, sizeof(int8)); } diff --git a/sling/pyapi/pyparser.cc b/sling/pyapi/pyparser.cc index 1fcc417e..44967cd1 100644 --- a/sling/pyapi/pyparser.cc +++ b/sling/pyapi/pyparser.cc @@ -174,7 +174,7 @@ PyObject *PyToLex(PyObject *self, PyObject *args) { string lex = 
nlp::ToLex(document); // Return LEX representation. - return PyString_FromStringAndSize(lex.data(), lex.size()); + return PyUnicode_FromStringAndSize(lex.data(), lex.size()); } PyObject *PyEvaluateFrames(PyObject *self, PyObject *args) { diff --git a/sling/pyapi/pyphrase.cc b/sling/pyapi/pyphrase.cc index d15b240d..5455e2f3 100644 --- a/sling/pyapi/pyphrase.cc +++ b/sling/pyapi/pyphrase.cc @@ -57,7 +57,7 @@ void PyPhraseMatch::Dealloc() { } PyObject *PyPhraseMatch::Id() { - return PyString_FromStringAndSize(info.id.data(), info.id.size()); + return PyUnicode_FromStringAndSize(info.id.data(), info.id.size()); } PyObject *PyPhraseMatch::Item() { @@ -66,11 +66,11 @@ PyObject *PyPhraseMatch::Item() { } PyObject *PyPhraseMatch::Form() { - return PyInt_FromLong(info.form); + return PyLong_FromLong(info.form); } PyObject *PyPhraseMatch::Count() { - return PyInt_FromLong(info.count); + return PyLong_FromLong(info.count); } PyObject *PyPhraseMatch::Reliable() { @@ -120,7 +120,7 @@ void PyPhraseTable::Dealloc() { PyObject *PyPhraseTable::Lookup(PyObject *obj) { // Get phrase. - char *phrase = PyString_AsString(obj); + const char *phrase = PyUnicode_AsUTF8(obj); if (phrase == nullptr) return nullptr; // Compute phrase fingerprint. @@ -141,7 +141,7 @@ PyObject *PyPhraseTable::Lookup(PyObject *obj) { PyObject *PyPhraseTable::Query(PyObject *obj) { // Get phrase. - char *phrase = PyString_AsString(obj); + const char *phrase = PyUnicode_AsUTF8(obj); if (phrase == nullptr) return nullptr; // Compute phrase fingerprint. @@ -164,7 +164,7 @@ PyObject *PyPhraseTable::Query(PyObject *obj) { PyObject *PyPhraseTable::Fingerprint(PyObject *obj) { // Get phrase. - char *phrase = PyString_AsString(obj); + const char *phrase = PyUnicode_AsUTF8(obj); if (phrase == nullptr) return nullptr; // Compute phrase fingerprint. @@ -176,7 +176,7 @@ PyObject *PyPhraseTable::Fingerprint(PyObject *obj) { PyObject *PyPhraseTable::Form(PyObject *obj) { // Get phrase. 
- char *phrase = PyString_AsString(obj); + const char *phrase = PyUnicode_AsUTF8(obj); if (phrase == nullptr) return nullptr; // Determine case form. @@ -185,7 +185,7 @@ PyObject *PyPhraseTable::Form(PyObject *obj) { tokenizer->FingerprintAndForm(phrase, &fp, &form); // Return case form. - return PyInt_FromLong(form); + return PyLong_FromLong(form); } } // namespace sling diff --git a/sling/pyapi/pyrecordio.cc b/sling/pyapi/pyrecordio.cc index 75cc9332..eb3822a8 100644 --- a/sling/pyapi/pyrecordio.cc +++ b/sling/pyapi/pyrecordio.cc @@ -94,10 +94,10 @@ PyObject *PyRecordReader::Read() { PyObject *k = Py_None; PyObject *v = Py_None; if (!record.key.empty()) { - k = PyString_FromStringAndSize(record.key.data(), record.key.size()); + k = PyBytes_FromStringAndSize(record.key.data(), record.key.size()); } if (!record.value.empty()) { - v = PyString_FromStringAndSize(record.value.data(), record.value.size()); + v = PyBytes_FromStringAndSize(record.value.data(), record.value.size()); } PyObject *pair = PyTuple_Pack(2, k, v); if (k != Py_None) Py_DECREF(k); @@ -196,14 +196,19 @@ PyObject *PyRecordDatabase::Close() { PyObject *PyRecordDatabase::Lookup(PyObject *obj) { // Get key. - char *key = PyString_AsString(obj); + const char *key; + if (PyUnicode_Check(obj)) { + key = PyUnicode_AsUTF8(obj); + } else { + key = PyBytes_AsString(obj); + } if (key == nullptr) return nullptr; // Look up record. 
CHECK(db != nullptr); Record record; if (!db->Lookup(key, &record)) Py_RETURN_NONE; - return PyString_FromStringAndSize(record.value.data(), record.value.size()); + return PyBytes_FromStringAndSize(record.value.data(), record.value.size()); } PyObject *PyRecordDatabase::Next() { @@ -218,10 +223,10 @@ PyObject *PyRecordDatabase::Next() { PyObject *k = Py_None; PyObject *v = Py_None; if (!record.key.empty()) { - k = PyString_FromStringAndSize(record.key.data(), record.key.size()); + k = PyBytes_FromStringAndSize(record.key.data(), record.key.size()); } if (!record.value.empty()) { - v = PyString_FromStringAndSize(record.value.data(), record.value.size()); + v = PyBytes_FromStringAndSize(record.value.data(), record.value.size()); } PyObject *pair = PyTuple_Pack(2, k, v); if (k != Py_None) Py_DECREF(k); @@ -290,16 +295,30 @@ PyObject *PyRecordWriter::Write(PyObject *args) { Slice key; Slice value; if (pykey != Py_None) { - char *data; - Py_ssize_t length; - if (PyString_AsStringAndSize(pykey, &data, &length)) return nullptr; - key = Slice(data, length); + if (PyUnicode_Check(pykey)) { + Py_ssize_t length; + const char *data = PyUnicode_AsUTF8AndSize(pykey, &length); + if (data == nullptr) return nullptr; + key = Slice(data, length); + } else { + char *data; + Py_ssize_t length; + if (PyBytes_AsStringAndSize(pykey, &data, &length)) return nullptr; + key = Slice(data, length); + } } if (pyvalue != Py_None) { - char *data; - Py_ssize_t length; - if (PyString_AsStringAndSize(pyvalue, &data, &length)) return nullptr; - value = Slice(data, length); + if (PyUnicode_Check(pyvalue)) { + Py_ssize_t length; + const char *data = PyUnicode_AsUTF8AndSize(pyvalue, &length); + if (data == nullptr) return nullptr; + value = Slice(data, length); + } else { + char *data; + Py_ssize_t length; + if (PyBytes_AsStringAndSize(pyvalue, &data, &length)) return nullptr; + value = Slice(data, length); + } } // Write record. 
diff --git a/sling/pyapi/pystore.cc b/sling/pyapi/pystore.cc index 934dd466..c4d0ad1b 100644 --- a/sling/pyapi/pystore.cc +++ b/sling/pyapi/pystore.cc @@ -19,7 +19,6 @@ #include "sling/pyapi/pyarray.h" #include "sling/pyapi/pyframe.h" #include "sling/stream/file.h" -#include "sling/stream/unix-file.h" namespace sling { @@ -110,22 +109,38 @@ PyObject *PyStore::Freeze() { PyObject *PyStore::Load(PyObject *args, PyObject *kw) { // Parse arguments. - static const char *kwlist[] = {"file", "binary", "snapshot", nullptr}; - PyObject *file = nullptr; + static const char *kwlist[] = {"filename", "binary", "snapshot", nullptr}; + char *filename = nullptr; bool force_binary = false; bool snapshot = true; bool ok = PyArg_ParseTupleAndKeywords( - args, kw, "O|bb", const_cast(kwlist), - &file, &force_binary, &snapshot); + args, kw, "s|bb", const_cast(kwlist), + &filename, &force_binary, &snapshot); if (!ok) return nullptr; // Check that store is writable. if (!Writable()) return nullptr; // Read frames from file. - if (PyFile_Check(file)) { - // Load store from file object. - StdFileInputStream stream(PyFile_AsFile(file), false); + if (snapshot && store->Pristine() && Snapshot::Valid(filename)) { + // Load store from snapshot. + Status st = Snapshot::Read(store, filename); + if (!st.ok()) { + PyErr_SetString(PyExc_IOError, st.message()); + return nullptr; + } + Py_RETURN_NONE; + } else { + // Load store from file. First, open input file. + File *f; + Status st = File::Open(filename, "r", &f); + if (!st.ok()) { + PyErr_SetString(PyExc_IOError, st.message()); + return nullptr; + } + + // Load frames from file. 
+ FileInputStream stream(f); InputParser parser(store, &stream, force_binary); store->LockGC(); Object result = parser.ReadAll(); @@ -135,69 +150,26 @@ PyObject *PyStore::Load(PyObject *args, PyObject *kw) { return nullptr; } return PyValue(result.handle()); - } else if (PyString_Check(file)) { - char *filename = PyString_AsString(file); - if (snapshot && store->Pristine() && Snapshot::Valid(filename)) { - // Load store from snapshot. - Status st = Snapshot::Read(store, filename); - if (!st.ok()) { - PyErr_SetString(PyExc_IOError, st.message()); - return nullptr; - } - Py_RETURN_NONE; - } else { - // Load store store from file. First, open input file. - File *f; - Status st = File::Open(filename, "r", &f); - if (!st.ok()) { - PyErr_SetString(PyExc_IOError, st.message()); - return nullptr; - } - - // Load frames from file. - FileInputStream stream(f); - InputParser parser(store, &stream, force_binary); - store->LockGC(); - Object result = parser.ReadAll(); - store->UnlockGC(); - if (parser.error()) { - PyErr_SetString(PyExc_IOError, parser.error_message().c_str()); - return nullptr; - } - return PyValue(result.handle()); - } - } else { - PyErr_SetString(PyExc_ValueError, "File or string argument expected"); - return nullptr; } } PyObject *PyStore::Save(PyObject *args, PyObject *kw) { // Get arguments. SerializationFlags flags(store); - PyObject *file = flags.ParseArgs(args, kw); + char *filename = flags.ParseArgs(args, kw); + if (filename == nullptr) return nullptr; // Get output stream. - OutputStream *stream; - if (PyFile_Check(file)) { - // Create stream from stdio file. - stream = new StdFileOutputStream(PyFile_AsFile(file), false); - } else if (PyString_Check(file)) { - // Open output file. 
- File *f; - Status st = File::Open(PyString_AsString(file), "w", &f); - if (!st.ok()) { - PyErr_SetString(PyExc_IOError, st.message()); - return nullptr; - } - stream = new FileOutputStream(f); - } else { - PyErr_SetString(PyExc_ValueError, "File or string argument expected"); + File *f; + Status st = File::Open(filename, "w", &f); + if (!st.ok()) { + PyErr_SetString(PyExc_IOError, st.message()); return nullptr; } // Write frames to output. - Output output(stream); + FileOutputStream stream(f); + Output output(&stream); if (flags.binary) { Encoder encoder(store, &output); flags.InitEncoder(&encoder); @@ -209,7 +181,6 @@ PyObject *PyStore::Save(PyObject *args, PyObject *kw) { } output.Flush(); - delete stream; Py_RETURN_NONE; } @@ -221,7 +192,7 @@ PyObject *PyStore::Parse(PyObject *args, PyObject *kw) { bool json = false; bool xml = false; bool ok = PyArg_ParseTupleAndKeywords( - args, kw, "S|bbb", const_cast(kwlist), + args, kw, "O|bbb", const_cast(kwlist), &object, &force_binary, &json, &xml); if (!ok) return nullptr; @@ -229,9 +200,16 @@ PyObject *PyStore::Parse(PyObject *args, PyObject *kw) { if (!Writable()) return nullptr; // Get data buffer. - char *data; + const char *data; Py_ssize_t length; - PyString_AsStringAndSize(object, &data, &length); + if (PyUnicode_Check(object)) { + data = PyUnicode_AsUTF8AndSize(object, &length); + if (data == nullptr) return nullptr; + } else { + char *ptr; + if (PyBytes_AsStringAndSize(object, &ptr, &length) == -1) return nullptr; + data = ptr; + } ArrayInputStream stream(data, length); if (xml) { @@ -262,7 +240,7 @@ Py_ssize_t PyStore::Size() { PyObject *PyStore::Lookup(PyObject *key) { // Get symbol name. - char *name = PyString_AsString(key); + const char *name = PyUnicode_AsUTF8(key); if (name == nullptr) return nullptr; // Lookup name in symbol table. 
@@ -270,18 +248,20 @@ PyObject *PyStore::Lookup(PyObject *key) { return PyValue(handle); } -PyObject *PyStore::Resolve(PyObject *handle) { - if (PyObject_TypeCheck(handle, &PyFrame::type)) { - PyFrame *pyhandle = reinterpret_cast(handle); - return PyValue(store->Resolve(pyhandle->handle())); +PyObject *PyStore::Resolve(PyObject *object) { + if (PyObject_TypeCheck(object, &PyFrame::type)) { + PyFrame *pyframe = reinterpret_cast(object); + Handle handle = pyframe->handle(); + Handle qua = store->Resolve(handle); + if (qua != handle) return PyValue(qua); } - Py_INCREF(handle); - return handle; + Py_INCREF(object); + return object; } int PyStore::Contains(PyObject *key) { // Get symbol name. - char *name = PyString_AsString(key); + const char *name = PyUnicode_AsUTF8(key); if (name == nullptr) return -1; // Lookup name in symbol table. @@ -303,7 +283,7 @@ PyObject *PyStore::NewFrame(PyObject *arg) { std::vector slots; // If the argument is a string, create a frame with that id. - if (PyString_Check(arg)) { + if (PyUnicode_Check(arg)) { slots.emplace_back(Handle::id(), SymbolValue(arg)); } else { // Parse data into slot list. @@ -339,7 +319,7 @@ PyObject *PyStore::NewArray(PyObject *arg) { *array->at(i) = value; } } else { - int size = PyInt_AsLong(arg); + int size = PyLong_AsLong(arg); if (size < 0) return nullptr; handle = store->AllocateArray(size); } @@ -376,7 +356,7 @@ PyObject *PyStore::UnlockGC() { Py_RETURN_NONE; } -PyObject *PyStore::PyValue(Handle handle) { +PyObject *PyStore::PyValue(Handle handle, bool binary) { switch (handle.tag()) { case Handle::kGlobal: case Handle::kLocal: { @@ -386,15 +366,28 @@ PyObject *PyStore::PyValue(Handle handle) { // Get datum for object. Datum *datum = store->Deref(handle); + // Convert SLING object to Python object. if (datum->IsFrame()) { // Return new frame wrapper for handle. 
PyFrame *frame = PyObject_New(PyFrame, &PyFrame::type); frame->Init(this, handle); return frame->AsObject(); } else if (datum->IsString()) { - // Return string object. StringDatum *str = datum->AsString(); - return PyString_FromStringAndSize(str->data(), str->size()); + PyObject *pystr; + if (binary) { + // Return string as bytes. + pystr = PyBytes_FromStringAndSize(str->data(), str->size()); + } else { + // Return unicode string object. + pystr = PyUnicode_FromStringAndSize(str->data(), str->size()); + if (pystr == nullptr) { + // Fall back to bytes if string is not valid UTF8. + PyErr_Clear(); + pystr = PyBytes_FromStringAndSize(str->data(), str->size()); + } + } + return pystr; } else if (datum->IsArray()) { // Return new frame array for handle. PyArray *array = PyObject_New(PyArray, &PyArray::type); @@ -404,7 +397,7 @@ PyObject *PyStore::PyValue(Handle handle) { // Return symbol name. SymbolDatum *symbol = datum->AsSymbol(); StringDatum *str = store->Deref(symbol->name)->AsString(); - return PyString_FromStringAndSize(str->data(), str->size()); + return PyUnicode_FromStringAndSize(str->data(), str->size()); } else { // Unsupported type. PyErr_SetString(PyExc_ValueError, "Unsupported object type"); @@ -414,7 +407,7 @@ PyObject *PyStore::PyValue(Handle handle) { case Handle::kIntTag: // Return integer object. - return PyInt_FromLong(handle.AsInt()); + return PyLong_FromLong(handle.AsInt()); case Handle::kFloatTag: // Return floating point number object. @@ -436,27 +429,22 @@ Handle PyStore::Value(PyObject *object) { return Handle::error(); } return frame->handle(); - } else if (PyString_Check(object)) { + } else if (PyUnicode_Check(object)) { // Create string and return handle. 
if (!Writable()) return Handle::error(); - char *data; Py_ssize_t length; - PyString_AsStringAndSize(object, &data, &length); + const char *data = PyUnicode_AsUTF8AndSize(object, &length); return store->AllocateString(Text(data, length)); - } else if (PyUnicode_Check(object)) { - // Create string from Unicode and return handle. + } else if (PyBytes_Check(object)) { + // Create string from bytes and return handle. if (!Writable()) return Handle::error(); - PyObject *str = PyUnicode_AsUTF8String(object); - if (str == nullptr) return Handle::error(); char *data; Py_ssize_t length; - PyString_AsStringAndSize(str, &data, &length); - Handle h = store->AllocateString(Text(data, length)); - Py_DECREF(str); - return h; - } else if (PyInt_Check(object)) { + PyBytes_AsStringAndSize(object, &data, &length); + return store->AllocateString(Text(data, length)); + } else if (PyLong_Check(object)) { // Return integer handle. - return Handle::Integer(PyInt_AsLong(object)); + return Handle::Integer(PyLong_AsLong(object)); } else if (PyFloat_Check(object)) { // Return floating point number handle. return Handle::Float(PyFloat_AsDouble(object)); @@ -485,7 +473,7 @@ Handle PyStore::Value(PyObject *object) { // Get slot value. 
Handle value; - if (name.IsId() && PyString_Check(v)) { + if (name.IsId() && PyUnicode_Check(v)) { value = SymbolValue(v); } else { value = Value(v); @@ -521,45 +509,24 @@ Handle PyStore::Value(PyObject *object) { } Handle PyStore::RoleValue(PyObject *object, bool existing) { - if (PyString_Check(object)) { - char *name = PyString_AsString(object); + if (PyUnicode_Check(object)) { + const char *name = PyUnicode_AsUTF8(object); if (name == nullptr) return Handle::error(); if (existing) { return store->LookupExisting(name); } else { return store->Lookup(name); } - } else if (PyUnicode_Check(object)) { - Handle h; - PyObject *str = PyUnicode_AsUTF8String(object); - if (str == nullptr) return Handle::error(); - char *name = PyString_AsString(str); - if (name == nullptr) { - h = Handle::error(); - } else if (existing) { - h = store->LookupExisting(name); - } else { - h = store->Lookup(name); - } - Py_DECREF(str); - return h; } else { return Value(object); } } Handle PyStore::SymbolValue(PyObject *object) { - if (PyString_Check(object)) { - char *name = PyString_AsString(object); + if (PyUnicode_Check(object)) { + const char *name = PyUnicode_AsUTF8(object); if (name == nullptr) return Handle::error(); return store->Symbol(name); - } else if (PyUnicode_Check(object)) { - PyObject *str = PyUnicode_AsUTF8String(object); - if (str == nullptr) return Handle::error(); - char *name = PyString_AsString(str); - Handle h = name == nullptr ? Handle::error() : store->Symbol(name); - Py_DECREF(str); - return h; } else { return Value(object); } @@ -579,7 +546,7 @@ bool PyStore::SlotList(PyObject *object, std::vector *slots) { // Get slot value. Handle value; - if (name.IsId() && (PyString_Check(v) || PyUnicode_Check(v))) { + if (name.IsId() && PyUnicode_Check(v)) { value = SymbolValue(v); } else { value = Value(v); @@ -609,7 +576,7 @@ bool PyStore::SlotList(PyObject *object, std::vector *slots) { // Get slot value. 
PyObject *v = PyTuple_GetItem(item, 1); Handle value; - if (name.IsId() && (PyString_Check(v) || PyUnicode_Check(v))) { + if (name.IsId() && PyUnicode_Check(v)) { value = SymbolValue(v); } else { value = Value(v); @@ -690,17 +657,17 @@ void SerializationFlags::InitPrinter(Printer *printer) { printer->set_utf8(utf8); } -PyObject *SerializationFlags::ParseArgs(PyObject *args, PyObject *kw) { +char *SerializationFlags::ParseArgs(PyObject *args, PyObject *kw) { static const char *kwlist[] = { - "file", "binary", "global", "shallow", "byref", "pretty", + "filename", "binary", "global", "shallow", "byref", "pretty", "utf8", "json", nullptr }; - PyObject *file = nullptr; + char *filename = nullptr; bool ok = PyArg_ParseTupleAndKeywords( - args, kw, "O|bbbbbbb", const_cast(kwlist), - &file, &binary, &global, &shallow, &byref, &pretty, &utf8, &json); + args, kw, "s|bbbbbbb", const_cast(kwlist), + &filename, &binary, &global, &shallow, &byref, &pretty, &utf8, &json); if (!ok) return nullptr; - return file; + return filename; } bool SerializationFlags::ParseFlags(PyObject *args, PyObject *kw) { diff --git a/sling/pyapi/pystore.h b/sling/pyapi/pystore.h index a4d3e2c3..f6599fba 100644 --- a/sling/pyapi/pystore.h +++ b/sling/pyapi/pystore.h @@ -41,8 +41,8 @@ struct PyStore : public PyBase { // Parse string as binary or ascii encoded frames. PyObject *Parse(PyObject *args, PyObject *kw); - // Resolves the given handle. - PyObject *Resolve(PyObject *handle); + // Resolves the given object. + PyObject *Resolve(PyObject *object); // Return the number of objects in the symbol table. Py_ssize_t Size(); @@ -70,7 +70,7 @@ struct PyStore : public PyBase { PyObject *UnlockGC(); // Create new Python object for handle value. - PyObject *PyValue(Handle handle); + PyObject *PyValue(Handle handle, bool binary=false); // Check if store can be modified. bool Writable(); @@ -149,8 +149,8 @@ struct SerializationFlags { // Set flags for printer. 
void InitPrinter(Printer *printer); - // Parse arguments for methods taking one argument. - PyObject *ParseArgs(PyObject *args, PyObject *kw); + // Parse arguments for methods taking a filename argument. + char *ParseArgs(PyObject *args, PyObject *kw); // Parse arguments for methods taking no fixed arguments. bool ParseFlags(PyObject *args, PyObject *kw); diff --git a/sling/pyapi/pytask.cc b/sling/pyapi/pytask.cc index c1c346d4..2bb3c4c9 100644 --- a/sling/pyapi/pytask.cc +++ b/sling/pyapi/pytask.cc @@ -102,8 +102,8 @@ int PyJob::Init(PyObject *args, PyObject *kwds) { PyObject *k; PyObject *v; while (PyDict_Next(params, &pos, &k, &v)) { - const char *key = PyString_AsString(k); - const char *value = PyString_AsString(v); + const char *key = PyUnicode_AsUTF8(k); + const char *value = PyUnicode_AsUTF8(v); task->AddParameter(key, value); } Py_DECREF(params); @@ -226,7 +226,7 @@ PyObject *PyJob::Counters() { // Gather current counter values. job->IterateCounters([counters](const string &name, Counter *counter) { - PyObject *key = PyString_FromStringAndSize(name.data(), name.size()); + PyObject *key = PyUnicode_FromStringAndSize(name.data(), name.size()); PyObject *val = PyLong_FromLong(counter->value()); PyDict_SetItem(counters, key, val); Py_DECREF(key); @@ -266,7 +266,7 @@ Shard PyJob::PyGetShard(PyObject *obj) { const char *PyJob::PyStrAttr(PyObject *obj, const char *name) { PyObject *attr = PyAttr(obj, name); - const char *str = attr == Py_None ? "" : PyString_AsString(attr); + const char *str = attr == Py_None ? 
"" : PyUnicode_AsUTF8(attr); CHECK(str != nullptr) << name; Py_DECREF(attr); return str; @@ -414,12 +414,12 @@ PyObject *PyTask::GetParameter(PyObject *args) { if (pydefval == nullptr) { return AllocateString(task->Get(name, "")); - } else if (PyString_Check(pydefval)) { - const char *defval = PyString_AsString(pydefval); + } else if (PyUnicode_Check(pydefval)) { + const char *defval = PyUnicode_AsUTF8(pydefval); return AllocateString(task->Get(name, defval)); - } else if (PyInt_Check(pydefval)) { - int64 defval = PyInt_AsLong(pydefval); - return PyInt_FromLong(task->Get(name, defval)); + } else if (PyLong_Check(pydefval)) { + int64 defval = PyLong_AsLong(pydefval); + return PyLong_FromLong(task->Get(name, defval)); } else if (PyFloat_Check(pydefval)) { double defval = PyFloat_AsDouble(pydefval); return PyFloat_FromDouble(task->Get(name, defval)); @@ -449,11 +449,8 @@ void PyProcessor::Run(Task *task) { PyGILState_STATE gstate = PyGILState_Ensure(); // Create Python task object. - PyObject *args = PyTuple_New(0); - CHECK(args != nullptr); - PyObject *pyproc = PyInstance_New(pycls_, args, nullptr); + PyObject *pyproc = PyObject_CallObject(pycls_, nullptr); CHECK(pyproc != nullptr); - Py_DECREF(args); // Create task wrapper. PyTask *pytask = PyObject_New(PyTask, &PyTask::type); @@ -479,9 +476,9 @@ PyObject *PyRegisterTask(PyObject *self, PyObject *args) { PyObject *cls; if (!PyArg_ParseTuple(args, "sO", &name, &cls)) return nullptr; - // Check for class object. - if (!PyClass_Check(cls)) { - PyErr_SetString(PyExc_ValueError, "Class object expected"); + // Check for type object. 
+ if (!PyType_Check(cls)) { + PyErr_SetString(PyExc_ValueError, "Type object expected"); return nullptr; } @@ -532,7 +529,7 @@ PyObject *PyStartTaskMonitor(PyObject *self, PyObject *args) { PyObject *PyGetJobStatistics() { if (dashboard == nullptr) Py_RETURN_NONE; string stats = dashboard->GetStatus(); - return PyString_FromStringAndSize(stats.data(), stats.size()); + return PyUnicode_FromStringAndSize(stats.data(), stats.size()); } PyObject *PyFinalizeDashboard() { diff --git a/sling/pyapi/pywiki.cc b/sling/pyapi/pywiki.cc index d399fb8a..5ac77c5b 100644 --- a/sling/pyapi/pywiki.cc +++ b/sling/pyapi/pywiki.cc @@ -268,11 +268,11 @@ int PyTaxonomy::Init(PyFactExtractor *extractor, PyObject *typelist) { std::vector types; for (int i = 0; i < size; ++i) { PyObject *item = PyList_GetItem(typelist, i); - if (!PyString_Check(item)) { + if (!PyUnicode_Check(item)) { PyErr_BadArgument(); return -1; } - const char *name = PyString_AsString(item); + const char *name = PyUnicode_AsUTF8(item); if (name == nullptr) { PyErr_BadArgument(); return -1; diff --git a/tools/build-wheel.py b/tools/build-wheel.py index b4ba2126..bbebc03e 100644 --- a/tools/build-wheel.py +++ b/tools/build-wheel.py @@ -17,15 +17,16 @@ # The Python wheel produced by this script can be installed with the following # command: # -# sudo pip install /tmp/sling-2.0.0-cp27-none-linux_x86_64.whl +# sudo pip install /tmp/sling-2.0.0-cp35-none-linux_x86_64.whl # # If you are developing the SLING system, it is convenient to just add a # link to the SLING repository directly from the Python package directory # instead: # -# sudo ln -s $(realpath python) /usr/local/lib/python2.7/dist-packages/sling +# sudo ln -s $(realpath python) /usr/lib/python3/dist-packages/sling import os +import sys import hashlib import base64 import zipfile @@ -45,9 +46,14 @@ def sha256_content_checksum(data): sha256.update(data) return base64.urlsafe_b64encode(sha256.digest()).rstrip(b'=') +# Python version. 
+pymajor = str(sys.version_info.major) +pyminor = str(sys.version_info.minor) +pyversion = pymajor + "." + pyminor + # Wheel package information. platform = distutils.util.get_platform().replace("-", "_") -tag = "cp27-none-" + platform +tag = "cp" + pymajor + pyminor + "-none-" + platform package = "sling" version = "2.0.0" dist_dir = package + "-" + version + ".dist-info" @@ -94,10 +100,10 @@ def sha256_content_checksum(data): wheel_metadata_filename = dist_dir + "/WHEEL" wheel_metadata = """Wheel-Version: 1.0 Root-Is-Purelib: false -Tag: $TAG$""".replace("$TAG$", tag) +Tag: $TAG$""".replace("$TAG$", tag).encode() record += wheel_metadata_filename + ",sha256=" + \ - sha256_content_checksum(wheel_metadata) + "," + \ + sha256_content_checksum(wheel_metadata).decode() + "," + \ str(len(wheel_metadata)) + "\n" wheel.writestr(wheel_metadata_filename, wheel_metadata) @@ -114,14 +120,18 @@ def sha256_content_checksum(data): Download-URL: https://github.com/google/sling/releases Platform: UNKNOWN Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: $PYMAJOR$ +Classifier: Programming Language :: Python :: $PYVERSION$ Google SLING frame semantic parsing framework -""".replace("$VERSION$", version) +""" +package_metadata = package_metadata.replace("$VERSION$", version) +package_metadata = package_metadata.replace("$PYMAJOR$", pymajor) +package_metadata = package_metadata.replace("$PYVERSION$", pyversion) +package_metadata = package_metadata.encode() record += package_metadata_filename + ",sha256=" + \ - sha256_content_checksum(package_metadata) + "," + \ + sha256_content_checksum(package_metadata).decode() + "," + \ str(len(package_metadata)) + "\n" wheel.writestr(package_metadata_filename, package_metadata) @@ -131,21 +141,21 @@ def sha256_content_checksum(data): destination = files[source] destination = destination.replace("$INFO$", 
dist_dir) destination = destination.replace("$DATA$", data_dir) - print "Install", source, "as", destination + print("Install", source, "as", destination) # Write entry to RECORD file. size = os.path.getsize(source) - checksum = sha256_checksum(source) + checksum = sha256_checksum(source.encode()).decode() record += destination + ",sha256=" + checksum + "," + str(size) + "\n" # Add file to wheel zip archive. wheel.write(source, destination) # Add RECORD file to wheel. -print "Add", record_filename +print("Add", record_filename) wheel.writestr(record_filename, record) # Done. wheel.close() -print "Wheel written to", wheel_filename +print("Wheel written to", wheel_filename) diff --git a/tools/docv1to2.py b/tools/docv1to2.py index 39388ba9..f4167afb 100644 --- a/tools/docv1to2.py +++ b/tools/docv1to2.py @@ -20,7 +20,7 @@ # Check arguments. if len(sys.argv) != 3: - print "usage:", sys.argv[0], "", "" + print("usage:", sys.argv[0], "", "") sys.exit(1) # Intialize commons store. @@ -58,5 +58,5 @@ fin.close() fout.close() -print num_docs, "documents converted" +print(num_docs, "documents converted") diff --git a/tools/optohdr.py b/tools/optohdr.py index eed44a30..8b744ecd 100644 --- a/tools/optohdr.py +++ b/tools/optohdr.py @@ -18,7 +18,7 @@ This tools converts the instruction op code table to a header file with methods for encoding each instruction. -python tools/optohdr.py > third_party/jit/avx512.inc +python3 tools/optohdr.py > third_party/jit/avx512.inc """ @@ -64,7 +64,7 @@ def find_method(name, args): return None # Parse all the op definitions. -print "// Auto-generated from Intel instruction tables." +print("// Auto-generated from Intel instruction tables.") fin = open("third_party/jit/avx512ops.txt", "r") for line in fin.readlines(): line = line.strip() @@ -96,7 +96,7 @@ def find_method(name, args): elif flag == "W0" or flag == "W1" or flag == "WIG": flags.append("EVEX_" + flag) else: - print "flag:", flag + print("flag:", flag) # Parse op code. 
opcode = tokens[1] @@ -126,7 +126,7 @@ def find_method(name, args): # vsib encoding not supported. if vsib: - if warnings: print "// vsib encoding not supported for " + mnemonic + if warnings: print("// vsib encoding not supported for " + mnemonic) continue # Parse instruction arguments. @@ -283,9 +283,9 @@ def find_method(name, args): methods.append(method) else: if warnings and opcode != method.opcode: - print "// Hmm! opcode mismatch", method.name, method.opcode + print("// Hmm! opcode mismatch", method.name, method.opcode) if warnings and numargs != method.numargs: - print "// Hmm! numargs mismatch" + print("// Hmm! numargs mismatch") method.add_flags(flags) if dt != 0: method.add_flag("EVEX_DT" + str(dt / 8)) @@ -304,7 +304,7 @@ def find_method(name, args): for i in range(len(method.args)): arg = method.args[i] if arg == "zmm/mem" or arg == "reg/mem": - if warnings and reg_mem_arg != -1: print "// Oops!! multi reg/mem" + if warnings and reg_mem_arg != -1: print("// Oops!! multi reg/mem") reg_mem_arg = i if reg_mem_arg != -1: @@ -374,6 +374,6 @@ def find_method(name, args): sig = "void " + method.name + "(" + ", ".join(argsigs) + ")" if sig not in signatures: - print sig + " {\n " + body + "\n}" + print(sig + " {\n " + body + "\n}") signatures.append(sig)