diff --git a/sentencepiece/cppbuild.sh b/sentencepiece/cppbuild.sh index 9a7e4db8e57..dd572dc35f9 100755 --- a/sentencepiece/cppbuild.sh +++ b/sentencepiece/cppbuild.sh @@ -8,13 +8,15 @@ if [[ -z "$PLATFORM" ]]; then fi SENTENCEPIECE_VERSION=0.1.99 -download https://github.com/google/sentencepiece/archive/refs/tags/v$SENTENCEPIECE_VERSION.tar.gz sentencepiece-$SENTENCEPIECE_VERSION.tar.gz +download https://github.com/google/sentencepiece/archive/refs/tags/v$SENTENCEPIECE_VERSION.zip sentencepiece-$SENTENCEPIECE_VERSION.zip mkdir -p $PLATFORM cd $PLATFORM INSTALL_PATH=`pwd` echo "Decompressing archives..." -tar -xzvf ../sentencepiece-$SENTENCEPIECE_VERSION.tar.gz + +unzip -o ../sentencepiece-$SENTENCEPIECE_VERSION.zip + cd sentencepiece-$SENTENCEPIECE_VERSION CMAKE_CONFIG="-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALL_PATH -DCMAKE_INSTALL_LIBDIR=$INSTALL_PATH/lib" diff --git a/sentencepiece/samples/SentencepieceExample.java b/sentencepiece/samples/SentencepieceExample.java index 6d918ec00eb..0270c30fd59 100644 --- a/sentencepiece/samples/SentencepieceExample.java +++ b/sentencepiece/samples/SentencepieceExample.java @@ -2,6 +2,11 @@ import org.bytedeco.javacpp.*; import org.bytedeco.sentencepiece.*; +/** + * To try encoding, you can download an existing model, e.g.: + * wget https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs10000.model + * mvn compile exec:java -e -Dexec.mainClass=SentencepieceExample -Dexec.args="en.wiki.bpe.vs10000.model" + */ public final class SentencepieceExample { public static void main(String[] args) {