Skip to content

Commit

Permalink
v0.7.3
Browse files: browse the repository at this point in the history
  • Loading branch information
cahya-wirawan committed Jun 17, 2024
1 parent f23a4db commit e8896e9
Show file tree
Hide file tree
Showing 6 changed files with 65,543 additions and 11 deletions.
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pyrwkv_tokenizer"
version = "0.7.2"
version = "0.7.3"
edition = "2021"
authors = ["Cahya Wirawan <cahya.wirawan@gmail.com>"]
description = "A fast RWKV Tokenizer"
Expand All @@ -22,4 +22,4 @@ crate-type = ["cdylib"]

[dependencies]
pyo3 = "0.21.2"
rwkv-tokenizer = "0.7.1"
rwkv-tokenizer = "0.7.3"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "pyrwkv_tokenizer"
version = "0.7.2"
version = "0.7.3"
requires-python = ">=3.8"
description = "RWKV Tokenizer"
readme = "README.md"
Expand Down
9 changes: 6 additions & 3 deletions pyrwkv_tokenizer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
# import the contents of the Rust library into the Python extension
from .pyrwkv_tokenizer import *
from .pyrwkv_tokenizer import __all__
from pathlib import Path

# optional: include the documentation from the Rust module
from .pyrwkv_tokenizer import __doc__ # noqa: F401

__all__ = __all__ + ["RWKVTokenizer"]
__version__ = "0.7.2"
__version__ = "0.7.3"


class RWKVTokenizer:
def __init__(self, name="WorldTokenizer") -> None:
def __init__(self, name="WorldTokenizer", vocab_filepath=None) -> None:
if name != "WorldTokenizer":
raise Exception(f"The {name} is not supported.")
self.tokenizer = WorldTokenizer()
self.vocab_filepath = str(Path(__path__[0]) / "rwkv_vocab_v20230424.txt") \
if vocab_filepath is None else vocab_filepath
self.tokenizer = WorldTokenizer(self.vocab_filepath)

def encode(self, text: str):
tokens_ids = self.tokenizer.encode(text)
Expand Down
Loading

0 comments on commit e8896e9

Please sign in to comment.