From e3d13a21b6852d8569174c4ad1d6f2055ed4e352 Mon Sep 17 00:00:00 2001 From: sunhailinLeo <379978424@qq.com> Date: Wed, 12 Jul 2023 11:41:38 +0800 Subject: [PATCH] update go.mod; fix Chinese tokenizer error --- README.md | 6 +- go.mod | 4 +- go.sum | 146 ++---------------------------------------- models/bert/common.go | 6 +- models/bert/model.go | 3 + test/bert_test.go | 2 +- utils/const.go | 2 + utils/funcs.go | 12 ++++ utils/times.go | 2 + 9 files changed, 33 insertions(+), 150 deletions(-) diff --git a/README.md b/README.md index 8927a0e..f008a68 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,7 @@ func main() { if initErr != nil { panic(initErr) } - bertService = bertService.SetChineseTokenize().SetMaxSeqLength(maxSeqLen) + bertService = bertService.SetChineseTokenize(false).SetMaxSeqLength(maxSeqLen) // infer inferResultV1, inferErr := bertService.ModelInfer([]string{""}, "", "", 1*time.Second) if inferErr != nil { @@ -144,6 +144,10 @@ func main() { ### Version +* version 1.4.5 - 2023/07/12 + * update go.mod + * fix Chinese tokenizer error + * version 1.4.4 - 2023/07/11 * tokenize Chinese-English-Number text with char mode for NER task. diff --git a/go.mod b/go.mod index 4b934b6..e577040 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/klauspost/compress v1.16.3 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect - golang.org/x/net v0.9.0 // indirect - golang.org/x/sys v0.7.0 // indirect + golang.org/x/net v0.12.0 // indirect + golang.org/x/sys v0.10.0 // indirect google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect ) diff --git a/go.sum b/go.sum index 82201b5..36853d0 100644 --- a/go.sum +++ b/go.sum @@ -1,163 +1,25 @@ -cloud.google.com/go v0.110.0 h1:Zc8gqp3+a9/Eyph2KDmcGaPtbKRIoqq4YTlL4NMD0Ys= -cloud.google.com/go/accessapproval v1.6.0 h1:x0cEHro/JFPd7eS4BlEWNTMecIj2HdXjOVB5BtvwER0= -cloud.google.com/go/accesscontextmanager v1.7.0 h1:MG60JgnEoawHJrbWw0jGdv6HLNSf6gQvYRiXpuzqgEA= -cloud.google.com/go/aiplatform v1.37.0 h1:zTw+suCVchgZyO+k847wjzdVjWmrAuehxdvcZvJwfGg= -cloud.google.com/go/analytics v0.19.0 h1:LqAo3tAh2FU9+w/r7vc3hBjU23Kv7GhO/PDIW7kIYgM= -cloud.google.com/go/apigateway v1.5.0 h1:ZI9mVO7x3E9RK/BURm2p1aw9YTBSCQe3klmyP1WxWEg= -cloud.google.com/go/apigeeconnect v1.5.0 h1:sWOmgDyAsi1AZ48XRHcATC0tsi9SkPT7DA/+VCfkaeA= -cloud.google.com/go/apigeeregistry v0.6.0 h1:E43RdhhCxdlV+I161gUY2rI4eOaMzHTA5kNkvRsFXvc= -cloud.google.com/go/apikeys v0.6.0 h1:B9CdHFZTFjVti89tmyXXrO+7vSNo2jvZuHG8zD5trdQ= -cloud.google.com/go/appengine v1.7.1 h1:aBGDKmRIaRRoWJ2tAoN0oVSHoWLhtO9aj/NvUyP4aYs= -cloud.google.com/go/area120 v0.7.1 h1:ugckkFh4XkHJMPhTIx0CyvdoBxmOpMe8rNs4Ok8GAag= -cloud.google.com/go/artifactregistry v1.13.0 h1:o1Q80vqEB6Qp8WLEH3b8FBLNUCrGQ4k5RFj0sn/sgO8= -cloud.google.com/go/asset v1.13.0 h1:YAsssO08BqZ6mncbb6FPlj9h6ACS7bJQUOlzciSfbNk= -cloud.google.com/go/assuredworkloads v1.10.0 h1:VLGnVFta+N4WM+ASHbhc14ZOItOabDLH1MSoDv+Xuag= -cloud.google.com/go/automl v1.12.0 h1:50VugllC+U4IGl3tDNcZaWvApHBTrn/TvyHDJ0wM+Uw= -cloud.google.com/go/baremetalsolution v0.5.0 h1:2AipdYXL0VxMboelTTw8c1UJ7gYu35LZYUbuRv9Q28s= -cloud.google.com/go/batch v0.7.0 h1:YbMt0E6BtqeD5FvSv1d56jbVsWEzlGm55lYte+M6Mzs= -cloud.google.com/go/beyondcorp v0.5.0 h1:UkY2BTZkEUAVrgqnSdOJ4p3y9ZRBPEe1LkjgC8Bj/Pc= -cloud.google.com/go/bigquery v1.50.0 h1:RscMV6LbnAmhAzD893Lv9nXXy2WCaJmbxYPWDLbGqNQ= -cloud.google.com/go/billing v1.13.0 h1:JYj28UYF5w6VBAh0gQYlgHJ/OD1oA+JgW29YZQU+UHM= -cloud.google.com/go/binaryauthorization v1.5.0 h1:d3pMDBCCNivxt5a4eaV7FwL7cSH0H7RrEnFrTb1QKWs= -cloud.google.com/go/certificatemanager v1.6.0 h1:5C5UWeSt8Jkgp7OWn2rCkLmYurar/vIWIoSQ2+LaTOc= -cloud.google.com/go/channel v1.12.0 h1:GpcQY5UJKeOekYgsX3QXbzzAc/kRGtBq43fTmyKe6Uw= -cloud.google.com/go/cloudbuild v1.9.0 h1:GHQCjV4WlPPVU/j3Rlpc8vNIDwThhd1U9qSY/NPZdko= -cloud.google.com/go/clouddms v1.5.0 h1:E7v4TpDGUyEm1C/4KIrpVSOCTm0P6vWdHT0I4mostRA= -cloud.google.com/go/cloudtasks v1.10.0 h1:uK5k6abf4yligFgYFnG0ni8msai/dSv6mDmiBulU0hU= -cloud.google.com/go/compute v1.19.1 h1:am86mquDUgjGNWxiGn+5PGLbmgiWXlE/yNWpIpNvuXY= -cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= -cloud.google.com/go/contactcenterinsights v1.6.0 h1:jXIpfcH/VYSE1SYcPzO0n1VVb+sAamiLOgCw45JbOQk= -cloud.google.com/go/container v1.15.0 h1:NKlY/wCDapfVZlbVVaeuu2UZZED5Dy1z4Zx1KhEzm8c= -cloud.google.com/go/containeranalysis v0.9.0 h1:EQ4FFxNaEAg8PqQCO7bVQfWz9NVwZCUKaM1b3ycfx3U= -cloud.google.com/go/datacatalog v1.13.0 h1:4H5IJiyUE0X6ShQBqgFFZvGGcrwGVndTwUSLP4c52gw= -cloud.google.com/go/dataflow v0.8.0 h1:eYyD9o/8Nm6EttsKZaEGD84xC17bNgSKCu0ZxwqUbpg= -cloud.google.com/go/dataform v0.7.0 h1:Dyk+fufup1FR6cbHjFpMuP4SfPiF3LI3JtoIIALoq48= -cloud.google.com/go/datafusion v1.6.0 h1:sZjRnS3TWkGsu1LjYPFD/fHeMLZNXDK6PDHi2s2s/bk= -cloud.google.com/go/datalabeling v0.7.0 h1:ch4qA2yvddGRUrlfwrNJCr79qLqhS9QBwofPHfFlDIk= -cloud.google.com/go/dataplex v1.6.0 h1:RvoZ5T7gySwm1CHzAw7yY1QwwqaGswunmqEssPxU/AM= -cloud.google.com/go/dataproc v1.12.0 h1:W47qHL3W4BPkAIbk4SWmIERwsWBaNnWm0P2sdx3YgGU= -cloud.google.com/go/dataqna v0.7.0 h1:yFzi/YU4YAdjyo7pXkBE2FeHbgz5OQQBVDdbErEHmVQ= -cloud.google.com/go/datastore v1.11.0 h1:iF6I/HaLs3Ado8uRKMvZRvF/ZLkWaWE9i8AiHzbC774= -cloud.google.com/go/datastream v1.7.0 h1:BBCBTnWMDwwEzQQmipUXxATa7Cm7CA/gKjKcR2w35T0= -cloud.google.com/go/deploy v1.8.0 h1:otshdKEbmsi1ELYeCKNYppwV0UH5xD05drSdBm7ouTk= -cloud.google.com/go/dialogflow v1.32.0 h1:uVlKKzp6G/VtSW0E7IH1Y5o0H48/UOCmqksG2riYCwQ= -cloud.google.com/go/dlp v1.9.0 h1:1JoJqezlgu6NWCroBxr4rOZnwNFILXr4cB9dMaSKO4A= -cloud.google.com/go/documentai v1.18.0 h1:KM3Xh0QQyyEdC8Gs2vhZfU+rt6OCPF0dwVwxKgLmWfI= -cloud.google.com/go/domains v0.8.0 h1:2ti/o9tlWL4N+wIuWUNH+LbfgpwxPr8J1sv9RHA4bYQ= -cloud.google.com/go/edgecontainer v1.0.0 h1:O0YVE5v+O0Q/ODXYsQHmHb+sYM8KNjGZw2pjX2Ws41c= -cloud.google.com/go/errorreporting v0.3.0 h1:kj1XEWMu8P0qlLhm3FwcaFsUvXChV/OraZwA70trRR0= -cloud.google.com/go/essentialcontacts v1.5.0 h1:gIzEhCoOT7bi+6QZqZIzX1Erj4SswMPIteNvYVlu+pM= -cloud.google.com/go/eventarc v1.11.0 h1:fsJmNeqvqtk74FsaVDU6cH79lyZNCYP8Rrv7EhaB/PU= -cloud.google.com/go/filestore v1.6.0 h1:ckTEXN5towyTMu4q0uQ1Mde/JwTHur0gXs8oaIZnKfw= -cloud.google.com/go/firestore v1.9.0 h1:IBlRyxgGySXu5VuW0RgGFlTtLukSnNkpDiEOMkQkmpA= -cloud.google.com/go/functions v1.13.0 h1:pPDqtsXG2g9HeOQLoquLbmvmb82Y4Ezdo1GXuotFoWg= -cloud.google.com/go/gaming v1.9.0 h1:7vEhFnZmd931Mo7sZ6pJy7uQPDxF7m7v8xtBheG08tc= -cloud.google.com/go/gkebackup v0.4.0 h1:za3QZvw6ujR0uyqkhomKKKNoXDyqYGPJies3voUK8DA= -cloud.google.com/go/gkeconnect v0.7.0 h1:gXYKciHS/Lgq0GJ5Kc9SzPA35NGc3yqu6SkjonpEr2Q= -cloud.google.com/go/gkehub v0.12.0 h1:TqCSPsEBQ6oZSJgEYZ3XT8x2gUadbvfwI32YB0kuHCs= -cloud.google.com/go/gkemulticloud v0.5.0 h1:8I84Q4vl02rJRsFiinBxl7WCozfdLlUVBQuSrqr9Wtk= -cloud.google.com/go/gsuiteaddons v1.5.0 h1:1mvhXqJzV0Vg5Fa95QwckljODJJfDFXV4pn+iL50zzA= -cloud.google.com/go/iam v0.13.0 h1:+CmB+K0J/33d0zSQ9SlFWUeCCEn5XJA0ZMZ3pHE9u8k= -cloud.google.com/go/iap v1.7.1 h1:PxVHFuMxmSZyfntKXHXhd8bo82WJ+LcATenq7HLdVnU= -cloud.google.com/go/ids v1.3.0 h1:fodnCDtOXuMmS8LTC2y3h8t24U8F3eKWfhi+3LY6Qf0= -cloud.google.com/go/iot v1.6.0 h1:39W5BFSarRNZfVG0eXI5LYux+OVQT8GkgpHCnrZL2vM= -cloud.google.com/go/kms v1.10.1 h1:7hm1bRqGCA1GBRQUrp831TwJ9TWhP+tvLuP497CQS2g= -cloud.google.com/go/language v1.9.0 h1:7Ulo2mDk9huBoBi8zCE3ONOoBrL6UXfAI71CLQ9GEIM= -cloud.google.com/go/lifesciences v0.8.0 h1:uWrMjWTsGjLZpCTWEAzYvyXj+7fhiZST45u9AgasasI= -cloud.google.com/go/logging v1.7.0 h1:CJYxlNNNNAMkHp9em/YEXcfJg+rPDg7YfwoRpMU+t5I= -cloud.google.com/go/longrunning v0.4.1 h1:v+yFJOfKC3yZdY6ZUI933pIYdhyhV8S3NpWrXWmg7jM= -cloud.google.com/go/managedidentities v1.5.0 h1:ZRQ4k21/jAhrHBVKl/AY7SjgzeJwG1iZa+mJ82P+VNg= -cloud.google.com/go/maps v0.7.0 h1:mv9YaczD4oZBZkM5XJl6fXQ984IkJNHPwkc8MUsdkBo= -cloud.google.com/go/mediatranslation v0.7.0 h1:anPxH+/WWt8Yc3EdoEJhPMBRF7EhIdz426A+tuoA0OU= -cloud.google.com/go/memcache v1.9.0 h1:8/VEmWCpnETCrBwS3z4MhT+tIdKgR1Z4Tr2tvYH32rg= -cloud.google.com/go/metastore v1.10.0 h1:QCFhZVe2289KDBQ7WxaHV2rAmPrmRAdLC6gbjUd3HPo= -cloud.google.com/go/monitoring v1.13.0 h1:2qsrgXGVoRXpP7otZ14eE1I568zAa92sJSDPyOJvwjM= -cloud.google.com/go/networkconnectivity v1.11.0 h1:ZD6b4Pk1jEtp/cx9nx0ZYcL3BKqDa+KixNDZ6Bjs1B8= -cloud.google.com/go/networkmanagement v1.6.0 h1:8KWEUNGcpSX9WwZXq7FtciuNGPdPdPN/ruDm769yAEM= -cloud.google.com/go/networksecurity v0.8.0 h1:sOc42Ig1K2LiKlzG71GUVloeSJ0J3mffEBYmvu+P0eo= -cloud.google.com/go/notebooks v1.8.0 h1:Kg2K3K7CbSXYJHZ1aGQpf1xi5x2GUvQWf2sFVuiZh8M= -cloud.google.com/go/optimization v1.3.1 h1:dj8O4VOJRB4CUwZXdmwNViH1OtI0WtWL867/lnYH248= -cloud.google.com/go/orchestration v1.6.0 h1:Vw+CEXo8M/FZ1rb4EjcLv0gJqqw89b7+g+C/EmniTb8= -cloud.google.com/go/orgpolicy v1.10.0 h1:XDriMWug7sd0kYT1QKofRpRHzjad0bK8Q8uA9q+XrU4= -cloud.google.com/go/osconfig v1.11.0 h1:PkSQx4OHit5xz2bNyr11KGcaFccL5oqglFPdTboyqwQ= -cloud.google.com/go/oslogin v1.9.0 h1:whP7vhpmc+ufZa90eVpkfbgzJRK/Xomjz+XCD4aGwWw= -cloud.google.com/go/phishingprotection v0.7.0 h1:l6tDkT7qAEV49MNEJkEJTB6vOO/onbSOcNtAT09HPuA= -cloud.google.com/go/policytroubleshooter v1.6.0 h1:yKAGC4p9O61ttZUswaq9GAn1SZnEzTd0vUYXD7ZBT7Y= -cloud.google.com/go/privatecatalog v0.8.0 h1:EPEJ1DpEGXLDnmc7mnCAqFmkwUJbIsaLAiLHVOkkwtc= -cloud.google.com/go/pubsub v1.30.0 h1:vCge8m7aUKBJYOgrZp7EsNDf6QMd2CAlXZqWTn3yq6s= -cloud.google.com/go/pubsublite v1.7.0 h1:cb9fsrtpINtETHiJ3ECeaVzrfIVhcGjhhJEjybHXHao= -cloud.google.com/go/recaptchaenterprise/v2 v2.7.0 h1:6iOCujSNJ0YS7oNymI64hXsjGq60T4FK1zdLugxbzvU= -cloud.google.com/go/recommendationengine v0.7.0 h1:VibRFCwWXrFebEWKHfZAt2kta6pS7Tlimsnms0fjv7k= -cloud.google.com/go/recommender v1.9.0 h1:ZnFRY5R6zOVk2IDS1Jbv5Bw+DExCI5rFumsTnMXiu/A= -cloud.google.com/go/redis v1.11.0 h1:JoAd3SkeDt3rLFAAxEvw6wV4t+8y4ZzfZcZmddqphQ8= -cloud.google.com/go/resourcemanager v1.7.0 h1:NRM0p+RJkaQF9Ee9JMnUV9BQ2QBIOq/v8M+Pbv/wmCs= -cloud.google.com/go/resourcesettings v1.5.0 h1:8Dua37kQt27CCWHm4h/Q1XqCF6ByD7Ouu49xg95qJzI= -cloud.google.com/go/retail v1.12.0 h1:1Dda2OpFNzIb4qWgFZjYlpP7sxX3aLeypKG6A3H4Yys= -cloud.google.com/go/run v0.9.0 h1:ydJQo+k+MShYnBfhaRHSZYeD/SQKZzZLAROyfpeD9zw= -cloud.google.com/go/scheduler v1.9.0 h1:NpQAHtx3sulByTLe2dMwWmah8PWgeoieFPpJpArwFV0= -cloud.google.com/go/secretmanager v1.10.0 h1:pu03bha7ukxF8otyPKTFdDz+rr9sE3YauS5PliDXK60= -cloud.google.com/go/security v1.13.0 h1:PYvDxopRQBfYAXKAuDpFCKBvDOWPWzp9k/H5nB3ud3o= -cloud.google.com/go/securitycenter v1.19.0 h1:AF3c2s3awNTMoBtMX3oCUoOMmGlYxGOeuXSYHNBkf14= -cloud.google.com/go/servicecontrol v1.11.1 h1:d0uV7Qegtfaa7Z2ClDzr9HJmnbJW7jn0WhZ7wOX6hLE= -cloud.google.com/go/servicedirectory v1.9.0 h1:SJwk0XX2e26o25ObYUORXx6torSFiYgsGkWSkZgkoSU= -cloud.google.com/go/servicemanagement v1.8.0 h1:fopAQI/IAzlxnVeiKn/8WiV6zKndjFkvi+gzu+NjywY= -cloud.google.com/go/serviceusage v1.6.0 h1:rXyq+0+RSIm3HFypctp7WoXxIA563rn206CfMWdqXX4= -cloud.google.com/go/shell v1.6.0 h1:wT0Uw7ib7+AgZST9eCDygwTJn4+bHMDtZo5fh7kGWDU= -cloud.google.com/go/spanner v1.45.0 h1:7VdjZ8zj4sHbDw55atp5dfY6kn1j9sam9DRNpPQhqR4= -cloud.google.com/go/speech v1.15.0 h1:JEVoWGNnTF128kNty7T4aG4eqv2z86yiMJPT9Zjp+iw= -cloud.google.com/go/storagetransfer v1.8.0 h1:5T+PM+3ECU3EY2y9Brv0Sf3oka8pKmsCfpQ07+91G9o= -cloud.google.com/go/talent v1.5.0 h1:nI9sVZPjMKiO2q3Uu0KhTDVov3Xrlpt63fghP9XjyEM= -cloud.google.com/go/texttospeech v1.6.0 h1:H4g1ULStsbVtalbZGktyzXzw6jP26RjVGYx9RaYjBzc= -cloud.google.com/go/tpu v1.5.0 h1:/34T6CbSi+kTv5E19Q9zbU/ix8IviInZpzwz3rsFE+A= -cloud.google.com/go/trace v1.9.0 h1:olxC0QHC59zgJVALtgqfD9tGk0lfeCP5/AGXL3Px/no= -cloud.google.com/go/translate v1.7.0 h1:GvLP4oQ4uPdChBmBaUSa/SaZxCdyWELtlAaKzpHsXdA= -cloud.google.com/go/video v1.15.0 h1:upIbnGI0ZgACm58HPjAeBMleW3sl5cT84AbYQ8PWOgM= -cloud.google.com/go/videointelligence v1.10.0 h1:Uh5BdoET8XXqXX2uXIahGb+wTKbLkGH7s4GXR58RrG8= -cloud.google.com/go/vision/v2 v2.7.0 h1:8C8RXUJoflCI4yVdqhTy9tRyygSHmp60aP363z23HKg= -cloud.google.com/go/vmmigration v1.6.0 h1:Azs5WKtfOC8pxvkyrDvt7J0/4DYBch0cVbuFfCCFt5k= -cloud.google.com/go/vmwareengine v0.3.0 h1:b0NBu7S294l0gmtrT0nOJneMYgZapr5x9tVWvgDoVEM= -cloud.google.com/go/vpcaccess v1.6.0 h1:FOe6CuiQD3BhHJWt7E8QlbBcaIzVRddupwJlp7eqmn4= -cloud.google.com/go/webrisk v1.8.0 h1:IY+L2+UwxcVm2zayMAtBhZleecdIFLiC+QJMzgb0kT0= -cloud.google.com/go/websecurityscanner v1.5.0 h1:AHC1xmaNMOZtNqxI9Rmm87IJEyPaRkOxeI0gpAacXGk= -cloud.google.com/go/workflows v1.10.0 h1:FfGp9w0cYnaKZJhUOMqCOJCYT/WlvYBfTQhFWV3sRKI= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe h1:QQ3GSy+MqSHxm/d8nCtnAiZdYFd45cYZPs8vOOIYKfk= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230524094728-9239064ad72f h1:7T++XKzy4xg7PKy+bM+Sa9/oe1OC88yz2hXQUISoXfA= -github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY= github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.48.0 h1:oJWvHb9BIZToTQS3MuQ2R3bJZiNSa2KiNdeI8A+79Tc= github.com/valyala/fasthttp v1.48.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA= -github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= -golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= -golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= -golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= +golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= +golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= -golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= google.golang.org/grpc v1.56.2 h1:fVRFRnXvU+x6C4IlHZewvJOVHoOv1TUuQyoRsYnB4bI= diff --git a/models/bert/common.go b/models/bert/common.go index 119655f..8521dca 100644 --- a/models/bert/common.go +++ b/models/bert/common.go @@ -3,12 +3,10 @@ package bert import "github.com/sunhailin-Leo/triton-service-go/nvidia_inferenceserver" // GenerateModelInferRequest model input callback. -type GenerateModelInferRequest func( - batchSize, maxSeqLength int) []*nvidia_inferenceserver.ModelInferRequest_InferInputTensor +type GenerateModelInferRequest func(batchSize, maxSeqLength int) []*nvidia_inferenceserver.ModelInferRequest_InferInputTensor // GenerateModelInferOutputRequest model output callback. -type GenerateModelInferOutputRequest func( - params ...interface{}) []*nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor +type GenerateModelInferOutputRequest func(params ...interface{}) []*nvidia_inferenceserver.ModelInferRequest_InferRequestedOutputTensor // InputFeature Bert InputFeature. type InputFeature struct { diff --git a/models/bert/model.go b/models/bert/model.go index 56c328b..75de48c 100644 --- a/models/bert/model.go +++ b/models/bert/model.go @@ -141,6 +141,9 @@ func (m *ModelService) SetSecondaryServerURL(url string) *ModelService { // getTokenizerResult Get Tokenizer result from different tokenizers. func (m *ModelService) getTokenizerResult(inferData string) []string { if m.isChinese { + if m.isChineseCharMode { + return GetStrings(m.BertTokenizer.TokenizeChineseCharMode(strings.ToLower(inferData))) + } return GetStrings(m.BertTokenizer.TokenizeChinese(strings.ToLower(inferData))) } return GetStrings(m.BertTokenizer.Tokenize(inferData)) diff --git a/test/bert_test.go b/test/bert_test.go index a131548..0e7d6a8 100644 --- a/test/bert_test.go +++ b/test/bert_test.go @@ -92,7 +92,7 @@ func TestBertServiceForBertChinese(t *testing.T) { if initErr != nil { panic(initErr) } - bertService = bertService.SetChineseTokenize().SetMaxSeqLength(maxSeqLen) + bertService = bertService.SetChineseTokenize(false).SetMaxSeqLength(maxSeqLen) vocabSize := bertService.BertVocab.Size() if bertService.BertVocab.Size() != 21128 { t.Errorf("Expected '%d', but got '%d'", vocabSize, 21128) diff --git a/utils/const.go b/utils/const.go index 2a4b50f..3e71830 100644 --- a/utils/const.go +++ b/utils/const.go @@ -7,6 +7,8 @@ import ( const ( TritonBytesType string = "BYTES" + TritonINT32Type string = "INT32" + TritonINT64Type string = "INT64" TritonFP16Type string = "FP16" TritonFP32Type string = "FP32" SliceByteType string = "[]byte" diff --git a/utils/funcs.go b/utils/funcs.go index 4eff333..0305418 100644 --- a/utils/funcs.go +++ b/utils/funcs.go @@ -20,6 +20,7 @@ func IsWhitespace(c rune) bool { } // IsControl checks whether rune c is a BERT control character. +//go:inline func IsControl(c rune) bool { switch c { case '\t': @@ -34,6 +35,7 @@ func IsControl(c rune) bool { } // IsPunctuation checks whether rune c is a BERT punctuation character. +//go:inline func IsPunctuation(c rune) bool { // return unicode.In(c, utils.Bp, unicode.P) // return unicode.In(c, utils.AsciiPunctuation, unicode.P) && c != '-' @@ -41,22 +43,26 @@ func IsPunctuation(c rune) bool { } // IsChinese validates that rune c is in the CJK range according to BERT spec. +//go:inline func IsChinese(c rune) bool { // unicode.Is(unicode.Han, c) return unicode.In(c, BertChineseChar, unicode.P) } // IsChineseOrNumber validates that rune c is in the CJK range according to BERT spec or Number. +//go:inline func IsChineseOrNumber(c rune) bool { return unicode.In(c, BertChineseChar, unicode.P) || unicode.IsNumber(c) } // IsWhiteSpaceOrChinese validates that rune c is whitespace or is Chinese. +//go:inline func IsWhiteSpaceOrChinese(c rune) bool { return IsWhitespace(c) || IsChinese(c) } // IsWhiteSpaceOrChineseOrNumber validates that rune c is whitespace or is Chinese or is Number. +//go:inline func IsWhiteSpaceOrChineseOrNumber(c rune) bool { return IsWhitespace(c) || IsChineseOrNumber(c) } @@ -236,6 +242,12 @@ func BinaryFilter(arr []byte) []byte { // convert functions. var convertFuncMap = map[string]func([]uint8) interface{}{ + TritonINT32Type: func(b []uint8) interface{} { + return int32(binary.LittleEndian.Uint32(b)) + }, + TritonINT64Type: func(b []uint8) interface{} { + return int64(binary.LittleEndian.Uint64(b)) + }, SliceFloat32Type: func(b []uint8) interface{} { return math.Float32frombits(binary.LittleEndian.Uint32(b)) }, diff --git a/utils/times.go b/utils/times.go index 2ee94ff..4c33afb 100644 --- a/utils/times.go +++ b/utils/times.go @@ -3,11 +3,13 @@ package utils import "time" // GetNanoTimeFromSys get nano timestamp. +//go:inline func GetNanoTimeFromSys() int64 { return time.Now().UnixNano() } // CalTimeGapWithNS get nano timestamp gap. +//go:inline func CalTimeGapWithNS(begin int64) int64 { return GetNanoTimeFromSys() - begin }