Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug fixes] update chatglm tokenizer #7797

Merged
merged 5 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions llm/docs/inference.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,10 @@ PaddleNLP 中已经添加高性能推理模型相关实现,支持:
| [ChatGLM2](../chatglm2) | ✅ | ❌ | ❌ | ❌ |
| [Bloom](../bloom) | ✅ | ✅ | ✅ | ❌ |
| [GPT-3](../gpt-3) | ✅ | ❌ | ❌ | ❌ |
| [Qwen](../qwen) | ❌ | ❌ | ❌ | ❌ |
| [Qwen](../qwen) | ✅ | ❌ | ❌ | ❌ |
| [BaiChuan-7B](../llama) | ✅ | ✅ | ✅ | 🚧 |
| [BaiChuan-13B](../llama) | ❌ | ❌ | ❌ | ❌ |
| [BaiChuan2-7B](../llama) | ✅ | ✅ | ✅ | 🚧 |
| [BaiChuan2-13B](../llama) | 🚧 | 🚧 | 🚧 | 🚧 |

* ✅: Supported
* 🚧: In Progress
Expand Down
4 changes: 2 additions & 2 deletions llm/gradio_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def get_shown_context(context):
with gr.Column(scale=1):
top_k = gr.Slider(
minimum=0,
maximum=default_params.get("top_k", 20),
maximum=100,
value=0,
step=1,
label="Top-k",
Expand Down Expand Up @@ -218,7 +218,7 @@ def get_shown_context(context):
default_src_length = default_params["src_length"]
total_length = default_params["src_length"] + default_params["max_length"]
src_length = create_src_slider(default_src_length, total_length)
max_length = create_max_slider(50, total_length)
max_length = create_max_slider(min(total_length - default_src_length, 50), total_length)

def src_length_change_event(src_length_value, max_length_value):
return create_max_slider(
Expand Down
28 changes: 23 additions & 5 deletions llm/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,18 @@ def get_eos_token_id(
Returns:
int | List[int]: eos_token_id to stop the generation
"""
if generation_config is None or generation_config.eos_token_id is None:
return tokenizer.eos_token_id
eos_token_ids = []
if tokenizer.eos_token_id is not None:
eos_token_ids.append(tokenizer.eos_token_id)

return generation_config.eos_token_id
if generation_config is not None and generation_config.eos_token_id is not None:
if isinstance(generation_config.eos_token_id, int):
eos_token_ids.append(generation_config.eos_token_id)
else:
eos_token_ids.extend(generation_config.eos_token_id)

eos_token_ids_dict = {str(item): item for item in eos_token_ids}
return list(eos_token_ids_dict.values())


class BasePredictor:
Expand Down Expand Up @@ -959,8 +967,18 @@ def predict():
with open(model_args.data_file, "r", encoding="utf-8") as f:
for line in f:
example = json.loads(line)
source_texts.append(example["src"])
target_texts.append(example["tgt"])
if isinstance(example["src"], str) or predictor.tokenizer.chat_template is None:
if isinstance(example["src"], str):
source_texts.append(example["src"])
target_texts.append(example["tgt"])
else:
# load multi-rounds dataset
source_texts.append(example["src"][0])
target_texts.append(example["tgt"][0])
else:
source_texts.append(list(zip(example["src"], example["tgt"])))
target_texts.append("")

else:
source_texts = ["解释一下“温故而知新”", "你好,请问你是谁?"]
target_texts = ["", ""]
Expand Down
41 changes: 36 additions & 5 deletions paddlenlp/transformers/chatglm_v2/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from __future__ import annotations

import os
import re
from typing import Any, Dict, List, Optional, Union

import numpy as np
Expand All @@ -36,16 +37,43 @@ def __init__(self, model_path: str):
self.pad_id: int = self.sp_model.unk_id()
assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()

special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"]
special_tokens = [
"[MASK]",
"[gMASK]",
"[sMASK]",
"sop",
"eop",
"<|system|>",
"<|user|>",
"<|assistant|>",
"<|observation|>",
]

self.special_tokens = {}
self.index_special_tokens = {}
for token in special_tokens:
self.special_tokens[token] = self.n_words
self.index_special_tokens[self.n_words] = token
self.n_words += 1

def tokenize(self, s: str):
    # NOTE(review): this is the pre-change diff side, superseded in this PR by
    # the overload that accepts `encode_special_tokens`; it simply delegates
    # the whole string to the sentencepiece model.
    return self.sp_model.EncodeAsPieces(s)
# add eos/pad/unk token to special_token_expression
all_special_tokens = list(self.special_tokens.keys()) + ["</s>", "<unk>"]
self.special_token_expression = "|".join([re.escape(token) for token in all_special_tokens])

def tokenize(self, s: str, encode_special_tokens=False):
    """Tokenize ``s`` into sentencepiece pieces.

    When ``encode_special_tokens`` is True, occurrences of the registered
    special tokens (e.g. ``[gMASK]``, ``<|user|>``, ``</s>``) are kept as
    single atomic pieces and only the plain text between them is run through
    the sentencepiece model; otherwise the whole string is encoded directly.
    """
    if encode_special_tokens:
        last_index = 0
        t = []
        # special_token_expression is a regex alternation of the escaped
        # special-token strings (built in __init__); scan for each occurrence.
        for match in re.finditer(self.special_token_expression, s):
            # Encode the plain text that precedes this special token.
            if last_index < match.start():
                t.extend(self.sp_model.EncodeAsPieces(s[last_index : match.start()]))
            # Keep the matched special token verbatim as one piece.
            t.append(s[match.start() : match.end()])
            last_index = match.end()
        # Encode any trailing plain text after the last special token.
        if last_index < len(s):
            t.extend(self.sp_model.EncodeAsPieces(s[last_index:]))
        return t
    else:
        return self.sp_model.EncodeAsPieces(s)

def encode(self, s: str, bos: bool = False, eos: bool = False) -> List[int]:
assert type(s) is str
Expand Down Expand Up @@ -85,7 +113,8 @@ class ChatGLMv2Tokenizer(PretrainedTokenizer):
}
}

def __init__(self, vocab_file, padding_side="left", **kwargs):
# always encode special tokens, eg: </s>, [gMASK], [MASK] ...
def __init__(self, vocab_file, padding_side="left", encode_special_tokens=True, **kwargs):
super().__init__(padding_side=padding_side, **kwargs)
self.name = "ChatGLMv2Tokenizer"

Expand All @@ -94,8 +123,10 @@ def __init__(self, vocab_file, padding_side="left", **kwargs):
self.special_tokens = {
"<bos>": self.tokenizer.bos_id,
"<eos>": self.tokenizer.eos_id,
"<unk>": self.tokenizer.pad_id,
"<pad>": self.tokenizer.pad_id,
}
self.encode_special_tokens = encode_special_tokens

def get_command(self, token):
if token in self.special_tokens:
Expand Down Expand Up @@ -130,7 +161,7 @@ def get_vocab(self):
return vocab

def _tokenize(self, text, **kwargs):
    """Tokenize ``text`` with the underlying sentencepiece tokenizer.

    Forwards ``self.encode_special_tokens`` so that registered special tokens
    such as ``[gMASK]`` or ``<|user|>`` are kept as atomic pieces when that
    flag is set (the whole point of this PR's change).
    """
    # Bug fix: the diff residue left a stale plain `tokenize(text)` return
    # above the new call, making the special-token-aware path unreachable.
    # Only the intended call is kept.
    return self.tokenizer.tokenize(text, encode_special_tokens=self.encode_special_tokens)

def _convert_token_to_id(self, token):
"""Converts a token (str) in an id using the vocab."""
Expand Down
6 changes: 6 additions & 0 deletions paddlenlp/transformers/tokenizer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,8 @@ def __call__(self, conversations: list[list[str]] | str, context_data: Dict[str,
final_query = self.render_system(context_data=context_data)
context_data["length"] = len(conversations)
for index, conversation in enumerate(conversations[:-1]):
context_data["is_first"] = index == 0
context_data["is_last"] = False
final_query += "".join(self.render_conversation(conversation, index=index, context_data=context_data))

if not isinstance(conversations[-1], list) and not len(conversations[-1]) != 1:
Expand Down Expand Up @@ -668,6 +670,10 @@ def encode_chat_inputs(self, conversations: List[List[str, str]], context_data:
# encode conversation
conversation_ids = []
for index, conversation in enumerate(conversations):
# give more control to chat_template
context_data["is_first"] = index == 0
context_data["is_last"] = index == len(conversations) - 1

user_input, bot_output = self.chat_template.render_conversation(
conversation, index=index, context_data=context_data
)
Expand Down
57 changes: 57 additions & 0 deletions tests/transformers/chatglm_v2/test_tokenizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from paddlenlp.transformers.chatglm_v2.tokenizer import ChatGLMv2Tokenizer
from paddlenlp.transformers.tokenizer_utils import PretrainedTokenizer

# Resource-file mapping for the tokenizer under test.
# NOTE(review): this constant is not referenced by the visible tests, and
# ChatGLMv2 ships a sentencepiece model rather than a "vocab.json" — this
# looks copied from another tokenizer's test module; confirm or remove.
VOCAB_FILES_NAMES = {
    "vocab_file": "vocab.json",
}


class ChatGLMv2TokenizationTest(unittest.TestCase):
    """Special-token handling tests for ChatGLMv2Tokenizer (THUDM/chatglm2-6b)."""

    tokenizer_class = ChatGLMv2Tokenizer
    # NOTE(review): these class attributes follow the shared tokenizer-test
    # mixin convention, but with a plain unittest.TestCase nothing visible
    # consumes them — confirm whether the common test mixin was intended.
    test_decode_token = True

    def get_tokenizer(self, **kwargs) -> PretrainedTokenizer:
        # Loads the pretrained chatglm2-6b tokenizer (downloads on first run,
        # so this test requires network access or a warm cache).
        tokenizer = ChatGLMv2Tokenizer.from_pretrained("THUDM/chatglm2-6b", **kwargs)
        return tokenizer

    def test_encode_special_tokens(self):
        tokenizer = self.get_tokenizer()

        # "[gMASK]" and "</s>" must each remain a single atomic token,
        # so the query tokenizes/encodes to exactly two pieces.
        query = "[gMASK]</s>"
        tokens = tokenizer.tokenize(query)
        self.assertEqual(len(tokens), 2)

        outputs = tokenizer.encode(query, add_special_tokens=False)
        self.assertEqual(len(outputs["input_ids"]), 2)


class ChatGLMv3TokenizationTest(unittest.TestCase):
    """Runs ChatGLMv2Tokenizer against the chatglm3-6b vocab, which adds the
    role special tokens (<|system|>, <|user|>, <|assistant|>, <|observation|>)."""

    tokenizer_class = ChatGLMv2Tokenizer

    def get_tokenizer(self, **kwargs) -> PretrainedTokenizer:
        # chatglm3 reuses the v2 tokenizer class with an extended vocabulary
        # (network access or a warm cache required).
        return ChatGLMv2Tokenizer.from_pretrained("THUDM/chatglm3-6b", **kwargs)

    def test_encode_special_tokens(self):
        tokenizer = self.get_tokenizer()

        # Four registered special tokens back-to-back must each stay atomic,
        # yielding exactly four pieces.
        query = "[gMASK]<|user|><|assistant|></s>"
        tokens = tokenizer.tokenize(query)
        self.assertEqual(len(tokens), 4)
12 changes: 11 additions & 1 deletion tests/transformers/test_generation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,17 @@ def test_get_unfinished_flag(self):
self.assertEqual(unfinish_flag.reshape([2]).tolist(), [False, True])

eos_token_id = [7, 11, 3]
unfinish_flag = paddle.to_tensor([[False], [False]], dtype="bool")
unfinish_flag = paddle.to_tensor([[True], [True]], dtype="bool")
unfinish_flag = get_unfinished_flag(input_ids, unfinish_flag, eos_token_id)
self.assertEqual(unfinish_flag.reshape([2]).tolist(), [False, False])

eos_token_id = [[7], [11], [3]]
unfinish_flag = paddle.to_tensor([[True], [True]], dtype="bool")
unfinish_flag = get_unfinished_flag(input_ids, unfinish_flag, eos_token_id)
self.assertEqual(unfinish_flag.reshape([2]).tolist(), [False, False])

eos_token_id = [7, [11], [3]]
unfinish_flag = paddle.to_tensor([[True], [True]], dtype="bool")
unfinish_flag = get_unfinished_flag(input_ids, unfinish_flag, eos_token_id)
self.assertEqual(unfinish_flag.reshape([2]).tolist(), [False, False])

Expand Down