diff --git a/examples/data_preprocess/arth.py b/examples/data_preprocess/arth.py
index e22b03b..b0bfffe 100644
--- a/examples/data_preprocess/arth.py
+++ b/examples/data_preprocess/arth.py
@@ -38,11 +38,12 @@ def gen_dataset(
     seed(1)
     # Generate N pairs of numbers and their results for different operations
     equations = []
-    operations = ['*', '+', '-', '*', '*']
+    # operations = ['*', '+', '-', '*', '*']
+    operations = ['*']
     for _ in tqdm(range(N)):
         # Helper function to generate a number with 50% chance of being N-digit or N/2-digit
         def get_random_num():
-            r = randint(0,3)
+            r = randint(1,3)
             if r == 0:
                 # 2 digits less than original
                 max_num = 10**(DIGIT-2)
@@ -79,7 +80,7 @@ def make_prefix(dp):
     num1 = dp['num1']
     num2 = dp['num2']
     op = dp['operation']
-    prefix = f"""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within and tags, respectively, i.e., reasoning process here RESULT_NUMBER . \nUser: Give me the answer of the following equation: {num1} {op} {num2}.\nAssistant: Ok let me think about it.\n"""
+    prefix = f"""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within and tags, respectively, i.e., reasoning process here RESULT_NUMBER . \nUser: Give me the answer of the following equation: {num1} {op} {num2}.\nAssistant: Let me solve this step by step.\n"""
     return prefix
 
 if __name__ == '__main__':
diff --git a/examples/data_preprocess/multiply.py b/examples/data_preprocess/multiply.py
index 02a1684..2c6e349 100644
--- a/examples/data_preprocess/multiply.py
+++ b/examples/data_preprocess/multiply.py
@@ -37,7 +37,7 @@ def gen_dataset(
     for _ in tqdm(range(N)):
         # Helper function to generate a number with 50% chance of being N-digit or N/2-digit
         def get_random_num():
-            r = randint(0,3)
+            r = randint(1,3)
             if r == 0:
                 # 2 digits less than original
                 max_num = 10**(DIGIT-2)