-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconstraint.py
125 lines (106 loc) · 3.59 KB
/
constraint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from typing import Dict, List, Optional
class Trie(object):
    """Prefix tree over token-id sequences, stored as nested dicts.

    Every node is a ``dict`` mapping a token id to its child node; a node
    without children (an empty dict) ends a stored sequence.  A secondary
    trie can be attached with :meth:`append`, in which case lookups fall
    through to it as described in :meth:`_get_from_trie`.
    """

    def __init__(self, sequences: Optional[List[List[int]]] = None):
        """Create a trie, optionally pre-populated.

        Args:
            sequences: Token-id sequences to insert.  ``None`` (the default)
                or an empty list yields an empty trie.
        """
        self.trie_dict = {}
        # Counts insertions, not distinct leaves: adding a duplicate or a
        # prefix of an existing sequence still increments it.
        self.len = 0
        if sequences:
            for sequence in sequences:
                Trie._add_to_trie(sequence, self.trie_dict)
                self.len += 1
        self.append_trie = None
        self.bos_token_id = None

    def append(self, trie, bos_token_id):
        """Attach ``trie`` as the fallback trie used by :meth:`get`.

        Args:
            trie: Trie consulted when a prefix leaves this trie, and whose
                root tokens replace ``bos_token_id`` in lookup results.
            bos_token_id: Token id that is swapped for the root tokens of
                ``trie`` whenever it appears among the candidates.
        """
        self.append_trie = trie
        self.bos_token_id = bos_token_id

    def add(self, sequence: List[int]):
        """Insert one token-id sequence and bump the insertion counter."""
        Trie._add_to_trie(sequence, self.trie_dict)
        self.len += 1

    def get(self, prefix_sequence: List[int]):
        """Return the token ids that may follow ``prefix_sequence``.

        Returns:
            A list of candidate next tokens; empty when the prefix is not in
            the trie and no usable appended trie resolves it.
        """
        return Trie._get_from_trie(
            prefix_sequence, self.trie_dict, self.append_trie, self.bos_token_id
        )

    @staticmethod
    def load_from_dict(trie_dict):
        """Wrap an existing nested-dict structure in a ``Trie``.

        The dict is used as-is (not copied).  The length is obtained by
        counting the sequences produced by iteration, so an empty dict is
        reported with length 1 because iteration yields one empty sequence.
        """
        trie = Trie()
        trie.trie_dict = trie_dict
        trie.len = sum(1 for _ in trie)
        return trie

    @staticmethod
    def _add_to_trie(sequence: List[int], trie_dict: Dict):
        """Insert ``sequence`` below ``trie_dict``, creating nodes as needed.

        Walks iteratively so that long sequences neither hit the recursion
        limit nor get re-sliced at every step.
        """
        node = trie_dict
        for token in sequence:
            node = node.setdefault(token, {})

    @staticmethod
    def _get_from_trie(
        prefix_sequence: List[int],
        trie_dict: Dict,
        append_trie=None,
        bos_token_id: int = None,
    ):
        """Look up the tokens allowed after ``prefix_sequence``.

        Args:
            prefix_sequence: Tokens consumed so far.
            trie_dict: Root node to start the walk from.
            append_trie: Optional fallback trie.  It is tested by truthiness,
                which goes through ``__len__``, so an appended trie whose
                length is 0 is ignored.
            bos_token_id: Token replaced by the root tokens of
                ``append_trie`` when it appears among the candidates.

        Returns:
            The children of the node reached by the prefix.  If the walk
            leaves the trie, the unmatched remainder of the prefix is looked
            up in ``append_trie`` instead, or ``[]`` is returned when there is
            no usable appended trie.
        """
        node = trie_dict
        for depth, token in enumerate(prefix_sequence):
            if token not in node:
                if append_trie:
                    # Hand over the part of the prefix starting at the first
                    # token this trie could not match.
                    return append_trie.get(prefix_sequence[depth:])
                return []
            node = node[token]

        output = list(node.keys())
        if append_trie and bos_token_id in output:
            output.remove(bos_token_id)
            output += list(append_trie.trie_dict.keys())
        return output

    def __iter__(self):
        """Yield every root-to-leaf path as a list of token ids.

        Paths come out depth-first in dict insertion order.  An empty trie
        yields a single empty list.  The traversal uses an explicit stack so
        deep tries do not exhaust the recursion limit.
        """
        stack = [([], self.trie_dict)]
        while stack:
            prefix, node = stack.pop()
            if node:
                # Push children reversed so the first child is popped first.
                for token in reversed(list(node)):
                    stack.append((prefix + [token], node[token]))
            else:
                yield prefix

    def __len__(self):
        """Return the insertion counter (see ``__init__``)."""
        return self.len

    def __getitem__(self, value):
        """Alias for :meth:`get`, enabling ``trie[prefix]``."""
        return self.get(value)
class PartialTrie(Trie):
    """Trie whose lookups also match on suffixes of the given prefix.

    Instead of requiring the whole prefix to start at the root, successive
    leading tokens are dropped until the remaining suffix yields candidates.
    """

    def __init__(self, dic_len):
        """Create an empty partial trie.

        Args:
            dic_len: Upper bound used to build ``none_list`` as
                ``list(range(dic_len))``.
        """
        super().__init__()
        self.none_list = list(range(dic_len))

    def get(self, prefix_sequence: List[int]):
        """Return candidate next tokens for the longest matching suffix."""
        return PartialTrie._get_from_partial_trie(
            prefix_sequence,
            self.none_list,
            self.trie_dict,
            self.append_trie,
            self.bos_token_id,
        )

    @staticmethod
    def _get_from_partial_trie(
        prefix_sequence: List[int],
        none_list: List[int],
        trie_dict: Dict,
        append_trie=None,
        bos_token_id: int = None,
    ):
        """Try each suffix of ``prefix_sequence``, longest first.

        Args:
            prefix_sequence: Tokens consumed so far.
            none_list: Accepted for interface compatibility; not used here.
            trie_dict: Root node of the trie to search.
            append_trie: Optional fallback trie forwarded to the lookup.
            bos_token_id: Token id forwarded to the lookup.

        Returns:
            The first non-empty lookup result, or ``[]`` when no suffix
            matches.  An empty ``prefix_sequence`` has no suffixes to try and
            therefore also gives ``[]``.
        """
        # Lazy on purpose: later suffixes are only looked up if the earlier
        # ones produced nothing.
        lookups = (
            Trie._get_from_trie(
                prefix_sequence[start:], trie_dict, append_trie, bos_token_id
            )
            for start in range(len(prefix_sequence))
        )
        return next((found for found in lookups if len(found) > 0), [])
def build_trie(constraint_list):
    """Build a ``Trie`` holding every sequence in ``constraint_list``.

    Args:
        constraint_list: Iterable of token-id sequences, each inserted with
            ``Trie.add``.

    Returns:
        The populated ``Trie``.
    """
    built = Trie()
    insert = built.add
    for token_ids in constraint_list:
        insert(token_ids)
    return built
def build_partial_trie(constraint_list, dic_len):
    """Build a ``PartialTrie`` holding every sequence in ``constraint_list``.

    Args:
        constraint_list: Iterable of token-id sequences, each inserted with
            the trie's ``add`` method.
        dic_len: Forwarded unchanged to the ``PartialTrie`` constructor.

    Returns:
        The populated ``PartialTrie``.
    """
    built = PartialTrie(dic_len)
    insert = built.add
    for token_ids in constraint_list:
        insert(token_ids)
    return built