
Commit

Latest model (only compatible with 2022-02-15 and later models)
www committed Feb 15, 2022
1 parent 119b423 commit 7e145b1
Showing 4 changed files with 22 additions and 19 deletions.
File renamed without changes.
17 changes: 7 additions & 10 deletions run.py
@@ -27,13 +27,13 @@

RUN_DEVICE = 'gpu' # gpu or dml or cpu

- MODEL_NAME = 'model/wangwen-2022-01-09' # model name
- WORD_NAME = 'model/wangwen-2022-01-09' # change this too
+ MODEL_NAME = 'model/wangwen-2022-02-15' # model name
+ WORD_NAME = 'model/wangwen-2022-02-15' # change this too

- NUM_OF_RUNS = 9999 # how many passes to write
- LENGTH_OF_EACH = 200 # how many characters to write each time
+ NUM_OF_RUNS = 999 # how many passes to write
+ LENGTH_OF_EACH = 512 # how many characters to write each time

- top_p = 0.8 # range is 0 to 1. Larger = more variation; smaller = more regular output. Try 0, 0.5, and 1.0 and you'll see the difference
+ top_p = 0.75 # range is 0 to 1. Larger = more variation; smaller = more regular output. Try 0, 0.5, and 1.0 and you'll see the difference
top_p_newline = 0.9

# The opening is crucial. It needs to set up plot points. The better the opening is written, the better the continuation; write the opening carelessly and the continuation will be careless too.
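
A note on the top_p knob above: this is nucleus sampling — sort the token probabilities, keep the smallest set whose cumulative probability reaches top_p, drop the rest, and sample from what remains. A minimal NumPy sketch of the idea (the repo's real sampler lives in src/utils.py; the helper name here is hypothetical):

import numpy as np

def sample_top_p(probs, top_p):
    # probs: 1-D array of token probabilities summing to 1 (hypothetical helper)
    sorted_probs = np.sort(probs)[::-1]
    cumulative = np.cumsum(sorted_probs)
    cutoff_idx = np.searchsorted(cumulative, top_p)       # first index reaching top_p
    cutoff = sorted_probs[min(cutoff_idx, len(probs) - 1)]
    probs = np.where(probs < cutoff, 0.0, probs)          # drop the long tail
    return np.random.choice(len(probs), p=probs / probs.sum())

Near top_p = 0 this degenerates to greedy decoding; near 1 it keeps almost the full distribution, which is the "more variation" end of the comment's advice. top_p_newline presumably plays the same role when the candidate character is a newline, keeping paragraph breaks more regular.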
@@ -93,7 +93,6 @@
time_w = m2[prefix + 'time_w']
time_alpha = m2[prefix + 'time_alpha']
time_beta = m2[prefix + 'time_beta']
- mask = m2[prefix + 'mask']

TT = ctx_len
T = ctx_len
@@ -102,13 +101,11 @@
w = w[:, :-TT].reshape(-1, TT, 2 * TT - 1)
w = w[:, :, TT-1:]
w = w[:, :T, :T] * time_alpha[:, :, :T] * time_beta[:, :T, :]
- w = w.masked_fill(mask[:T, :T] == 0, 0)

m2[prefix + 'time_ww'] = w
del m2[prefix + 'time_w']
del m2[prefix + 'time_alpha']
del m2[prefix + 'time_beta']
- del m2[prefix + 'mask']
if RUN_DEVICE == 'gpu':
model = model.cuda()
model.load_state_dict(m2)
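
What this precompute builds: the diff viewer collapsed a few lines before the reshape above; in the repo they pad and tile time_w, after which each head's weight vector becomes a TT x TT matrix whose (i, j) entry depends only on the offset i - j, with zeros above the diagonal. If time_w holds one weight per relative offset, that built-in causality makes the deleted masked_fill a no-op, consistent with the mask buffer being dropped from the new checkpoints. A toy sketch of the trick — the two lines after the constants are an assumption reconstructing the collapsed part of the hunk:

import torch
import torch.nn.functional as F

TT = 3
time_w = torch.tensor([[1., 2., 3.]])       # toy: one head, one weight per offset

w = F.pad(time_w, (0, TT))                  # [[1., 2., 3., 0., 0., 0.]]
w = torch.tile(w, [TT])                     # repeat along the last dim
w = w[:, :-TT].reshape(-1, TT, 2 * TT - 1)  # each row is the vector, shifted by one
w = w[:, :, TT - 1:]                        # keep the causal (left-looking) half
print(w)
# tensor([[[3., 0., 0.],
#          [2., 3., 0.],
#          [1., 2., 3.]]])

Multiplying by time_alpha and time_beta then rescales the rows and columns, and the result is cached in the state dict as time_ww so none of this is rebuilt at generation time.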
@@ -143,7 +140,7 @@
xxx = torch.tensor(x[-ctx_len:], dtype=torch.long)[None,...]
if RUN_DEVICE == 'gpu':
xxx = xxx.cuda()
- out, _ = model(xxx)
+ out, _ = model(xxx)
out[:, :, UNKNOWN_CHAR] = -float('Inf')
pos = -1 if real_len >= ctx_len else real_len - 1

@@ -155,7 +152,7 @@
x = np.append(x, char)
real_len += 1

- if i % 10 == 9 or i == LENGTH_OF_EACH-1 or i < 10 or RUN_DEVICE != 'gpu':
+ if i % 2 == 1 or i == LENGTH_OF_EACH-1 or i < 10 or RUN_DEVICE != 'gpu':
completion = ''.join([train_dataset.itos[int(i)] for i in x[print_begin:real_len]])
print(completion.replace('\n', '\n '), end = '', flush=True)
print_begin = real_len
13 changes: 5 additions & 8 deletions server.py
@@ -19,10 +19,10 @@

RUN_DEVICE = 'gpu' # gpu or dml or cpu

- MODEL_NAME = 'model/wangwen-2022-01-09' # model name, e.g. yanqing-2021-10-29 or xuanhuan-2021-10-26
- WORD_NAME = 'model/wangwen-2022-01-09' # change this too
+ MODEL_NAME = 'model/wangwen-2022-02-15' # model name
+ WORD_NAME = 'model/wangwen-2022-02-15' # change this too

- top_p = 0.8 # range is 0 to 1. Larger = more variation; smaller = more regular output. Try 0, 0.5, and 1.0 and you'll see the difference
+ top_p = 0.75 # range is 0 to 1. Larger = more variation; smaller = more regular output. Try 0, 0.5, and 1.0 and you'll see the difference
top_p_newline = 0.9

LENGTH_OF_EACH = 20 # how many characters to write each time
@@ -170,7 +170,7 @@ def NeuralWorker(queueZ, queueX):
# src.utils.set_seed(42) # whether to fix the random seed (when fixed, every run generates the same result)

print('\nAI人工智障写作 https://github.com/BlinkDL/AI-Writer')
- print('Follow me on Zhihu: https://zhuanlan.zhihu.com/p/394766831')
+ print('Follow me on Zhihu: https://zhuanlan.zhihu.com/p/423646620')
print('\nDisclaimer: the model was trained entirely on web novels and lacks everyday common sense. The generated text is for entertainment only. Please comply with applicable laws and regulations.')

print(f'\nLoading model for {RUN_DEVICE}...', end=' ')
@@ -201,7 +201,6 @@ def train_dataset(): return None
time_w = m2[prefix + 'time_w']
time_alpha = m2[prefix + 'time_alpha']
time_beta = m2[prefix + 'time_beta']
- mask = m2[prefix + 'mask']

TT = ctx_len
T = ctx_len
@@ -210,13 +209,11 @@ def train_dataset(): return None
w = w[:, :-TT].reshape(-1, TT, 2 * TT - 1)
w = w[:, :, TT-1:]
w = w[:, :T, :T] * time_alpha[:, :, :T] * time_beta[:, :T, :]
- w = w.masked_fill(mask[:T, :T] == 0, 0)

m2[prefix + 'time_ww'] = w
del m2[prefix + 'time_w']
del m2[prefix + 'time_alpha']
del m2[prefix + 'time_beta']
- del m2[prefix + 'mask']
if RUN_DEVICE == 'gpu':
model = model.cuda()
model.load_state_dict(m2)
@@ -265,7 +262,7 @@ def train_dataset(): return None
xxx = torch.tensor(x[-ctx_len:], dtype=torch.long)[None,...]
if RUN_DEVICE == 'gpu':
xxx = xxx.cuda()
- out, _ = model(xxx)
+ out, _ = model(xxx)
out[:, :, UNKNOWN_CHAR] = -float('Inf')

pos = -1 if real_len >= ctx_len else real_len - 1
11 changes: 10 additions & 1 deletion src/model.py
@@ -131,6 +131,10 @@ def __init__(self, config):
self.time_out = nn.Parameter(torch.ones(1, config.ctx_len, 1))
self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

+ self.head_q = nn.Linear(config.n_embd, 256)
+ self.head_k = nn.Linear(config.n_embd, 256)
+ self.register_buffer("copy_mask", torch.tril(torch.ones(config.ctx_len, config.ctx_len)))
+
self.ctx_len = config.ctx_len

logger.info("number of parameters: %e", sum(p.numel()
@@ -148,8 +152,13 @@ def forward(self, idx, targets=None):
x = self.blocks(x)

x = self.ln_f(x)
+ q = self.head_q(x)[:,:T,:]
+ k = self.head_k(x)[:,:T,:]
+ c = (q @ k.transpose(-2, -1)) * (1.0 / 256)
+ c = c.masked_fill(self.copy_mask[:T,:T] == 0, 0)
+ c = c @ F.one_hot(idx, num_classes = self.config.vocab_size).float()
x = x * self.time_out[:, :T, :]
- x = self.head(x)
+ x = self.head(x) + c

loss = None
if targets is not None:
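
The src/model.py change is the substantive one in this commit. The new head_q/head_k pair scores every position against itself and every earlier position, and multiplying those scores by a one-hot encoding of the input ids scatters them onto the vocabulary entries of tokens already present in the context — a pointer-style "copy" bonus added on top of the regular vocabulary head. A self-contained sketch of just that term; names outside the diff (copy_bonus, the toy shapes) are assumptions:

import torch
import torch.nn.functional as F

def copy_bonus(x, idx, head_q, head_k, copy_mask, vocab_size):
    # x: (B, T, n_embd) final hidden states; idx: (B, T) input token ids
    T = x.size(1)
    q = head_q(x)                                 # (B, T, 256)
    k = head_k(x)                                 # (B, T, 256)
    c = (q @ k.transpose(-2, -1)) * (1.0 / 256)   # (B, T, T) position-pair scores
    c = c.masked_fill(copy_mask[:T, :T] == 0, 0)  # causal: ignore future positions
    # scatter each pair score onto the vocab id of the attended token
    return c @ F.one_hot(idx, num_classes=vocab_size).float()  # (B, T, vocab_size)

# toy usage
B, T, n_embd, vocab_size = 1, 4, 8, 10
head_q, head_k = torch.nn.Linear(n_embd, 256), torch.nn.Linear(n_embd, 256)
copy_mask = torch.tril(torch.ones(T, T))
bonus = copy_bonus(torch.randn(B, T, n_embd), torch.randint(vocab_size, (B, T)),
                   head_q, head_k, copy_mask, vocab_size)
print(bonus.shape)  # torch.Size([1, 4, 10])

Masked entries are zeroed rather than set to -inf, so a position the copy head ignores simply contributes no bonus and the base head's logits pass through unchanged. Since older checkpoints carry no head_q/head_k weights, they cannot load into this architecture — hence the commit title.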
