
Commit

add bce example (#930)
lvyufeng authored Mar 16, 2024
1 parent bacc2ad commit 9e7a477
Showing 2 changed files with 45 additions and 0 deletions.
21 changes: 21 additions & 0 deletions llm/inference/bce/run_bce-embedding.py
@@ -0,0 +1,21 @@
from mindnlp.transformers import AutoModel, AutoTokenizer

hf_token = 'your_hf_token'

# list of sentences
sentences = ['sentence_0', 'sentence_1']

# init model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('maidalun1020/bce-embedding-base_v1', token=hf_token)
model = AutoModel.from_pretrained('maidalun1020/bce-embedding-base_v1', token=hf_token)

# get inputs
inputs = tokenizer(sentences, padding=True, truncation=True, max_length=512, return_tensors="ms")
inputs = {k: v for k, v in inputs.items()}  # plain dict copy; no explicit device transfer is needed with MindSpore

# get embeddings
outputs = model(**inputs, return_dict=True)
embeddings = outputs.last_hidden_state[:, 0]  # CLS pooling: take the first token's hidden state
embeddings = embeddings / embeddings.norm(dim=1, keepdim=True)  # L2-normalize
print(embeddings)
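
Not part of the commit: a minimal follow-up sketch of how the normalized embeddings could be compared, assuming MindSpore's ops.matmul and the Tensor.T property behave as in recent releases. Since the embeddings are L2-normalized, cosine similarity reduces to a dot product.

# follow-up sketch (hypothetical, not in the committed file)
from mindspore import ops

similarity = ops.matmul(embeddings, embeddings.T)  # pairwise cosine-similarity matrix
print(similarity)  # similarity[i, j] compares sentences[i] and sentences[j]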
24 changes: 24 additions & 0 deletions llm/inference/bce/run_bce-reranker.py
@@ -0,0 +1,24 @@
from mindspore import ops
from mindnlp.transformers import AutoTokenizer, AutoModelForSequenceClassification

hf_token = 'your_hf_token'

# init model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('maidalun1020/bce-reranker-base_v1', token=hf_token)
model = AutoModelForSequenceClassification.from_pretrained('maidalun1020/bce-reranker-base_v1', token=hf_token)

# your query and corresponding passages
query = "上海天气"  # "Shanghai weather"
passages = ["北京美食", "上海气候"]  # "Beijing food", "Shanghai climate"

# construct sentence pairs
sentence_pairs = [[query, passage] for passage in passages]

# get inputs
inputs = tokenizer(sentence_pairs, padding=True, truncation=True, max_length=512, return_tensors="ms")
inputs_on_device = {k: v for k, v in inputs.items()}  # plain dict copy; no explicit device transfer is needed with MindSpore

# calculate scores
scores = model(**inputs_on_device, return_dict=True).logits.view(-1,).float()  # one logit per (query, passage) pair
scores = ops.sigmoid(scores)  # map logits to [0, 1] relevance scores
print(scores)
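
Not part of the commit: a small follow-up sketch that ranks the passages by reranker score, assuming Tensor.asnumpy() is available as the usual MindSpore way to get a NumPy copy of a tensor.

# follow-up sketch (hypothetical, not in the committed file)
ranked = sorted(zip(passages, scores.asnumpy().tolist()), key=lambda pair: pair[1], reverse=True)
for passage, score in ranked:
    print(f"{score:.4f}\t{passage}")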
