Parallel sampling with threadpool #7479
causal_lm_cpp.yml
on: pull_request
Matrix: cpp-beam_search_causal_lm-ubuntu
cpp-multinomial-greedy_causal_lm-ubuntu
18m 48s
cpp-greedy_causal_lm-windows
40m 51s
cpp-greedy_causal_lm-Qwen-7B-Chat
11m 10s
cpp-beam_search_causal_lm-Qwen1_5-7B-Chat
33m 11s
cpp-beam_search_causal_lm-Phi-2
16m 30s
cpp-beam_search_causal_lm-notus-7b-v1
31m 42s
cpp-speculative_decoding_lm-ubuntu
13m 17s
cpp-prompt_lookup_decoding_lm-ubuntu
12m 35s
cpp-Phi-1_5
8m 59s
cpp-greedy_causal_lm-redpajama-3b-chat
10m 56s
cpp-chat_sample-ubuntu
16m 56s
visual_language_chat_sample-ubuntu-minicpm_v2_6
7m 59s
visual_language_chat_sample-ubuntu-llava_1_5
/
visual_language_chat_sample-ubuntu-llava
14m 33s
visual_language_chat_sample-ubuntu-llava_next
/
visual_language_chat_sample-ubuntu-llava
35m 48s
visual_language_chat_sample-ubuntu-internvl2
24m 28s
cpp-continuous-batching-ubuntu
15m 50s
cpp-continuous-batching-windows
26m 24s
cpp-continuous-batching-macos
22m 9s
visual_language_chat_sample-ubuntu-qwen2vl
12m 54s
ci/gha_overall_status_causal_lm
0s