diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index b8256af650e2f8..fb028d4fbc06a7 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -5,9 +5,7 @@ This page is regularly updated to help you identify the best-performing LLMs on Intel® Core™ Ultra processor family and AI PCs. The current data is as of OpenVINO 2024.4, 24 Oct. 2024 -The tables below list the key performance indicators for a selection of Large Language Models, -running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and Intel® Core™ Ultra -7-288V based system, on built-in GPUs. +The tables below list the key performance indicators for inference on built-in GPUs. @@ -16,14 +14,32 @@ running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and In +.. tab-set:: -.. csv-table:: - :class: modeldata stripe - :name: supportedModelsTableOv - :header-rows: 1 - :file: ../../_static/benchmarks_files/llm_models.csv + .. tab-item:: 9-288V + + .. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models_9-288V.csv + + .. tab-item:: 7-258V + + .. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models_7-258V.csv + + .. tab-item:: 7-155H + + .. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models_7-155H.csv -| .. 
grid:: 1 1 2 2 :gutter: 4 diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv new file mode 100644 index 00000000000000..d2c68a3619620e --- /dev/null +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv @@ -0,0 +1,156 @@ +Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec,,, +opt-125m-gptq,INT4-MIXED,32,965.9,29,7.7,129.87,,, +opt-125m-gptq,INT4-MIXED,1024,1507.9,113.1,7.8,128.21,,, +tiny-llama-1.1b-chat,INT4-MIXED,32,1831.8,46.5,16.7,59.88,,, +tiny-llama-1.1b-chat,INT4-MIXED,1024,1806.3,635,17.8,56.18,,, +qwen2-0.5b,INT4-MIXED,32,2551.7,61.4,18.3,54.64,,, +qwen2-0.5b,INT4-MIXED,1024,2976.6,356.1,19.2,52.08,,, +tiny-llama-1.1b-chat,INT8-CW,32,1987.4,56,21.6,46.30,,, +tiny-llama-1.1b-chat,INT8-CW,1024,2209.1,772.7,22.6,44.25,,, +qwen2-0.5b,INT8-CW,32,2484.9,57.3,22.8,43.86,,, +qwen2-0.5b,INT8-CW,1024,3102.5,407.1,23.9,41.84,,, +qwen2-1.5b,INT4-MIXED,32,4265.2,71.7,25.5,39.22,,, +qwen2-1.5b,INT4-MIXED,1024,4884.5,862.4,26.8,37.31,,, +dolly-v2-3b,INT4-MIXED,32,2401.3,89.6,27.5,36.36,,, +red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2511.5,78.6,28.2,35.46,,, +phi-2,INT4-MIXED,32,2279.5,95.7,29.1,34.36,,, +minicpm-1b-sft,INT4-MIXED,31,2759.9,104.4,30.9,32.36,,, +phi-2,INT4-MIXED,32,2620.1,100.8,31,32.26,,, +stable-zephyr-3b-dpo,INT4-MIXED,30,2636.5,86.8,31.7,31.55,,, +dolly-v2-3b,INT4-MIXED,1024,3137.1,1782.9,32.2,31.06,,, +red-pajama-incite-chat-3b-v1,INT4-MIXED,1020,3118.5,1831.7,33.3,30.03,,, +red-pajama-incite-chat-3b-v1,INT4-MIXED,1024,2862.7,1821.1,33.5,29.85,,, +qwen2-1.5b,INT8-CW,32,4831.2,87,33.8,29.59,,, +opt-2.7b,INT4-MIXED,31,2898.3,73.2,33.9,29.50,,, +phi-2,INT4-MIXED,1024,2797.4,1887,34,29.41,,, +orca-mini-3b,INT4-MIXED,32,2877.8,100.3,35,28.57,,, +stablelm-3b-4e1t,INT4-MIXED,32,2669.4,94.7,35.3,28.33,,, +qwen2-1.5b,INT8-CW,1024,5455.8,1047.6,35.3,28.33,,, +minicpm-1b-sft,INT8-CW,31,3104.1,103.5,35.3,28.33,,, 
+phi-2,INT4-MIXED,1024,3039.8,1917.4,35.9,27.86,,, +stable-zephyr-3b-dpo,INT4-MIXED,946,3411.4,1695,37,27.03,,, +gemma-2b-it,INT4-MIXED,32,3991.7,116.1,37.9,26.39,,, +opt-2.7b,INT4-MIXED,937,3617.5,1764.9,38.2,26.18,,, +phi-3-mini-4k-instruct,INT4-MIXED,31,2935.3,111.6,38.2,26.18,,, +phi-3-mini-4k-instruct,INT4-MIXED,38,3102.4,134,38.4,26.04,,, +phi-3-mini-4k-instruct,INT4-MIXED,31,2986.1,114.1,38.9,25.71,,, +phi-3-mini-4k-instruct,INT4-MIXED,38,2977.4,131.1,39,25.64,,, +gemma-2b-it,INT4-MIXED,1024,4973.3,1249.2,39.7,25.19,,, +stablelm-3b-4e1t,INT4-MIXED,1024,3196.9,2045.4,39.9,25.06,,, +dolly-v2-3b,INT8-CW,32,3490.2,107.4,41.5,24.10,,, +red-pajama-incite-chat-3b-v1,INT8-CW,32,3457.9,105,42.5,23.53,,, +opt-2.7b,INT8-CW,31,3686.8,107.5,44.1,22.68,,, +phi-2,INT8-CW,32,3554.9,116.6,44.1,22.68,,, +phi-3-mini-4k-instruct,INT4-MIXED,1023,3390.7,2277.1,44.2,22.62,,, +phi-3-mini-4k-instruct,INT4-MIXED,1061,3643.6,2485,44.4,22.52,,, +phi-3-mini-4k-instruct,INT4-MIXED,1023,3516.4,2280.9,44.5,22.47,,, +phi-3-mini-4k-instruct,INT4-MIXED,1061,3537.2,2522.4,44.7,22.37,,, +orca-mini-3b,INT4-MIXED,1024,3557.3,1898.9,45,22.22,,, +minicpm-1b-sft,FP16,31,3814.4,97.9,45.4,22.03,,, +stablelm-3b-4e1t,INT8-CW,32,3486.9,100.5,46.1,21.69,,, +stable-zephyr-3b-dpo,INT8-CW,30,3516.7,101.9,46.1,21.69,,, +dolly-v2-3b,INT8-CW,1024,4265.9,2178.6,46.2,21.65,,, +red-pajama-incite-chat-3b-v1,INT8-CW,1020,3979.1,2219.7,47.2,21.19,,, +red-pajama-incite-chat-3b-v1,INT8-CW,1024,3975.5,2199.7,47.3,21.14,,, +opt-2.7b,INT8-CW,937,4358.6,1981.8,48.4,20.66,,, +phi-2,INT8-CW,1024,4058.1,2280.1,48.9,20.45,,, +gemma-2b-it,INT8-CW,32,4786.8,119.8,49.4,20.24,,, +chatglm3-6b,INT4-MIXED,32,4141.5,166.6,49.7,20.12,,, +stablelm-3b-4e1t,INT8-CW,1024,4054.8,2243.5,50.7,19.72,,, +stable-zephyr-3b-dpo,INT8-CW,946,4521.8,1816.4,51.3,19.49,,, +gemma-2b-it,INT8-CW,1024,5810.7,1580,51.3,19.49,,, +chatglm3-6b,INT4-MIXED,32,4651.4,164.7,51.6,19.38,,, +chatglm3-6b,INT4-MIXED,1024,4235.1,2818.7,52.3,19.12,,, 
+orca-mini-3b,INT8-CW,32,4162,109.2,53.3,18.76,,, +chatglm3-6b,INT4-MIXED,1024,4783.8,2869,54.4,18.38,,, +gpt-j-6b,INT4-MIXED,32,4667.3,176.7,56.3,17.76,,, +chatglm3-6b-gptq,INT4-MIXED,32,5369.4,173.9,58.9,16.98,,, +llama-2-7b-chat-hf,INT4-MIXED,32,4280,173.2,60.1,16.64,,, +phi-3-mini-4k-instruct,INT8-CW,31,4585.1,123,60.5,16.53,,, +phi-3-mini-4k-instruct,INT8-CW,38,4597,152,60.5,16.53,,, +chatglm2-6b,INT4-MIXED,32,4847.8,158.7,60.6,16.50,,, +vicuna-7b-v1.5,INT4-MIXED,32,4476.9,178.2,61.2,16.34,,, +chatglm3-6b-gptq,INT4-MIXED,1024,5217.6,2863.7,61.3,16.31,,, +mistral-7b-v0.1,INT4-MIXED,31,4413.6,194,61.7,16.21,,, +qwen2-7b,INT4-MIXED,32,7044.7,184.4,61.7,16.21,,, +mistral-7b-v0.1,INT4-MIXED,32,4427.6,193.3,61.8,16.18,,, +orca-mini-3b,INT8-CW,1024,4821.6,2239.1,62,16.13,,, +codegen25-7b,INT4-MIXED,32,4687.2,176.2,62.7,15.95,,, +chatglm2-6b,INT4-MIXED,1024,5165.9,3148,63,15.87,,, +llama-2-7b-gptq,INT4-MIXED,32,4632.8,175.2,63.4,15.77,,, +stablelm-7b,INT4-MIXED,32,5219.5,206.3,63.4,15.77,,, +qwen-7b-chat,INT4-MIXED,32,7805.6,193.8,63.6,15.72,,, +gpt-j-6b,INT4-MIXED,1024,5314.9,3111.8,63.6,15.72,,, +qwen2-7b,INT4-MIXED,1024,7716.2,3548.3,64.1,15.60,,, +llama-3-8b,INT4-MIXED,32,4910.9,204.8,64.7,15.46,,, +mistral-7b-v0.1,INT4-MIXED,1024,4720.8,3667.1,64.8,15.43,,, +mistral-7b-v0.1,INT4-MIXED,1007,4704.7,3685.4,64.9,15.41,,, +llama-3.1-8b,INT4-MIXED,31,4850.3,211.5,64.9,15.41,,, +phi-3-mini-4k-instruct,INT8-CW,1023,5128.6,2815.2,65.7,15.22,,, +phi-3-mini-4k-instruct,INT8-CW,1061,5155,3407.9,65.9,15.17,,, +mistral-7b-v0.1,INT4-MIXED,32,4939.3,192,66.5,15.04,,, +llama-3-8b,INT4-MIXED,33,4919.4,261.9,67.2,14.88,,, +llama-2-7b-chat-hf,INT4-MIXED,1024,4948.2,3811,67.3,14.86,,, +qwen1.5-7b-chat,INT4-MIXED,32,5943.1,180.5,67.7,14.77,,, +qwen-7b-chat-gptq,INT4-MIXED,32,8057,187,68.1,14.68,,, +llama-3-8b,INT4-MIXED,32,5503.5,198.4,68.1,14.68,,, +qwen-7b-chat,INT4-MIXED,32,8091.6,185.9,68.1,14.68,,, +llama-3-8b,INT4-MIXED,1024,5569.1,3920.5,68.2,14.66,,, 
+llama-3.1-8b,INT4-MIXED,31,5358.6,201,68.2,14.66,,, +stablelm-7b,INT4-MIXED,1020,5804.4,3726.6,68.8,14.53,,, +llama-3.1-8b,INT4-MIXED,31,5452.6,202.9,68.8,14.53,,, +llama-2-7b-chat-hf,INT4-MIXED,32,5023,165.7,69,14.49,,, +llama-3-8b,INT4-MIXED,32,5413.6,202,69.1,14.47,,, +llama-3-8b,INT4-MIXED,33,5440.4,262.1,69.2,14.45,,, +codegen25-7b,INT4-MIXED,1024,5434.6,3513.2,69.9,14.31,,, +mistral-7b-v0.1,INT4-MIXED,1024,5614.9,3819.1,70,14.29,,, +mistral-7b-v0.1,INT4-MIXED,31,4927.8,205,70.5,14.18,,, +llama-3-8b,INT4-MIXED,33,5498.9,270.7,70.6,14.16,,, +llama-3-8b,INT4-MIXED,1025,5577.4,4271.2,70.6,14.16,,, +llama-2-7b-gptq,INT4-MIXED,1024,5302.2,3529.4,70.7,14.14,,, +zephyr-7b-beta,INT4-MIXED,32,5212.4,190.6,71.2,14.04,,, +llama-3-8b,INT4-MIXED,1024,6161.1,3918,71.5,13.99,,, +llama-3-8b,INT4-MIXED,1025,6098,4441.8,72.3,13.83,,, +llama-3-8b,INT4-MIXED,1024,6071.7,3972.2,72.4,13.81,,, +mistral-7b-v0.1,INT4-MIXED,1007,5224.1,4153.4,73.8,13.55,,, +llama-3-8b,INT4-MIXED,1025,6156.9,4357,73.9,13.53,,, +zephyr-7b-beta,INT4-MIXED,1024,5511.6,3978,74.4,13.44,,, +opt-2.7b,FP16,31,9220.3,107.8,74.7,13.39,,, +dolly-v2-3b,FP16,32,6058.9,109.9,74.7,13.39,,, +qwen1.5-7b-chat,INT4-MIXED,1024,7063.2,3791.7,75,13.33,,, +qwen-7b-chat,INT4-MIXED,1024,8919.5,3763.9,75,13.33,,, +red-pajama-incite-chat-3b-v1,FP16,32,6036.5,107.5,75.9,13.18,,, +llama-2-7b-chat-hf,INT4-MIXED,1024,5716.8,4231.7,76.2,13.12,,, +phi-2,FP16,32,6090.1,115.2,77.1,12.97,,, +stable-zephyr-3b-dpo,FP16,30,6113.1,112.1,78.6,12.72,,, +qwen-7b-chat,INT4-MIXED,1024,9212.9,3857.4,78.6,12.72,,, +stablelm-3b-4e1t,FP16,32,6065.4,110.2,78.7,12.71,,, +opt-2.7b,FP16,937,9733.8,3750.8,78.8,12.69,,, +dolly-v2-3b,FP16,1024,6615.2,2230.9,79.1,12.64,,, +red-pajama-incite-chat-3b-v1,FP16,1020,6588.3,2259.4,80.2,12.47,,, +glm-4-9b,INT4-MIXED,33,6386.2,328,80.4,12.44,,, +red-pajama-incite-chat-3b-v1,FP16,1024,6570.3,2268.7,80.4,12.44,,, +baichuan2-7b-chat,INT4-MIXED,32,5977.9,201.7,81,12.35,,, +glm-4-9b,INT4-MIXED,32,6389.7,248.1,81,12.35,,, 
+phi-2,FP16,1024,6646.2,2406.7,81.4,12.29,,, +stable-zephyr-3b-dpo,FP16,946,6875.7,1868.2,82.9,12.06,,, +stablelm-3b-4e1t,FP16,1024,6636.1,2036.9,83,12.05,,, +chatglm2-6b,INT8-CW,32,6731.8,159.2,84.4,11.85,,, +glm-4-9b,INT4-MIXED,1025,7061.4,4939.2,85.2,11.74,,, +qwen-7b-chat-gptq,INT4-MIXED,1024,9175.3,3898,85.3,11.72,,, +gemma-7b-it,INT4-MIXED,32,7883.9,230.5,86,11.63,,, +gemma-7b-it,INT4-MIXED,32,8002.6,235,86.1,11.61,,, +glm-4-9b,INT4-MIXED,1024,7064.9,4411.2,86.2,11.60,,, +gpt-j-6b,INT8-CW,32,7009.2,176.8,86.4,11.57,,, +chatglm2-6b,INT8-CW,1024,7050.5,3871.6,86.8,11.52,,, +chatglm3-6b,INT8-CW,32,6755.9,159,86.8,11.52,,, +baichuan2-7b-chat,INT4-MIXED,1024,7033.3,4049,88.8,11.26,,, +chatglm3-6b,INT8-CW,1024,7076.5,3865.9,89.2,11.21,,, +qwen-7b-chat,INT4-MIXED,32,9245.7,176.3,90,11.11,,, +gemma-7b-it,INT4-MIXED,1024,9449.4,4305.8,93.2,10.73,,, +gpt-j-6b,INT8-CW,1024,7672.3,4181.1,93.5,10.70,,, +gemma-7b-it,INT4-MIXED,1024,9330.5,4222.5,93.7,10.67,,, +orca-mini-3b,FP16,32,7416.5,122.3,94.7,10.56,,, +codegen25-7b,INT8-CW,32,7557.6,170.7,98.4,10.16,,, +qwen-7b-chat,INT4-MIXED,1024,10371.1,4271.7,98.9,10.11,,, +llama-2-7b-chat-hf,INT8-CW,32,7390.6,171.6,99.9,10.01,,, diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv new file mode 100644 index 00000000000000..efbf0cee8e4a80 --- /dev/null +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv @@ -0,0 +1,182 @@ +Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec,,, +opt-125m-gptq,INT4-MIXED,1024,1513.6,81.9,7.8,128.21,,, +opt-125m-gptq,INT4-MIXED,32,979.9,50.4,7.9,126.58,,, +tiny-llama-1.1b-chat,INT4-MIXED,1024,1943.3,176.3,16.8,59.52,,, +tiny-llama-1.1b-chat,INT4-MIXED,32,1982.2,59.5,17.1,58.48,,, +qwen2-0.5b,INT4-MIXED,32,2678,117.3,18.7,53.48,,, +tiny-llama-1.1b-chat,INT8-CW,32,2080.9,59.4,19,52.63,,, +qwen2-0.5b,INT4-MIXED,1024,3036.1,165.5,19.2,52.08,,, 
+tiny-llama-1.1b-chat,INT8-CW,1024,2287,241.4,19.6,51.02,,, +qwen2-0.5b,INT8-CW,1024,3084.9,172.1,20,50.00,,, +qwen2-0.5b,INT8-CW,32,2518,105.5,21.4,46.73,,, +red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2793.6,141.8,23.9,41.84,,, +qwen2-1.5b,INT4-MIXED,32,4515.4,118.7,24,41.67,,, +qwen2-1.5b,INT4-MIXED,1024,4930.1,229.6,24.3,41.15,,, +dolly-v2-3b,INT4-MIXED,32,2486.1,174,25.4,39.37,,, +phi-2,INT4-MIXED,32,2552.9,210.6,26.9,37.17,,, +red-pajama-incite-chat-3b-v1,INT4-MIXED,1020,2934.1,464.5,27.5,36.36,,, +qwen2-1.5b,INT8-CW,32,4813.4,119.1,27.8,35.97,,, +opt-2.7b,INT4-MIXED,31,3172.5,131.9,28.5,35.09,,, +red-pajama-incite-chat-3b-v1,INT4-MIXED,1024,3038.2,447.1,28.6,34.97,,, +dolly-v2-3b,INT4-MIXED,1024,2947.4,409,28.8,34.72,,, +qwen2-1.5b,INT8-CW,1024,5394.8,327.9,29.3,34.13,,, +stable-zephyr-3b-dpo,INT4-MIXED,30,2728.1,131.2,29.8,33.56,,, +phi-2,INT4-MIXED,32,2805.1,208.3,30.2,33.11,,, +minicpm-1b-sft,INT8-CW,31,3104.2,147.8,30.9,32.36,,, +phi-2,INT4-MIXED,1024,3058.9,602.9,31.1,32.15,,, +minicpm-1b-sft,INT4-MIXED,31,2970.1,183.7,31.1,32.15,,, +stablelm-3b-4e1t,INT4-MIXED,32,3077.1,183.2,31.6,31.65,,, +opt-2.7b,INT4-MIXED,937,3416.7,429.4,31.6,31.65,,, +stable-zephyr-3b-dpo,INT4-MIXED,946,3211.8,428.8,32.3,30.96,,, +phi-3-mini-4k-instruct,INT4-MIXED,31,3014.5,116,32.5,30.77,,, +phi-3-mini-4k-instruct,INT4-MIXED,38,2957.4,153.9,32.5,30.77,,, +phi-2,INT4-MIXED,1024,3278.9,613.3,33.4,29.94,,, +phi-3-mini-4k-instruct,INT4-MIXED,38,3288.5,152.9,33.4,29.94,,, +phi-3-mini-4k-instruct,INT4-MIXED,31,3265.1,123.6,34.1,29.33,,, +gemma-2b-it,INT4-MIXED,32,4162.1,208.8,34.2,29.24,,, +stablelm-3b-4e1t,INT4-MIXED,1024,3525.8,524.5,35,28.57,,, +phi-3-mini-4k-instruct,INT4-MIXED,1061,3427.8,777.5,36.5,27.40,,, +phi-3-mini-4k-instruct,INT4-MIXED,1023,3405.4,554.1,36.7,27.25,,, +gemma-2b-it,INT4-MIXED,1024,5053.1,354.8,36.9,27.10,,, +minicpm-1b-sft,FP16,31,3595.5,124.9,36.9,27.10,,, +phi-3-mini-4k-instruct,INT4-MIXED,1061,3547.2,755.8,37.1,26.95,,, 
+phi-3-mini-4k-instruct,INT4-MIXED,1023,3528.4,536.4,37.4,26.74,,, +red-pajama-incite-chat-3b-v1,INT8-CW,32,3747.7,189.9,38.1,26.25,,, +opt-2.7b,INT8-CW,31,3810.7,145.7,38.5,25.97,,, +chatglm3-6b,INT4-MIXED,32,4120.7,67.3,38.7,25.84,,, +dolly-v2-3b,INT8-CW,32,3747,188.4,39.2,25.51,,, +chatglm3-6b,INT4-MIXED,32,4482.9,69.9,40.7,24.57,,, +chatglm3-6b,INT4-MIXED,1024,4146,606.8,41,24.39,,, +opt-2.7b,INT8-CW,937,4458.9,587.8,41.8,23.92,,, +red-pajama-incite-chat-3b-v1,INT8-CW,1024,4088.4,634.1,41.9,23.87,,, +red-pajama-incite-chat-3b-v1,INT8-CW,1020,4086.8,653.4,42,23.81,,, +phi-2,INT8-CW,32,3794.6,202.7,42.1,23.75,,, +chatglm3-6b,INT4-MIXED,1024,4446.7,598.6,42.3,23.64,,, +stablelm-3b-4e1t,INT8-CW,32,3652.5,146,42.6,23.47,,, +stable-zephyr-3b-dpo,INT8-CW,30,3768.6,151.9,42.6,23.47,,, +dolly-v2-3b,INT8-CW,1024,4092,603.1,42.9,23.31,,, +stablelm-3b-4e1t,INT8-CW,1024,4143.2,671.7,45.2,22.12,,, +gemma-2b-it,INT8-CW,32,4878.4,221.6,45.6,21.93,,, +phi-2,INT8-CW,1024,4153.6,810.3,46,21.74,,, +llama-2-7b-chat-hf,INT4-MIXED,32,4394.6,109.7,46.2,21.65,,, +chatglm3-6b-gptq,INT4-MIXED,32,5218.9,79.7,46.7,21.41,,, +stable-zephyr-3b-dpo,INT8-CW,946,4360.1,627.8,46.8,21.37,,, +vicuna-7b-v1.5,INT4-MIXED,32,4482.3,101.2,47.2,21.19,,, +gemma-2b-it,INT8-CW,1024,5837.1,507.1,48,20.83,,, +llama-2-7b-gptq,INT4-MIXED,32,4734.3,102.8,48.1,20.79,,, +orca-mini-3b,INT4-MIXED,32,2720.1,132,48.1,20.79,,, +qwen-7b-chat,INT4-MIXED,32,7803.7,178.5,48.3,20.70,,, +mistral-7b-v0.1,INT4-MIXED,31,4537.5,99,48.5,20.62,,, +codegen25-7b,INT4-MIXED,32,4723.3,108.5,48.5,20.62,,, +chatglm3-6b-gptq,INT4-MIXED,1024,5150.8,614.2,48.8,20.49,,, +mistral-7b-v0.1,INT4-MIXED,32,4572,102.9,48.8,20.49,,, +llama-3-8b,INT4-MIXED,33,4991.2,252.2,50.9,19.65,,, +qwen-7b-chat-gptq,INT4-MIXED,32,8088.4,212.6,51,19.61,,, +chatglm2-6b,INT4-MIXED,32,4960.6,105.5,51.2,19.53,,, +gpt-j-6b,INT4-MIXED,32,4699.5,259.2,51.4,19.46,,, +llama-3.1-8b,INT4-MIXED,31,4897.8,106.9,51.5,19.42,,, 
+llama-3-8b,INT4-MIXED,32,4999.7,105.9,51.6,19.38,,, +qwen-7b-chat,INT4-MIXED,32,8085.9,193.5,51.7,19.34,,, +falcon-7b-instruct,INT4-MIXED,32,5416.2,175,52.5,19.05,,, +mistral-7b-v0.1,INT4-MIXED,1007,4772.6,803,52.6,19.01,,, +qwen1.5-7b-chat,INT4-MIXED,32,6027.3,174.9,53,18.87,,, +mistral-7b-v0.1,INT4-MIXED,1024,4775,717.6,53,18.87,,, +llama-2-7b-chat-hf,INT4-MIXED,1024,4976.5,992.1,53.1,18.83,,, +qwen2-7b,INT4-MIXED,32,7087.1,138.1,53.3,18.76,,, +llama-2-7b-gptq,INT4-MIXED,1024,5351.2,711.6,53.7,18.62,,, +llama-3-8b,INT4-MIXED,32,5472.8,109.4,53.7,18.62,,, +phi-3-mini-4k-instruct,INT8-CW,38,4575.3,115.9,53.7,18.62,,, +stablelm-7b,INT4-MIXED,32,5213.7,128.5,53.8,18.59,,, +phi-3-mini-4k-instruct,INT8-CW,31,4571.8,118.9,53.8,18.59,,, +llama-3-8b,INT4-MIXED,33,5480.4,246.8,53.9,18.55,,, +llama-3-8b,INT4-MIXED,32,5528.2,144.9,54.3,18.42,,, +llama-3.1-8b,INT4-MIXED,31,5377.3,112.8,54.3,18.42,,, +chatglm2-6b,INT4-MIXED,1024,5232.3,759.6,54.6,18.32,,, +llama-3.1-8b,INT4-MIXED,31,5440.4,126.4,54.8,18.25,,, +llama-3-8b,INT4-MIXED,33,5532.8,248.2,54.9,18.21,,, +codegen25-7b,INT4-MIXED,1024,5412.9,714.8,55,18.18,,, +mistral-7b-v0.1,INT4-MIXED,32,4998.5,117.3,55.2,18.12,,, +mistral-7b-v0.1,INT4-MIXED,31,5000.2,122.4,55.6,17.99,,, +llama-3-8b,INT4-MIXED,1024,5594,953.5,56.6,17.67,,, +gpt-j-6b,INT4-MIXED,1024,5323.8,1254,56.8,17.61,,, +llama-3-8b,INT4-MIXED,1025,5596.7,1192.3,56.8,17.61,,, +qwen2-7b,INT4-MIXED,1024,7722.1,714.2,57,17.54,,, +phi-3-mini-4k-instruct,INT8-CW,1023,5067.1,818.5,57.4,17.42,,, +phi-3-mini-4k-instruct,INT8-CW,1061,5086.1,975.1,57.4,17.42,,, +llama-2-7b-chat-hf,INT4-MIXED,32,5087.7,126.2,57.9,17.27,,, +stablelm-7b,INT4-MIXED,1020,5780.5,1248.4,59,16.95,,, +llama-3-8b,INT4-MIXED,1025,6088.9,1381.5,59,16.95,,, +llama-3-8b,INT4-MIXED,1024,6084.8,931.2,59.2,16.89,,, +llama-3-8b,INT4-MIXED,1025,6141.2,1494.3,59.4,16.84,,, +llama-3-8b,INT4-MIXED,1024,6133.8,1075.2,59.6,16.78,,, +mistral-7b-v0.1,INT4-MIXED,1024,5472.6,794.3,59.7,16.75,,, 
+zephyr-7b-beta,INT4-MIXED,32,5328.5,103.5,59.8,16.72,,, +falcon-7b-instruct,INT4-MIXED,1024,5677.5,686.2,59.8,16.72,,, +mistral-7b-v0.1,INT4-MIXED,1007,5243.5,1074,59.9,16.69,,, +qwen1.5-7b-chat,INT4-MIXED,1024,7096.7,1132.7,60,16.67,,, +qwen-7b-chat,INT4-MIXED,1024,8872.6,792.8,61,16.39,,, +qwen-7b-chat,INT4-MIXED,1024,9164.4,822.6,63.3,15.80,,, +orca-mini-3b,INT8-CW,32,4221.7,170.6,63.5,15.75,,, +llama-2-7b-chat-hf,INT4-MIXED,1024,5708.1,1397.9,63.6,15.72,,, +glm-4-9b,INT4-MIXED,33,6402.9,307.1,63.8,15.67,,, +zephyr-7b-beta,INT4-MIXED,1024,5572.4,1156.4,64.3,15.55,,, +glm-4-9b,INT4-MIXED,32,6383.1,256.2,64.5,15.50,,, +baichuan2-7b-chat,INT4-MIXED,32,5926.3,191.8,65.8,15.20,,, +opt-2.7b,FP16,31,5886,112.2,68,14.71,,, +dolly-v2-3b,FP16,32,6161.5,147.5,69.5,14.39,,, +red-pajama-incite-chat-3b-v1,FP16,32,6265.4,146.2,69.6,14.37,,, +glm-4-9b,INT4-MIXED,1024,6994.5,1013.7,69.8,14.33,,, +opt-2.7b,FP16,937,6345,379.5,71.6,13.97,,, +glm-4-9b,INT4-MIXED,1025,7014.9,1416.8,72.5,13.79,,, +phi-2,FP16,32,6204.7,189.2,72.9,13.72,,, +stable-zephyr-3b-dpo,FP16,30,6221.4,159.7,73,13.70,,, +dolly-v2-3b,FP16,1024,6669.9,424.3,73.3,13.64,,, +red-pajama-incite-chat-3b-v1,FP16,1020,6658.8,484.7,73.4,13.62,,, +stablelm-3b-4e1t,FP16,32,6216.3,145.4,73.5,13.61,,, +qwen-7b-chat,INT4-MIXED,32,9294.9,144.4,73.8,13.55,,, +red-pajama-incite-chat-3b-v1,FP16,1024,6755.1,469.1,73.9,13.53,,, +qwen-7b-chat-gptq,INT4-MIXED,1024,9152.1,827.2,75.1,13.32,,, +gemma-7b-it,INT4-MIXED,32,7991.4,128.6,75.8,13.19,,, +chatglm2-6b,INT8-CW,32,6854.4,110.2,76.3,13.11,,, +chatglm3-6b,INT8-CW,32,6754.8,112.3,76.4,13.09,,, +stable-zephyr-3b-dpo,FP16,946,6940,428.6,76.7,13.04,,, +baichuan2-7b-chat,INT4-MIXED,1024,6930.2,1229.5,76.7,13.04,,, +gemma-7b-it,INT4-MIXED,32,8061.5,125.6,76.7,13.04,,, +stablelm-3b-4e1t,FP16,1024,6722.9,480.8,77,12.99,,, +phi-2,FP16,1024,6709.4,624.1,77.2,12.95,,, +chatglm2-6b,INT8-CW,1024,7132.9,1361.9,78.7,12.71,,, +chatglm3-6b,INT8-CW,1024,7037.5,1389.2,78.7,12.71,,, 
+qwen-7b-chat,INT4-MIXED,1024,10374.1,1357.5,81.1,12.33,,, +gemma-7b-it,INT4-MIXED,1024,9398,1268.5,82.7,12.09,,, +gemma-7b-it,INT4-MIXED,1024,9469.5,1268,83.2,12.02,,, +gpt-j-6b,INT8-CW,32,7126.5,255.2,87.2,11.47,,, +falcon-7b-instruct,INT8-CW,32,8287.6,131.1,88.4,11.31,,, +llama-2-7b-chat-hf,INT8-CW,32,7474.9,139.5,89.7,11.15,,, +codegen25-7b,INT8-CW,32,7559.4,138,90.8,11.01,,, +vicuna-7b-v1.5,INT8-CW,32,7390.8,136.6,90.8,11.01,,, +falcon-7b-instruct,INT8-CW,1024,8546.8,1205.9,92.2,10.85,,, +stablelm-7b,INT8-CW,32,8356.4,143,92.4,10.82,,, +qwen2-7b,INT8-CW,32,9940.7,132,92.5,10.81,,, +baichuan2-13b-chat,INT4-MIXED,32,9879.2,184.9,93.3,10.72,,, +phi-3-mini-4k-instruct,FP16,38,8290,125.2,93.4,10.71,,, +phi-3-mini-4k-instruct,FP16,31,8290.5,109.5,93.5,10.70,,, +gpt-j-6b,INT8-CW,1024,7759,1996.8,93.9,10.65,,, +llama-2-7b-chat-hf,INT8-CW,1024,8097.8,1701.6,94.7,10.56,,, +phi-3-medium-4k-instruct,INT4-MIXED,38,8210.4,527,95.1,10.52,,, +mistral-7b-v0.1,INT8-CW,31,7882.4,128.6,95.1,10.52,,, +vicuna-7b-v1.5,INT8-CW,1024,8013.2,1558.1,95.1,10.52,,, +mistral-7b-v0.1,INT8-CW,32,7886.9,140.6,95.2,10.50,,, +qwen2-7b,INT8-CW,1024,10573.1,1564.5,95.3,10.49,,, +codegen25-7b,INT8-CW,1024,8253.1,1526.3,95.7,10.45,,, +zephyr-7b-beta,INT8-CW,32,7785.3,144.4,95.8,10.44,,, +stablelm-7b,INT8-CW,1020,8921.9,1845,96.9,10.32,,, +mistral-7b-v0.1,INT8-CW,1007,8127.4,1648.4,97.4,10.27,,, +qwen-7b-chat,INT8-CW,32,11083.2,140.6,97.7,10.24,,, +qwen1.5-7b-chat,INT8-CW,32,8870,156.4,98.1,10.19,,, +llama-3.1-8b,INT8-CW,31,8600.3,189.2,98.4,10.16,,, +mistral-7b-v0.1,INT8-CW,1024,8134.7,1554.1,98.4,10.16,,, +qwen-14b-chat,INT4-MIXED,32,9876.2,192.3,98.6,10.14,,, +zephyr-7b-beta,INT8-CW,1024,8035.2,1580.4,98.8,10.12,,, +llama-3-8b,INT8-CW,32,8694.2,150.7,99.5,10.05,,, +llama-3-8b,INT8-CW,33,8700.4,175.4,99.8,10.02,,, +phi-3-mini-4k-instruct,FP16,1023,8795.2,601.3,99.9,10.01,,, diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models.csv 
b/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv similarity index 100% rename from docs/sphinx_setup/_static/benchmarks_files/llm_models.csv rename to docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv