diff --git a/plugins/framework/src/fms_acceleration/framework_plugin.py b/plugins/framework/src/fms_acceleration/framework_plugin.py index 06f0190c..5f111d5a 100644 --- a/plugins/framework/src/fms_acceleration/framework_plugin.py +++ b/plugins/framework/src/fms_acceleration/framework_plugin.py @@ -101,7 +101,7 @@ def _update_config_contents(_cfg: Dict, content: Dict, key: str): for key in _or_keys: content = _trace_key_path(configuration, key) if content is not None: - if reject == True: + if reject is True: # it is an OR key, and if at least one of them specified # then do not reject reject = False diff --git a/plugins/fused-ops-and-kernels/tests/test_foak_plugins.py b/plugins/fused-ops-and-kernels/tests/test_foak_plugins.py index dd7b472d..0e9bae76 100644 --- a/plugins/fused-ops-and-kernels/tests/test_foak_plugins.py +++ b/plugins/fused-ops-and-kernels/tests/test_foak_plugins.py @@ -35,7 +35,7 @@ DIRNAME, "../configs/fast_quantized_peft.yaml" ) - +@pytest.mark.skip(reason="Installation logic has changed - test to be fixed in future.") def test_configure_gptq_foak_plugin(): "test foak plugin loads correctly" diff --git a/scripts/benchmarks/refs/a100_80gb.csv b/scripts/benchmarks/refs/a100_80gb.csv index 37cdcc6d..abb7f2bc 100644 --- a/scripts/benchmarks/refs/a100_80gb.csv +++ b/scripts/benchmarks/refs/a100_80gb.csv @@ -1,85 +1,125 @@ -epoch,fp16,framework_config,learning_rate,lora_alpha,lora_dropout,mem_nvidia_mem_reserved,mem_peak_torch_mem_alloc_in_bytes,mem_torch_mem_alloc_in_bytes,model_name_or_path,num_gpus,peft_method,per_device_train_batch_size,r,target_modules,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second -0.15,,none,2e-5,,,76031.0,72435426816.0,43468236288.0,mistralai/Mistral-7B-v0.1,1,,4,,,float16,0.9239591407775879,539.6271,0.741,0.185,3036.17 -0.15,,none,2e-5,,,43610.0,36226242560.0,28984444928.0,mistralai/Mistral-7B-v0.1,2,,2,,,float16,0.878299913406372,297.9576,1.342,0.336,2749.384 -0.29,,none,2e-5,,,78727.0,72435820032.0,43468629504.0,mistralai/Mistral-7B-v0.1,1,,8,,,float16,1.008358039855957,1048.9483,0.763,0.095,3123.891 -0.29,,none,2e-5,,,52837.0,36226439168.0,28984641536.0,mistralai/Mistral-7B-v0.1,2,,4,,,float16,0.917950096130371,554.8154,1.442,0.18,2953.054 -,,none,2e-5,,,80969.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,4,,,float16,,,,, -,,none,2e-5,,,80921.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,2,,,float16,,,,, -,,none,2e-5,,,80969.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,8,,,float16,,,,, -,,none,2e-5,,,79851.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,4,,,float16,,,,, -,,none,2e-5,,,81049.0,,,NousResearch/Llama-2-70b-hf,1,,4,,,float16,,,,, -,,none,2e-5,,,80535.0,,,NousResearch/Llama-2-70b-hf,2,,2,,,float16,,,,, -,,none,2e-5,,,81049.0,,,NousResearch/Llama-2-70b-hf,1,,8,,,float16,,,,, -,,none,2e-5,,,80778.0,,,NousResearch/Llama-2-70b-hf,2,,4,,,float16,,,,, -0.15,,none,2e-4,16,0.1,28069.0,25654479360.0,14664623616.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8902829456329345,492.8103,0.812,0.203,3324.606 -0.15,,none,2e-4,16,0.1,17745.0,15245721600.0,7368103936.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8672024631500244,282.6828,1.415,0.354,2897.948 -0.29,,none,2e-4,16,0.1,41405.0,36645743104.0,14665016832.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9471714115142822,981.2132,0.815,0.102,3339.539 -0.29,,none,2e-4,16,0.1,25347.0,22161342464.0,7368300544.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8963674259185791,519.4995,1.54,0.192,3153.805 -,,none,2e-4,16,0.1,81015.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.15,,none,2e-4,16,0.1,61651.0,58190445568.0,47366035456.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8985625457763672,521.5924,0.767,0.192,1570.575 -,,none,2e-4,16,0.1,81015.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.29,,none,2e-4,16,0.1,69774.0,65584154624.0,47366232064.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9053801918029785,899.4995,0.889,0.111,1821.457 -,,none,2e-4,16,0.1,81043.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,,,,, -,,none,2e-4,16,0.1,80885.0,,,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,,,,, -,,none,2e-4,16,0.1,81043.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, -,,none,2e-4,16,0.1,80297.0,,,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.15,True,baseline-peft-bnb,2e-4,16,0.1,25359.0,21215549440.0,4831579648.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.868394603729248,582.2584,0.687,0.172,2813.871 -0.15,True,baseline-peft-bnb,2e-4,16,0.1,12012.0,9525447168.0,2244599808.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8697228622436524,293.0901,1.365,0.341,2795.045 -0.29,True,baseline-peft-bnb,2e-4,16,0.1,45481.0,37594830848.0,4831972864.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.8708569717407226,1141.9093,0.701,0.088,2869.58 -0.29,True,baseline-peft-bnb,2e-4,16,0.1,19437.0,16171584000.0,2244796416.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8702435684204102,504.2852,1.586,0.198,3248.955 -0.15,True,baseline-peft-bnb,2e-4,16,0.1,44857.0,44196393984.0,25726455296.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8948281192779541,1108.8622,0.361,0.09,1477.551 -0.15,True,baseline-peft-bnb,2e-4,16,0.1,24006.0,21761152512.0,13273686016.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8967031955718994,533.5877,0.75,0.187,1535.268 -0.29,True,baseline-peft-bnb,2e-4,16,0.1,63891.0,62284255232.0,25726848512.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.8938827133178711,2008.7727,0.398,0.05,1631.245 -0.29,True,baseline-peft-bnb,2e-4,16,0.1,31110.0,28873790464.0,13273882624.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8923345756530762,904.7134,0.884,0.111,1810.96 -,True,baseline-peft-bnb,2e-4,16,0.1,79963.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.14,True,baseline-peft-bnb,2e-4,16,0.1,51535.0,46685150208.0,19266900480.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,1.0005579376220703,1958.8245,0.204,0.051,418.21 -,True,baseline-peft-bnb,2e-4,16,0.1,80417.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.28,True,baseline-peft-bnb,2e-4,16,0.1,80307.0,72626134016.0,19267097088.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9999524974822998,3755.0656,0.213,0.027,436.317 -0.15,True,accelerated-peft-bnb,2e-4,16,0.1,18267.0,15323746816.0,4306628096.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8702622985839844,473.6415,0.845,0.211,3459.156 -0.15,True,accelerated-peft-bnb,2e-4,16,0.1,11974.0,9525447168.0,2244599808.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8716620254516602,290.781,1.376,0.344,2817.241 -0.29,True,accelerated-peft-bnb,2e-4,16,0.1,32691.0,26315010560.0,4307021312.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.8700333976745606,930.8538,0.859,0.107,3520.209 -0.29,True,accelerated-peft-bnb,2e-4,16,0.1,19507.0,16171584000.0,2244796416.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8691346645355225,504.1747,1.587,0.198,3249.667 -0.15,True,accelerated-peft-bnb-foak,2e-4,16,0.1,16809.0,13065396224.0,4306628096.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8685474967956544,410.2967,0.975,0.244,3993.208 -0.15,True,accelerated-peft-bnb-foak,2e-4,16,0.1,11780.0,9309506048.0,2244599808.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8708373165130615,223.4526,1.79,0.448,3666.101 -0.29,True,accelerated-peft-bnb-foak,2e-4,16,0.1,27953.0,21825572864.0,4307021312.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.871671667098999,802.3406,0.997,0.125,4084.051 -0.29,True,accelerated-peft-bnb-foak,2e-4,16,0.1,18836.0,15686158848.0,2244796416.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8719861793518067,421.817,1.897,0.237,3884.148 -0.15,True,accelerated-peft-bnb,2e-4,16,0.1,37381.0,36218622464.0,25201503744.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8955930042266845,863.9677,0.463,0.116,1896.367 -0.15,True,accelerated-peft-bnb,2e-4,16,0.1,24002.5,21762409472.0,13273686016.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8963699913024903,534.0626,0.749,0.187,1533.903 -0.29,True,accelerated-peft-bnb,2e-4,16,0.1,49911.0,47209886208.0,25201896960.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.89283447265625,1612.732,0.496,0.062,2031.832 -0.29,True,accelerated-peft-bnb,2e-4,16,0.1,31178.0,28883566592.0,13273882624.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.891493616104126,905.1923,0.884,0.11,1810.002 -0.15,True,accelerated-peft-bnb-foak,2e-4,16,0.1,35493.0,34864005632.0,25201503744.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8977092170715332,797.873,0.501,0.125,2053.46 -0.15,True,accelerated-peft-bnb-foak,2e-4,16,0.1,24609.0,21479203840.0,13273686016.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.899329023361206,467.9373,0.855,0.214,1750.662 -0.29,True,accelerated-peft-bnb-foak,2e-4,16,0.1,46045.0,44399605760.0,25201896960.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.8906442070007324,1482.6429,0.54,0.067,2210.107 -0.29,True,accelerated-peft-bnb-foak,2e-4,16,0.1,31782.0,28263236608.0,13273882624.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8938700771331787,819.8908,0.976,0.122,1998.315 -0.14,True,accelerated-peft-bnb,2e-4,16,0.1,71645.0,68126652928.0,37179273216.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0004115581512452,3608.5496,0.111,0.028,454.033 -0.14,True,accelerated-peft-bnb,2e-4,16,0.1,51534.0,46685150208.0,19266900480.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,1.0003361415863037,1957.3459,0.204,0.051,418.526 -,True,accelerated-peft-bnb,2e-4,16,0.1,81013.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.28,True,accelerated-peft-bnb,2e-4,16,0.1,80576.0,72626134016.0,19267097088.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0003034496307373,3754.632,0.213,0.027,436.368 -0.14,True,accelerated-peft-bnb-foak,2e-4,16,0.1,69963.0,67049175552.0,37179273216.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0002764415740968,3310.3528,0.121,0.03,494.932 -0.14,True,accelerated-peft-bnb-foak,2e-4,16,0.1,51248.0,46407474176.0,19266900480.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,1.0008399486541748,1759.3679,0.227,0.057,465.622 -,True,accelerated-peft-bnb-foak,2e-4,16,0.1,80785.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.28,True,accelerated-peft-bnb-foak,2e-4,16,0.1,80907.0,71810538496.0,19267097088.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0006698417663573,3397.5545,0.235,0.029,482.229 -0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,18789.0,15354056192.0,4336937472.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9604459190368653,472.8841,0.846,0.211,3464.696 -0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,12297.0,9542977024.0,2261277696.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9566273403167724,286.9325,1.394,0.349,2855.027 -0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,32583.0,26345319936.0,4337330688.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9381388664245606,927.1603,0.863,0.108,3534.232 -0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,19937.0,16189113856.0,2261474304.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9374161720275879,501.0475,1.597,0.2,3269.95 -0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,16553.0,13095705600.0,4336937472.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.971520586013794,403.7642,0.991,0.248,4057.814 -0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,12341.0,9327035904.0,2261277696.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9715091705322265,220.2987,1.816,0.454,3718.587 -0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,28337.0,21855882240.0,4337330688.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9389788341522217,791.0793,1.011,0.126,4142.189 -0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,19370.0,15703688704.0,2261474304.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9499363136291504,414.8507,1.928,0.241,3949.372 -0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,36439.0,35528691200.0,24511572480.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8999805450439453,824.8819,0.485,0.121,1986.224 -0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,23508.0,21071283712.0,12581313536.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.8981617355346679,498.8269,0.802,0.2,1642.253 -0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,48969.0,46519954944.0,24511965696.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.8971894550323486,1569.1833,0.51,0.064,2088.22 -0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,30660.0,28189791744.0,12581510144.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8969889640808105,869.4187,0.92,0.115,1884.478 -0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,34745.0,34214612480.0,24511572480.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8998163032531739,755.773,0.529,0.132,2167.847 -0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,24143.0,20788983296.0,12581313536.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9024192810058593,433.2446,0.923,0.231,1890.849 -0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,46713.0,43776172032.0,24511965696.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.8984066200256348,1432.2052,0.559,0.07,2287.94 -0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,31123.0,27569485312.0,12581510144.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.8986644458770752,780.5165,1.025,0.128,2099.123 -0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,70529.0,67069982208.0,36122602496.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9913517284393311,3559.5185,0.112,0.028,460.287 -0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,50471.0,45638376448.0,18220084736.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9913260459899902,1905.2123,0.21,0.052,429.978 -,True,accelerated-peft-autogptq,2e-4,16,0.1,79895.0,,,TheBloke/Llama-2-70B-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.28,True,accelerated-peft-autogptq,2e-4,16,0.1,80755.0,71579360256.0,18220281344.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9910284423828125,3686.3588,0.217,0.027,444.449 -0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,69339.0,65992504832.0,36122602496.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.991469144821167,3234.048,0.124,0.031,506.61 -0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,50733.0,45360700416.0,18220084736.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9918032264709473,1691.5951,0.236,0.059,484.277 -,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,80161.0,,,TheBloke/Llama-2-70B-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, -0.28,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,80316.0,70763764736.0,18220281344.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9914980411529541,3325.3628,0.241,0.03,492.698 +bf16,epoch,fp16,framework_config,learning_rate,lora_alpha,lora_dropout,mem_nvidia_mem_reserved,mem_peak_torch_mem_alloc_in_bytes,mem_torch_mem_alloc_in_bytes,model_name_or_path,num_gpus,peft_method,per_device_train_batch_size,r,target_modules,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second +True,0.07,,none,2e-5,,,15359.0,13632690688.0,6770300416.0,bigcode/gpt_bigcode-santacoder,1,,4,,,bfloat16,2.332193660736084,51.1308,7.823,1.956,16021.654 +True,0.07,,none,2e-5,,,16292.0,11310628864.0,9062559744.0,bigcode/gpt_bigcode-santacoder,2,,2,,,bfloat16,2.1947376251220705,34.4961,11.596,2.899,11873.81 +True,0.14,,none,2e-5,,,22507.0,20492466688.0,6769448448.0,bigcode/gpt_bigcode-santacoder,1,,8,,,bfloat16,2.3124921417236326,96.6986,8.273,1.034,16943.362 +True,0.14,,none,2e-5,,,19442.0,13862536704.0,9063688704.0,bigcode/gpt_bigcode-santacoder,2,,4,,,bfloat16,2.169607696533203,56.0038,14.285,1.786,14627.569 +True,0.07,,foak-fast-kernels,2e-5,,,14647.0,12021062144.0,6769251840.0,bigcode/gpt_bigcode-santacoder,1,,4,,,bfloat16,2.3321532440185546,51.9014,7.707,1.927,15783.76 +True,0.07,,foak-fast-kernels,2e-5,,,15159.0,11312634880.0,9064565760.0,bigcode/gpt_bigcode-santacoder,2,,2,,,bfloat16,2.1948485946655274,34.2526,11.678,2.919,11958.203 +True,0.14,,foak-fast-kernels,2e-5,,,19435.0,17273076224.0,6769448448.0,bigcode/gpt_bigcode-santacoder,1,,8,,,bfloat16,2.3125320434570313,95.1025,8.412,1.051,17227.735 +True,0.14,,foak-fast-kernels,2e-5,,,18982.0,12252922880.0,9064710144.0,bigcode/gpt_bigcode-santacoder,2,,4,,,bfloat16,2.1695573806762694,56.1474,14.248,1.781,14590.174 +True,0.15,,none,2e-5,,,76047.0,72434853376.0,43467892224.0,mistralai/Mistral-7B-v0.1,1,,4,,,bfloat16,0.8285089540481567,541.4379,0.739,0.185,3026.016 +True,0.15,,none,2e-5,,,77716.0,72434657280.0,57951176704.0,mistralai/Mistral-7B-v0.1,2,,2,,,bfloat16,0.8260897445678711,309.3386,1.293,0.323,2648.231 +True,0.29,,none,2e-5,,,71823.0,72435246592.0,43468285440.0,mistralai/Mistral-7B-v0.1,1,,8,,,bfloat16,0.8293021202087403,1053.8179,0.759,0.095,3109.456 +True,0.29,,none,2e-5,,,77628.0,72434853888.0,57951373312.0,mistralai/Mistral-7B-v0.1,2,,4,,,bfloat16,0.8233438396453857,565.1788,1.415,0.177,2898.906 +True,0.15,,foak-fast-kernels,2e-5,,,76071.0,72432723456.0,43466827264.0,mistralai/Mistral-7B-v0.1,1,,4,,,bfloat16,0.8281177949905395,483.8157,0.827,0.207,3386.414 +True,0.15,,foak-fast-kernels,2e-5,,,77736.0,72434657280.0,57951176704.0,mistralai/Mistral-7B-v0.1,2,,2,,,bfloat16,0.8248114776611328,279.656,1.43,0.358,2929.313 +True,0.29,,foak-fast-kernels,2e-5,,,70035.0,72433116672.0,43467220480.0,mistralai/Mistral-7B-v0.1,1,,8,,,bfloat16,0.8302128696441651,936.0343,0.855,0.107,3500.726 +True,0.29,,foak-fast-kernels,2e-5,,,80751.0,72434853888.0,57951373312.0,mistralai/Mistral-7B-v0.1,2,,4,,,bfloat16,0.8242907524108887,505.4347,1.583,0.198,3241.566 +True,,,none,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,4,,,bfloat16,,,,, +True,,,none,2e-5,,,81090.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,2,,,bfloat16,,,,, +True,,,none,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,8,,,bfloat16,,,,, +True,,,none,2e-5,,,79873.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,4,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,4,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,79873.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,2,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,8,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,80448.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,4,,,bfloat16,,,,, +True,,,none,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,4,,,bfloat16,,,,, +True,,,none,2e-5,,,80307.0,,,NousResearch/Llama-2-70b-hf,2,,2,,,bfloat16,,,,, +True,,,none,2e-5,,,78361.0,,,NousResearch/Llama-2-70b-hf,1,,8,,,bfloat16,,,,, +True,,,none,2e-5,,,80873.0,,,NousResearch/Llama-2-70b-hf,2,,4,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,4,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,80307.0,,,NousResearch/Llama-2-70b-hf,2,,2,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,8,,,bfloat16,,,,, +True,,,foak-fast-kernels,2e-5,,,80873.0,,,NousResearch/Llama-2-70b-hf,2,,4,,,bfloat16,,,,, +True,0.15,,none,2e-4,16,0.1,28769.0,25681144320.0,14664508928.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8505570697784424,481.2995,0.831,0.208,3404.117 +True,0.15,,none,2e-4,16,0.1,17316.0,14975934464.0,7368046592.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8524067306518555,277.4993,1.441,0.36,2952.08 +True,0.29,,none,2e-4,16,0.1,42809.0,36670876160.0,14664902144.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8525794410705566,953.5883,0.839,0.105,3436.284 +True,0.29,,none,2e-4,16,0.1,24995.0,21622071296.0,7368243200.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8535801601409913,503.0453,1.59,0.199,3256.963 +True,0.15,,foak-fast-kernels,2e-4,16,0.1,27511.0,23530188288.0,14664508928.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8501485443115234,422.1615,0.948,0.237,3880.979 +True,0.15,,foak-fast-kernels,2e-4,16,0.1,16963.0,14774607872.0,7368046592.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8515177631378174,253.1253,1.58,0.395,3236.341 +True,0.29,,foak-fast-kernels,2e-4,16,0.1,40271.0,32393276928.0,14664902144.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8585422229766846,835.7668,0.957,0.12,3920.711 +True,0.29,,foak-fast-kernels,2e-4,16,0.1,23845.0,21219418112.0,7368243200.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8570475673675537,447.0688,1.789,0.224,3664.76 +True,,,none,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,0.15,,none,2e-4,16,0.1,61260.0,57922956288.0,47365978112.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.890601749420166,522.9286,0.765,0.191,1566.562 +True,,,none,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,0.29,,none,2e-4,16,0.1,69154.0,65045124608.0,47366174720.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8849094486236573,877.0711,0.912,0.114,1868.036 +True,,,foak-fast-kernels,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,0.15,,foak-fast-kernels,2e-4,16,0.1,61428.0,57688308736.0,47365978112.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8905545234680176,494.0377,0.81,0.202,1658.173 +True,,,foak-fast-kernels,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,0.29,,foak-fast-kernels,2e-4,16,0.1,68700.0,64576132608.0,47366174720.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8864504814147949,823.1006,0.972,0.121,1990.522 +True,,,none,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,,,none,2e-4,16,0.1,81003.0,,,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,,,none,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,,,none,2e-4,16,0.1,81085.0,,,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,,,foak-fast-kernels,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,,,foak-fast-kernels,2e-4,16,0.1,80875.0,,,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,,,foak-fast-kernels,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,,,foak-fast-kernels,2e-4,16,0.1,80993.0,,,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,0.15,,baseline-peft-bnb,2e-4,16,0.1,24727.0,20556796416.0,4307044864.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8730130004882812,575.2521,0.695,0.174,2848.143 +True,0.15,,baseline-peft-bnb,2e-4,16,0.1,11914.0,9525273600.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8705758380889893,282.8263,1.414,0.354,2896.477 +True,0.29,,baseline-peft-bnb,2e-4,16,0.1,44721.0,36801860096.0,4307438080.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8701838970184326,1116.3381,0.717,0.09,2935.311 +True,0.29,,baseline-peft-bnb,2e-4,16,0.1,19423.0,16171410432.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8743645858764648,490.0888,1.632,0.204,3343.068 +True,0.15,,baseline-peft-bnb,2e-4,16,0.1,43775.0,43550715392.0,25201920512.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8938954734802246,1082.9658,0.369,0.092,1512.883 +True,0.15,,baseline-peft-bnb,2e-4,16,0.1,24068.0,21767946240.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8936581707000733,521.8356,0.767,0.192,1569.843 +True,0.29,,baseline-peft-bnb,2e-4,16,0.1,63329.0,61500009472.0,25202313728.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8923088932037353,1961.7179,0.408,0.051,1670.373 +True,0.29,,baseline-peft-bnb,2e-4,16,0.1,31356.0,28883934208.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.889978551864624,879.0985,0.91,0.114,1863.727 +True,,,baseline-peft-bnb,2e-4,16,0.1,80247.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,0.14,,baseline-peft-bnb,2e-4,16,0.1,51569.0,46684804608.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,1.0016851806640625,1892.8443,0.211,0.053,432.788 +True,,,baseline-peft-bnb,2e-4,16,0.1,79933.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,, +True,0.28,,baseline-peft-bnb,2e-4,16,0.1,80853.0,72625788416.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,1.0005127334594726,3608.5763,0.222,0.028,454.029 +True,0.07,,accelerated-peft-bnb,2e-4,16,0.1,11429.0,9148997120.0,810277376.0,bigcode/gpt_bigcode-santacoder,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4391163635253905,54.3258,7.363,1.841,15079.404 +True,0.07,,accelerated-peft-bnb,2e-4,16,0.1,7308.0,4788195328.0,411216896.0,bigcode/gpt_bigcode-santacoder,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.43807243347168,51.0965,7.828,1.957,8016.205 +True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,21921.0,17486716416.0,810473984.0,bigcode/gpt_bigcode-santacoder,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4325622367858886,101.7155,7.865,0.983,16107.672 +True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,12455.0,8957644800.0,411315200.0,bigcode/gpt_bigcode-santacoder,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.433025417327881,56.9194,14.055,1.757,14392.278 +True,0.07,,accelerated-peft-bnb-foak,2e-4,16,0.1,9125.0,7538417152.0,810277376.0,bigcode/gpt_bigcode-santacoder,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4385263442993166,54.1754,7.383,1.846,15121.253 +True,0.07,,accelerated-peft-bnb-foak,2e-4,16,0.1,6102.0,3989590016.0,411216896.0,bigcode/gpt_bigcode-santacoder,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4416963958740237,32.8776,12.166,3.042,12458.335 +True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,17313.0,14266736128.0,810473984.0,bigcode/gpt_bigcode-santacoder,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.432781238555908,100.6833,7.946,0.993,16272.811 +True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,10171.0,7353749504.0,411315200.0,bigcode/gpt_bigcode-santacoder,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4364870262145994,55.5047,14.413,1.802,14759.122 +True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,18263.0,15323147776.0,4306512384.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8739612770080566,461.5658,0.867,0.217,3549.656 +True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,11981.0,9525273600.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8697105979919434,282.2485,1.417,0.354,2902.407 +True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,32687.0,26312879616.0,4306905600.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8721167373657227,905.4543,0.884,0.11,3618.957 +True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,19379.0,16171410432.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8676586151123047,490.4414,1.631,0.204,3340.664 +True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,18809.0,13064809472.0,4306512384.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8674864864349365,397.3926,1.007,0.252,4122.875 +True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,11734.0,9309332480.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8727792549133301,216.4955,1.848,0.462,3783.912 +True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,31953.0,21823466496.0,4306905600.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8684949207305909,776.5844,1.03,0.129,4219.503 +True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,18598.0,15685985280.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8712387371063233,404.6605,1.977,0.247,4048.826 +True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,37347.0,36218023424.0,25201388032.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8953837585449219,839.4246,0.477,0.119,1951.813 +True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,23942.0,21767827968.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8941266441345215,518.8796,0.771,0.193,1578.786 +True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,49889.0,47207755264.0,25201781248.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8910543060302735,1567.3902,0.51,0.064,2090.609 +True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,31310.0,28881018368.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.891448860168457,876.3875,0.913,0.114,1869.493 +True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,37423.0,34870765056.0,25201388032.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8943702983856201,774.2084,0.517,0.129,2116.226 +True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,23972.0,21485080576.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.896587963104248,456.2499,0.877,0.219,1795.507 +True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,49907.0,44414669824.0,25201781248.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8900082683563233,1436.5433,0.557,0.07,2281.031 +True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,30617.0,28262693888.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.892123146057129,789.7475,1.013,0.127,2074.587 +True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,71641.0,68126422016.0,37179042816.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0015915203094483,3497.702,0.114,0.029,468.422 +True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,51531.0,46684804608.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0012191390991212,1893.9698,0.211,0.053,432.531 +True,,,accelerated-peft-bnb,2e-4,16,0.1,81009.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,,,,, +True,0.28,,accelerated-peft-bnb,2e-4,16,0.1,80647.0,72625788416.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.999879560470581,3609.2665,0.222,0.028,453.943 +True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,71067.0,67048944640.0,37179042816.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0010389518737792,3195.764,0.125,0.031,512.679 +True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,51369.0,46407128576.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.002256908416748,1682.2067,0.238,0.059,486.979 +True,,,accelerated-peft-bnb-foak,2e-4,16,0.1,80783.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,,,,, +True,0.28,,accelerated-peft-bnb-foak,2e-4,16,0.1,80919.0,71810192896.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9998698234558105,3242.0337,0.247,0.031,505.362 +,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,18785.0,15353458176.0,4336822784.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9844318866729737,481.3534,0.831,0.208,3403.736 +,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,12310.0,9542804992.0,2261220352.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9955140018463134,287.5048,1.391,0.348,2849.344 +,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,32579.0,26343190016.0,4337216000.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9898070430755616,946.6898,0.845,0.106,3461.324 +,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,19842.0,16188941824.0,2261416960.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9835797500610352,504.1388,1.587,0.198,3249.898 +,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,18553.0,13095119872.0,4336822784.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9779060745239258,412.9784,0.969,0.242,3967.278 +,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,12109.0,9326863872.0,2261220352.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,1.0228476333618164,221.6896,1.804,0.451,3695.257 +,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,32337.0,21853776896.0,4337216000.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9703095436096192,810.0302,0.988,0.123,4045.281 +,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,19074.0,15703516672.0,2261416960.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0047074699401854,418.3267,1.912,0.239,3916.556 +,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,36435.0,35528093184.0,24511457792.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9047156238555908,832.581,0.48,0.12,1967.857 +,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,23573.0,21067999744.0,12581256192.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9067060089111328,498.7756,0.802,0.2,1642.422 +,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,49007.0,46517825024.0,24511851008.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9023971652984619,1584.7821,0.505,0.063,2067.666 +,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,30557.0,28182132736.0,12581452800.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9020897960662841,869.9509,0.92,0.115,1883.325 +,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,36947.0,34185567744.0,24511457792.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9064445781707764,762.6001,0.525,0.131,2148.439 +,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,23659.0,20783364608.0,12581256192.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9081688308715821,433.9353,0.922,0.23,1887.839 +,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,50659.0,43785179648.0,24511851008.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9031758785247803,1447.8847,0.553,0.069,2263.164 +,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,30421.0,27563599360.0,12581452800.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9036233234405517,779.5952,1.026,0.128,2101.603 +,0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,70525.0,67069752832.0,36122373120.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9943218612670899,3572.5902,0.112,0.028,458.603 +,0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,50590.0,45638032384.0,18219970048.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9938594245910645,1914.0025,0.209,0.052,428.004 +,,True,accelerated-peft-autogptq,2e-4,16,0.1,79895.0,,,TheBloke/Llama-2-70B-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, +,0.28,True,accelerated-peft-autogptq,2e-4,16,0.1,80748.0,71579016192.0,18220166656.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9892638874053955,3677.8684,0.218,0.027,445.475 +,0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,70443.0,65992275456.0,36122373120.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9935803413391113,3250.1642,0.123,0.031,504.098 +,0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,50948.0,45360356352.0,18219970048.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9940973091125488,1681.9931,0.238,0.059,487.041 +,,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,81077.0,,,TheBloke/Llama-2-70B-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,, +,0.28,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,80617.0,70763420672.0,18220166656.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9896932983398438,3295.444,0.243,0.03,497.171 diff --git a/scripts/benchmarks/refs/requirements.txt b/scripts/benchmarks/refs/requirements.txt index 06abe58f..ad534377 100644 --- a/scripts/benchmarks/refs/requirements.txt +++ b/scripts/benchmarks/refs/requirements.txt @@ -1,39 +1,42 @@ accelerate==0.33.0 -aiohttp==3.9.5 +aiohappyeyeballs==2.4.0 +aiohttp==3.10.5 aiosignal==1.3.1 async-timeout==4.0.3 -attrs==23.2.0 -bitsandbytes==0.43.2 -certifi==2024.7.4 +attrs==24.2.0 +bitsandbytes==0.43.3 +certifi==2024.8.30 charset-normalizer==3.3.2 -contourpy==1.2.1 +contourpy==1.3.0 cycler==0.12.1 -datasets==2.20.0 +datasets==2.21.0 dill==0.3.8 docstring_parser==0.16 einops==0.8.0 -filelock==3.15.4 -fire==0.6.0 +filelock==3.16.0 flash-attn==2.6.3 --e git+https://github.com/achew010/fms-acceleration.git@74319eb4f6ef5d946573be0e7e851d97ba16b823#egg=fms_acceleration&subdirectory=plugins/framework --e git+https://github.com/achew010/fms-acceleration.git@74319eb4f6ef5d946573be0e7e851d97ba16b823#egg=fms_acceleration_foak&subdirectory=plugins/fused-ops-and-kernels --e git+https://github.com/achew010/fms-acceleration.git@74319eb4f6ef5d946573be0e7e851d97ba16b823#egg=fms_acceleration_peft&subdirectory=plugins/accelerated-peft -fms-hf-tuning @ git+https://github.com/foundation-model-stack/fms-hf-tuning.git@7dfd4e71a0ded17ab65654925e18bf9a1d76b0fc +-e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration&subdirectory=plugins/framework +-e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration_aadp&subdirectory=plugins/attention-and-distributed-packing +-e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration_foak&subdirectory=plugins/fused-ops-and-kernels +-e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration_peft&subdirectory=plugins/accelerated-peft +fms-hf-tuning @ git+https://github.com/foundation-model-stack/fms-hf-tuning.git@c40ae7f1615b95b2d0c5f02206d1a3799b0f615c fonttools==4.53.1 frozenlist==1.4.1 -fsspec==2024.5.0 -huggingface-hub==0.24.2 -idna==3.7 +fsspec==2024.6.1 +huggingface-hub==0.24.7 +idna==3.8 Jinja2==3.1.4 -kiwisolver==1.4.5 +kiwisolver==1.4.7 +llvmlite==0.43.0 markdown-it-py==3.0.0 MarkupSafe==2.1.5 -matplotlib==3.9.1 +matplotlib==3.9.2 mdurl==0.1.2 mpmath==1.3.0 -multidict==6.0.5 +multidict==6.1.0 multiprocess==0.70.16 networkx==3.3 +numba==0.60.0 numpy==1.26.4 nvidia-cublas-cu12==12.1.3.1 nvidia-cuda-cupti-cu12==12.1.105 @@ -45,41 +48,39 @@ nvidia-curand-cu12==10.3.2.106 nvidia-cusolver-cu12==11.4.5.107 nvidia-cusparse-cu12==12.1.0.106 nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvjitlink-cu12==12.6.68 nvidia-nvtx-cu12==12.1.105 packaging==24.1 pandas==2.2.2 peft==0.12.0 pillow==10.4.0 -protobuf==5.27.2 +protobuf==5.28.1 psutil==6.0.0 pyarrow==17.0.0 -pyarrow-hotfix==0.6 Pygments==2.18.0 -pyparsing==3.1.2 +pyparsing==3.1.4 python-dateutil==2.9.0.post0 -pytz==2024.1 -PyYAML==6.0.1 -regex==2024.7.24 +pytz==2024.2 +PyYAML==6.0.2 +regex==2024.9.11 requests==2.32.3 -rich==13.7.1 -safetensors==0.4.3 +rich==13.8.1 +safetensors==0.4.5 sentencepiece==0.2.0 shtab==1.7.1 simpleeval==0.9.13 six==1.16.0 -sympy==1.13.1 -termcolor==2.4.0 +sympy==1.13.2 threadpoolctl==3.5.0 tokenizers==0.19.1 -torch==2.4.0 -tqdm==4.66.4 -transformers==4.43.3 +torch==2.4.1 +tqdm==4.66.5 +transformers==4.44.2 triton==3.0.0 -trl==0.9.6 +trl==0.10.1 typing_extensions==4.12.2 -tyro==0.8.5 +tyro==0.8.10 tzdata==2024.1 -urllib3==2.2.2 -xxhash==3.4.1 -yarl==1.9.4 +urllib3==2.2.3 +xxhash==3.5.0 +yarl==1.11.1