From 33d83532109e9f3c15930bb170e49fde848d9f27 Mon Sep 17 00:00:00 2001 From: Devin Ha Date: Mon, 23 Sep 2024 22:01:34 +0200 Subject: [PATCH 1/3] [SPARKNLP-1027] Change Default AutoGGUF pretrained model --- docs/en/annotator_entries/AutoGGUF.md | 2 +- python/sparknlp/annotator/seq2seq/auto_gguf_model.py | 6 +++--- .../johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/annotator_entries/AutoGGUF.md b/docs/en/annotator_entries/AutoGGUF.md index 4bf8384004b0e0..a61509d10c8f36 100644 --- a/docs/en/annotator_entries/AutoGGUF.md +++ b/docs/en/annotator_entries/AutoGGUF.md @@ -22,7 +22,7 @@ val autoGGUFModel = AutoGGUFModel.pretrained() .setOutputCol("completions") ``` -The default model is `"gguf-phi3-mini-4k-instruct-q4"`, if no name is provided. +The default model is `"phi3.5_mini_4k_instruct_q4_gguf"`, if no name is provided. For available pretrained models please see the [Models Hub](https://sparknlp.org/models). diff --git a/python/sparknlp/annotator/seq2seq/auto_gguf_model.py b/python/sparknlp/annotator/seq2seq/auto_gguf_model.py index 4441d1d8c6e88b..37af88d7dbbe15 100755 --- a/python/sparknlp/annotator/seq2seq/auto_gguf_model.py +++ b/python/sparknlp/annotator/seq2seq/auto_gguf_model.py @@ -37,7 +37,7 @@ class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate): ... .setInputCols(["document"]) \\ ... .setOutputCol("completions") - The default model is ``"gguf-phi3-mini-4k-instruct-q4"``, if no name is provided. + The default model is ``"phi3.5_mini_4k_instruct_q4_gguf"``, if no name is provided. For extended examples of usage, see the `AutoGGUFModelTest `__ @@ -782,13 +782,13 @@ def loadSavedModel(folder, spark_session): return AutoGGUFModel(java_model=jModel) @staticmethod - def pretrained(name="gguf-phi3-mini-4k-instruct-q4", lang="en", remote_loc=None): + def pretrained(name="phi3.5_mini_4k_instruct_q4_gguf", lang="en", remote_loc=None): """Downloads and loads a pretrained model. 
Parameters ---------- name : str, optional - Name of the pretrained model, by default "gguf-phi3-mini-4k-instruct-q4" + Name of the pretrained model, by default "phi3.5_mini_4k_instruct_q4_gguf" lang : str, optional Language of the pretrained model, by default "en" remote_loc : str, optional diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala index 11d41bdb4d739a..011098341b3f39 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala @@ -42,7 +42,7 @@ import org.json4s.jackson.JsonMethods * .setInputCols("document") * .setOutputCol("completions") * }}} - * The default model is `"gguf-phi3-mini-4k-instruct-q4"`, if no name is provided. + * The default model is `"phi3.5_mini_4k_instruct_q4_gguf"`, if no name is provided. * * For available pretrained models please see the [[https://sparknlp.org/models Models Hub]]. 
* @@ -213,7 +213,7 @@ class AutoGGUFModel(override val uid: String) trait ReadablePretrainedAutoGGUFModel extends ParamsAndFeaturesReadable[AutoGGUFModel] with HasPretrained[AutoGGUFModel] { - override val defaultModelName: Some[String] = Some("gguf-phi3-mini-4k-instruct-q4") + override val defaultModelName: Some[String] = Some("phi3.5_mini_4k_instruct_q4_gguf") override val defaultLang: String = "en" /** Java compliant-overrides */ From 1a07f95d3917bd62a9572fd008807d77467ed492 Mon Sep 17 00:00:00 2001 From: Devin Ha Date: Tue, 24 Sep 2024 15:43:31 +0200 Subject: [PATCH 2/3] [SPARKNLP-1027] Change default model in example --- ...llama.cpp_in_Spark_NLP_AutoGGUFModel.ipynb | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/python/llama.cpp/llama.cpp_in_Spark_NLP_AutoGGUFModel.ipynb b/examples/python/llama.cpp/llama.cpp_in_Spark_NLP_AutoGGUFModel.ipynb index f07f3892e2d38f..3a76bdf5f01ece 100644 --- a/examples/python/llama.cpp/llama.cpp_in_Spark_NLP_AutoGGUFModel.ipynb +++ b/examples/python/llama.cpp/llama.cpp_in_Spark_NLP_AutoGGUFModel.ipynb @@ -22,7 +22,7 @@ "source": [ "## Download a GGUF Model\n", "\n", - "Lets download a GGUF model to test it out. For this, we will use [microsoft/Phi-3-mini-4k-instruct-gguf](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf). It is a 3.8B parameter model which also is available in 4-bit quantization. \n", + "Let's download a GGUF model to test it out. For this, we will use [bartowski/Phi-3.5-mini-instruct-GGUF](https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF). It is a 3.8B parameter model which is also available in 4-bit quantization. 
\n", "\n", "We can download the model by selecting the q4 GGUF file from the \"Files and versions\" tab.\n", "\n", @@ -38,28 +38,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-07-20 11:11:30-- https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true\n", + "--2024-07-20 11:11:30-- https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct-Q4_K_M.gguf?download=true\n", "Resolving huggingface.co (huggingface.co)... 2600:9000:275f:7600:17:b174:6d00:93a1, 2600:9000:275f:3800:17:b174:6d00:93a1, 2600:9000:275f:6e00:17:b174:6d00:93a1, ...\n", "Connecting to huggingface.co (huggingface.co)|2600:9000:275f:7600:17:b174:6d00:93a1|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", - "Location: https://cdn-lfs-us-1.huggingface.co/repos/41/c8/41c860f65b01de5dc4c68b00d84cead799d3e7c48e38ee749f4c6057776e2e9e/8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Phi-3-mini-4k-instruct-q4.gguf%3B+filename%3D%22Phi-3-mini-4k-instruct-q4.gguf%22%3B&Expires=1721725890&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTcyNTg5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzQxL2M4LzQxYzg2MGY2NWIwMWRlNWRjNGM2OGIwMGQ4NGNlYWQ3OTlkM2U3YzQ4ZTM4ZWU3NDlmNGM2MDU3Nzc2ZTJlOWUvOGE4M2M3ZmI5MDQ5YTliMmU5MjI2NmZhN2FkMDQ5MzNiYjUzYWExZTg1MTM2YjdiMzBmMWI4MDAwZmYyZWRlZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=joXQf4QRpEhtFeQ3r3gJ0zyJ3bXReb9OxM%7EZit3GJ3355ycKQzemJ%7E6eD-J7%7EkphnsPpRpUDhQkCr2-Oidqo7dgltmFsWAX4SmQLn65R1yjO%7EsMvi%7E4vOUpaRPYlSMCyWWJpiZZjQYVH4Uk0o-G62ALFXKGaDfr627kvahP-fJYwNNP1riTrH8hbbah28ZKRAQjUGI1aNqerG0jojudnGOagawISAnudkAOFZfxnN7Qw3CoMySZLj9Euu02RBv2A5Yy0uSjG7b8rilx-tU5HDR3ECohdQQ8yPXjYFU-LZi-zcG1wwBDF-S01qb%7EgPWsTorenxfRM2cG6J%7EvSziGCzA__&Key-Pair-Id=K24J24Z295AEI9 
[following]\n", - "--2024-07-20 11:11:30-- https://cdn-lfs-us-1.huggingface.co/repos/41/c8/41c860f65b01de5dc4c68b00d84cead799d3e7c48e38ee749f4c6057776e2e9e/8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Phi-3-mini-4k-instruct-q4.gguf%3B+filename%3D%22Phi-3-mini-4k-instruct-q4.gguf%22%3B&Expires=1721725890&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTcyNTg5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzQxL2M4LzQxYzg2MGY2NWIwMWRlNWRjNGM2OGIwMGQ4NGNlYWQ3OTlkM2U3YzQ4ZTM4ZWU3NDlmNGM2MDU3Nzc2ZTJlOWUvOGE4M2M3ZmI5MDQ5YTliMmU5MjI2NmZhN2FkMDQ5MzNiYjUzYWExZTg1MTM2YjdiMzBmMWI4MDAwZmYyZWRlZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=joXQf4QRpEhtFeQ3r3gJ0zyJ3bXReb9OxM%7EZit3GJ3355ycKQzemJ%7E6eD-J7%7EkphnsPpRpUDhQkCr2-Oidqo7dgltmFsWAX4SmQLn65R1yjO%7EsMvi%7E4vOUpaRPYlSMCyWWJpiZZjQYVH4Uk0o-G62ALFXKGaDfr627kvahP-fJYwNNP1riTrH8hbbah28ZKRAQjUGI1aNqerG0jojudnGOagawISAnudkAOFZfxnN7Qw3CoMySZLj9Euu02RBv2A5Yy0uSjG7b8rilx-tU5HDR3ECohdQQ8yPXjYFU-LZi-zcG1wwBDF-S01qb%7EgPWsTorenxfRM2cG6J%7EvSziGCzA__&Key-Pair-Id=K24J24Z295AEI9\n", + "Location: https://cdn-lfs-us-1.huggingface.co/repos/41/c8/41c860f65b01de5dc4c68b00d84cead799d3e7c48e38ee749f4c6057776e2e9e/8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Phi-3.5-mini-instruct-Q4_K_M.gguf%3B+filename%3D%22P 
Phi-3.5-mini-instruct-Q4_K_M.gguf%22%3B&Expires=1721725890&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTcyNTg5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzQxL2M4LzQxYzg2MGY2NWIwMWRlNWRjNGM2OGIwMGQ4NGNlYWQ3OTlkM2U3YzQ4ZTM4ZWU3NDlmNGM2MDU3Nzc2ZTJlOWUvOGE4M2M3ZmI5MDQ5YTliMmU5MjI2NmZhN2FkMDQ5MzNiYjUzYWExZTg1MTM2YjdiMzBmMWI4MDAwZmYyZWRlZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=joXQf4QRpEhtFeQ3r3gJ0zyJ3bXReb9OxM%7EZit3GJ3355ycKQzemJ%7E6eD-J7%7EkphnsPpRpUDhQkCr2-Oidqo7dgltmFsWAX4SmQLn65R1yjO%7EsMvi%7E4vOUpaRPYlSMCyWWJpiZZjQYVH4Uk0o-G62ALFXKGaDfr627kvahP-fJYwNNP1riTrH8hbbah28ZKRAQjUGI1aNqerG0jojudnGOagawISAnudkAOFZfxnN7Qw3CoMySZLj9Euu02RBv2A5Yy0uSjG7b8rilx-tU5HDR3ECohdQQ8yPXjYFU-LZi-zcG1wwBDF-S01qb%7EgPWsTorenxfRM2cG6J%7EvSziGCzA__&Key-Pair-Id=K24J24Z295AEI9 [following]\n", + "--2024-07-20 11:11:30-- https://cdn-lfs-us-1.huggingface.co/repos/41/c8/41c860f65b01de5dc4c68b00d84cead799d3e7c48e38ee749f4c6057776e2e9e/8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Phi-3.5-mini-instruct-Q4_K_M.gguf%3B+filename%3D%22P 
Phi-3.5-mini-instruct-Q4_K_M.gguf%22%3B&Expires=1721725890&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTcyNTg5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzQxL2M4LzQxYzg2MGY2NWIwMWRlNWRjNGM2OGIwMGQ4NGNlYWQ3OTlkM2U3YzQ4ZTM4ZWU3NDlmNGM2MDU3Nzc2ZTJlOWUvOGE4M2M3ZmI5MDQ5YTliMmU5MjI2NmZhN2FkMDQ5MzNiYjUzYWExZTg1MTM2YjdiMzBmMWI4MDAwZmYyZWRlZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=joXQf4QRpEhtFeQ3r3gJ0zyJ3bXReb9OxM%7EZit3GJ3355ycKQzemJ%7E6eD-J7%7EkphnsPpRpUDhQkCr2-Oidqo7dgltmFsWAX4SmQLn65R1yjO%7EsMvi%7E4vOUpaRPYlSMCyWWJpiZZjQYVH4Uk0o-G62ALFXKGaDfr627kvahP-fJYwNNP1riTrH8hbbah28ZKRAQjUGI1aNqerG0jojudnGOagawISAnudkAOFZfxnN7Qw3CoMySZLj9Euu02RBv2A5Yy0uSjG7b8rilx-tU5HDR3ECohdQQ8yPXjYFU-LZi-zcG1wwBDF-S01qb%7EgPWsTorenxfRM2cG6J%7EvSziGCzA__&Key-Pair-Id=K24J24Z295AEI9\n", "Resolving cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)... 2600:9000:275f:7a00:17:9a40:4dc0:93a1, 2600:9000:275f:fc00:17:9a40:4dc0:93a1, 2600:9000:275f:4800:17:9a40:4dc0:93a1, ...\n", "Connecting to cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)|2600:9000:275f:7a00:17:9a40:4dc0:93a1|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 2393231072 (2.2G) [binary/octet-stream]\n", - "Saving to: ‘Phi-3-mini-4k-instruct-q4.gguf?download=true’\n", + "Saving to: ‘Phi-3.5-mini-instruct-Q4_K_M.gguf?download=true’\n", "\n", "Phi-3-mini-4k-instr 100%[===================>] 2.23G 22.5MB/s in 96s \n", "\n", - "2024-07-20 11:13:06 (23.7 MB/s) - ‘Phi-3-mini-4k-instruct-q4.gguf?download=true’ saved [2393231072/2393231072]\n", + "2024-07-20 11:13:06 (23.7 MB/s) - ‘Phi-3.5-mini-instruct-Q4_K_M.gguf?download=true’ saved [2393231072/2393231072]\n", "\n" ] } ], "source": [ - "EXPORT_PATH = \"Phi-3-mini-4k-instruct-q4.gguf\"\n", - "! 
wget \"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf?download=true\" -O {EXPORT_PATH}" + "EXPORT_PATH = \"Phi-3.5-mini-instruct-Q4_K_M.gguf\"\n", + "! wget \"https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct-Q4_K_M.gguf?download=true\" -O {EXPORT_PATH}" ] }, { @@ -359,7 +359,7 @@ "metadata": {}, "outputs": [], "source": [ - "autoGGUFModel.write().overwrite().save(f\"Phi-3-mini-4k-instruct-q4_spark_nlp\")" + "autoGGUFModel.write().overwrite().save(f\"Phi-3.5-mini-instruct-Q4_K_M_spark_nlp\")" ] }, { @@ -398,12 +398,12 @@ "text": [ "total 2337168\n", "drwxr-xr-x 2 ducha ducha 4096 Jul 21 16:24 metadata\n", - "-rwxrwxr-x 1 ducha ducha 2393231072 Jul 21 16:24 Phi-3-mini-4k-instruct-q4.gguf\n" + "-rwxrwxr-x 1 ducha ducha 2393231072 Jul 21 16:24 Phi-3.5-mini-instruct-Q4_K_M.gguf\n" ] } ], "source": [ - "! ls -l Phi-3-mini-4k-instruct-q4_spark_nlp" + "! ls -l Phi-3.5-mini-instruct-Q4_K_M_spark_nlp" ] }, { @@ -437,7 +437,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "llama_model_loader: loaded meta data with 24 key-value pairs and 195 tensors from /tmp/spark-bbad4f64-91a7-4b6e-8242-7f91e6abca54/userFiles-f7d4e4e9-c02d-46e4-81b5-bf5a26d70930/Phi-3-mini-4k-instruct-q4.gguf (version GGUF V3 (latest))\n", + "llama_model_loader: loaded meta data with 24 key-value pairs and 195 tensors from /tmp/spark-bbad4f64-91a7-4b6e-8242-7f91e6abca54/userFiles-f7d4e4e9-c02d-46e4-81b5-bf5a26d70930/Phi-3.5-mini-instruct-Q4_K_M.gguf (version GGUF V3 (latest))\n", "llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n", "llama_model_loader: - kv 0: general.architecture str = phi3\n", "llama_model_loader: - kv 1: general.name str = Phi3\n", @@ -583,7 +583,7 @@ "\n", "document_assembler = DocumentAssembler().setInputCol(\"text\").setOutputCol(\"document\")\n", "\n", - "auto_gguf_model = AutoGGUFModel.load(\"Phi-3-mini-4k-instruct-q4_spark_nlp\")\n", + "auto_gguf_model = AutoGGUFModel.load(\"Phi-3.5-mini-instruct-Q4_K_M_spark_nlp\")\n", "\n", "pipeline = Pipeline().setStages([document_assembler, auto_gguf_model])\n", "\n", From 4ff5fba590367b7b49ca43c401031c3b47096f55 Mon Sep 17 00:00:00 2001 From: Devin Ha Date: Tue, 24 Sep 2024 22:08:01 +0200 Subject: [PATCH 3/3] [SPARKNLP-1027] Fix issue with pretrained model --- python/test/annotator/seq2seq/auto_gguf_model_test.py | 11 ++++------- .../nlp/annotators/seq2seq/AutoGGUFModel.scala | 5 ++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/test/annotator/seq2seq/auto_gguf_model_test.py b/python/test/annotator/seq2seq/auto_gguf_model_test.py index 0c2d4a349ca806..e6553bc509e5ff 100644 --- a/python/test/annotator/seq2seq/auto_gguf_model_test.py +++ b/python/test/annotator/seq2seq/auto_gguf_model_test.py @@ -43,14 +43,13 @@ def runTest(self): DocumentAssembler().setInputCol("text").setOutputCol("document") ) - modelPath = "models/codellama-7b.Q2_K.gguf" model = ( - AutoGGUFModel.loadSavedModel(modelPath, self.spark) + AutoGGUFModel.pretrained() .setInputCols("document") .setOutputCol("completions") .setBatchSize(4) .setNPredict(20) - .setNGpuLayers(99) + .setNGpuLayers(5) .setTemperature(0.4) .setTopK(40) .setTopP(0.9) @@ -79,9 +78,8 @@ def runTest(self): DocumentAssembler().setInputCol("text").setOutputCol("document") ) - modelPath = "models/codellama-7b.Q2_K.gguf" model = ( - AutoGGUFModel.loadSavedModel(modelPath, self.spark) + AutoGGUFModel.pretrained() .setInputCols("document") .setOutputCol("completions") .setBatchSize(4) @@ -182,9 +180,8 @@ def setUp(self): 
self.spark = SparkContextForTest.spark def runTest(self): - modelPath = "models/codellama-7b.Q2_K.gguf" model = ( - AutoGGUFModel.loadSavedModel(modelPath, self.spark) + AutoGGUFModel.pretrained() .setInputCols("document") .setOutputCol("completions") ) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala index 011098341b3f39..e681ce99888010 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/AutoGGUFModel.scala @@ -233,7 +233,10 @@ trait ReadAutoGGUFModel { def readModel(instance: AutoGGUFModel, path: String, spark: SparkSession): Unit = { def findGGUFModelInFolder(): String = { - val folder = new java.io.File(path) + val folder = + new java.io.File( + path.replace("file:", "") + ) // File should be local at this point. TODO: Except if it's HDFS? if (folder.exists && folder.isDirectory) { folder.listFiles .filter(_.isFile)