diff --git a/docs/_posts/prabod/2025-06-23-minilm_l6_v2_en.md b/docs/_posts/prabod/2025-06-23-minilm_l6_v2_en.md new file mode 100644 index 00000000000000..046feaad5a900e --- /dev/null +++ b/docs/_posts/prabod/2025-06-23-minilm_l6_v2_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: MiniLM L6 V2 +author: John Snow Labs +name: minilm_l6_v2 +date: 2025-06-23 +tags: [en, open_source, openvino] +task: Embeddings +language: en +edition: Spark NLP 5.5.1 +spark_version: 3.0 +supported: true +engine: openvino +annotator: MiniLMEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Sentence embeddings using MiniLM. + +MiniLM, a lightweight and efficient sentence embedding model that can generate text embeddings +for various NLP tasks (e.g., classification, retrieval, clustering, text evaluation, etc.) + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_l6_v2_en_5.5.1_3.0_1750674121132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_l6_v2_en_5.5.1_3.0_1750674121132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") +embeddings = MiniLMEmbeddings.pretrained() \ + .setInputCols(["document"]) \ + .setOutputCol("minilm_embeddings") +embeddingsFinisher = EmbeddingsFinisher() \ + .setInputCols(["minilm_embeddings"]) \ + .setOutputCols("finished_embeddings") \ + .setOutputAsVector(True) +pipeline = Pipeline().setStages([ + documentAssembler, + embeddings, + embeddingsFinisher +]) + +data = spark.createDataFrame([["This is a sample sentence for embedding generation.", + "Another example sentence to demonstrate MiniLM embeddings.", +]]).toDF("text") +result = pipeline.fit(data).transform(data) +result.selectExpr("explode(finished_embeddings) as result").show(5, 80) +``` +```scala +import spark.implicits._ +import com.johnsnowlabs.nlp.base.DocumentAssembler +import com.johnsnowlabs.nlp.annotators.Tokenizer +import com.johnsnowlabs.nlp.embeddings.MiniLMEmbeddings +import com.johnsnowlabs.nlp.EmbeddingsFinisher +import org.apache.spark.ml.Pipeline + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MiniLMEmbeddings.pretrained("minilm_l6_v2", "en") + .setInputCols("document") + .setOutputCol("minilm_embeddings") + +val embeddingsFinisher = new EmbeddingsFinisher() + .setInputCols("minilm_embeddings") + .setOutputCols("finished_embeddings") + .setOutputAsVector(true) + +val pipeline = new Pipeline().setStages(Array( + documentAssembler, + embeddings, + embeddingsFinisher +)) + +val data = Seq("This is a sample sentence for embedding generation.", +"Another example sentence to demonstrate MiniLM embeddings." + +).toDF("text") +val result = pipeline.fit(data).transform(data) + +result.selectExpr("explode(finished_embeddings) as result").show(1, 80) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_l6_v2| +|Compatibility:|Spark NLP 5.5.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents]| +|Output Labels:|[minilm]| +|Language:|en| +|Size:|17.2 MB| \ No newline at end of file