diff --git a/package-lock.json b/package-lock.json index 5fa6eada..34a19ea0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4481,7 +4481,7 @@ }, "services/data-computer": { "name": "ws-data-computer", - "version": "2.15.0", + "version": "2.16.0", "license": "MIT" }, "services/data-termsuite": { @@ -4490,6 +4490,7 @@ "license": "MIT" }, "services/data-topcitation": { + "name": "ws-data-topcitation", "version": "1.0.2", "license": "MIT" }, diff --git a/services/data-computer/README.md b/services/data-computer/README.md index 78596844..eb822168 100644 --- a/services/data-computer/README.md +++ b/services/data-computer/README.md @@ -1,4 +1,4 @@ -# ws-data-computer@2.15.0 +# ws-data-computer@2.16.0 Le service `data-computer` offre plusieurs services **asynchrones** pour des calculs et de transformations de données simples. @@ -256,3 +256,161 @@ cat input.tar.gz |curl --data-binary @- -H "X-Hook: https://webhook.site/dce2fe # When the corpus is processed, get the result cat output.json |curl --data-binary @- "http://localhost:31976/v1/retrieve" > output.tar.gz +``` + + +### v1/corpus-similarity + +Compare des petits documents (Titre, phrases, petits *abstracts*) entre eux, et renvoie pour chaque document les documents qui lui sont similaires. +Il est conseillé d'utiliser cette route avec au moins 6-7 documents dans le corpus. + +Il existe un paramètre optionnel `output` pour choisir le type de sortie en fonction de sa valeur: +- 0 (par défaut) : l'algorithme choisit automatiquement les documents les plus similaires à chaque document +- 1 : l'algorithme renvoie pour chaque document tous les documents, classés par ordre de proximité (les plus similaires en premier) +- *n* (avec *n* un entier plus grand que 1) : l'algorithme renvoie pour chaque document les *n* documents les plus proches, classés par ordre de proximité (les plus similaires en premier), ainsi que le score de similarité associé à chaque document. 
+par exemple en utilisant `example-similarity-json.tar.gz` avec le paramètre output par défaut (0), obtiendra : + +> **Attention** : Le champ ID est utilisé comme référence de chaque document. + +par exemple en utilisant `example-similarity-json.tar.gz` avec le paramètre output par défaut (0), obtiendra : + +```json +[ + { + "id": "Titre 1", + "value": { + "similarity": [ + "Titre 4", + "Titre 2" + ], + "score": [ + 0.9411764705882353, + 0.9349112426035503 + ] + } + }, + { + "id": "Titre 2", + "value": { + "similarity": [ + "Titre 1" + ], + "score": [ + 0.9349112426035503 + ] + } + }, + { + "id": "Titre 3", + "value": { + "similarity": [ + "Titre 4" + ], + "score": [ + 0.8888888888888888 + ] + } + }, + { + "id": "Titre 4", + "value": { + "similarity": [ + "Titre 1" + ], + "score": [ + 0.9411764705882353 + ] + } + } +] +``` + +Avec le paramètre output=3, on obtiendra : + +```json +[ + { + "id": "Titre 1", + "value": { + "similarity": [ + "Titre 4", + "Titre 2", + "Titre 3" + ], + "score": [ + 0.9411764705882353, + 0.9349112426035503, + 0.8757396449704142 + ] + } + }, + { + "id": "Titre 2", + "value": { + "similarity": [ + "Titre 1", + "Titre 4", + "Titre 3" + ], + "score": [ + 0.9349112426035503, + 0.8888888888888888, + 0.8651685393258427 + ] + } + }, + { + "id": "Titre 3", + "value": { + "similarity": [ + "Titre 4", + "Titre 1", + "Titre 2" + ], + "score": [ + 0.8888888888888888, + 0.8757396449704142, + 0.8651685393258427 + ] + } + }, + { + "id": "Titre 4", + "value": { + "similarity": [ + "Titre 1", + "Titre 3", + "Titre 2" + ], + "score": [ + 0.9411764705882353, + 0.8888888888888888, + 0.8888888888888888 + ] + } + } +] +``` + +#### Paramètre(s) URL + +| nom | description | +| ------------------- | ------------------------------------------- | +| indent (true/false) | Indenter le résultat renvoyer immédiatement | +| output (0,1,n) | Choix de la sortie | + +#### Entête(s) HTTP + +| nom | description | +| ------ | 
------------------------------------------------------------ |
+| X-Hook | URL à appeler quand le résultat sera disponible (facultatif) |
+
+#### Exemple en ligne de commande
+
+
+```bash
+# Send data for batch processing
+cat input.tar.gz |curl --data-binary @- -H "X-Hook: https://webhook.site/dce2fefa-9a72-4f76-96e5-059405a04f6c" "http://localhost:31976/v1/corpus-similarity" > output.json
+
+# When the corpus is processed, get the result
+cat output.json |curl --data-binary @- "http://localhost:31976/v1/retrieve" > output.tar.gz
diff --git a/services/data-computer/example-similarity-json.tar.gz b/services/data-computer/example-similarity-json.tar.gz
new file mode 100644
index 00000000..5316bb15
Binary files /dev/null and b/services/data-computer/example-similarity-json.tar.gz differ
diff --git a/services/data-computer/examples.http b/services/data-computer/examples.http
index 27886c8b..d116eef2 100644
--- a/services/data-computer/examples.http
+++ b/services/data-computer/examples.http
@@ -123,3 +123,14 @@ X-Webhook-Success: https://webhook.site/69300b22-a251-4c16-9905-f7ba218ae7e9
X-Webhook-Failure: https://webhook.site/69300b22-a251-4c16-9905-f7ba218ae7e9

< ./example-json.tar.gz
+
+
+###
+# @name v1CorpusSimilarity
+POST {{host}}/v1/corpus-similarity HTTP/1.1
+Content-Type: application/x-tar
+X-Webhook-Success: https://webhook.site/69300b22-a251-4c16-9905-f7ba218ae7e9
+X-Webhook-Failure: https://webhook.site/69300b22-a251-4c16-9905-f7ba218ae7e9
+
+< ./example-similarity-json.tar.gz
+
diff --git a/services/data-computer/package.json b/services/data-computer/package.json
index 65c124f5..ac65a822 100644
--- a/services/data-computer/package.json
+++ b/services/data-computer/package.json
@@ -1,35 +1,35 @@
 {
-    "private": true,
-    "name": "ws-data-computer",
-    "version": "2.15.0",
-    "description": "Calculs sur fichier corpus compressé",
-    "repository": {
-        "type": "git",
-        "url": "git+https://github.com/Inist-CNRS/web-services.git"
-    },
-    "keywords": [
-        "ezmaster"
-    ],
-    
"author": " ", - "license": "MIT", - "bugs": { - "url": "https://github.com/Inist-CNRS/web-services/issues" - }, - "homepage": "https://github.com/Inist-CNRS/web-services/#readme", - "scripts": { - "version:insert:readme": "sed -i \"s#\\(${npm_package_name}.\\)\\([\\.a-z0-9]\\+\\)#\\1${npm_package_version}#g\" README.md && git add README.md", - "version:insert:swagger": "sed -i \"s/\\\"version\\\": \\\"[0-9]\\+.[0-9]\\+.[0-9]\\+\\\"/\\\"version\\\": \\\"${npm_package_version}\\\"/g\" swagger.json && git add swagger.json", - "version:insert": "npm run version:insert:readme && npm run version:insert:swagger", - "version:commit": "git commit -a -m \"release ${npm_package_name}@${npm_package_version}\"", - "version:tag": "git tag \"${npm_package_name}@${npm_package_version}\" -m \"${npm_package_name}@${npm_package_version}\"", - "version:push": "git push && git push --tags", - "version": "npm run version:insert && npm run version:commit && npm run version:tag", - "postversion": "npm run version:push", - "build:dev": "docker build -t cnrsinist/${npm_package_name}:latest .", - "start:dev": "npm run build:dev && docker run --name dev --rm --detach -p 31976:31976 cnrsinist/${npm_package_name}:latest", - "stop:dev": "docker stop dev", - "build": "docker build -t cnrsinist/${npm_package_name}:${npm_package_version} .", - "start": "docker run --rm -p 31976:31976 cnrsinist/${npm_package_name}:${npm_package_version}", - "publish": "docker push cnrsinist/${npm_package_name}:${npm_package_version}" - } + "private": true, + "name": "ws-data-computer", + "version": "2.16.0", + "description": "Calculs sur fichier corpus compressé", + "repository": { + "type": "git", + "url": "git+https://github.com/Inist-CNRS/web-services.git" + }, + "keywords": [ + "ezmaster" + ], + "author": " ", + "license": "MIT", + "bugs": { + "url": "https://github.com/Inist-CNRS/web-services/issues" + }, + "homepage": "https://github.com/Inist-CNRS/web-services/#readme", + "scripts": { + 
"version:insert:readme": "sed -i \"s#\\(${npm_package_name}.\\)\\([\\.a-z0-9]\\+\\)#\\1${npm_package_version}#g\" README.md && git add README.md", + "version:insert:swagger": "sed -i \"s/\\\"version\\\": \\\"[0-9]\\+.[0-9]\\+.[0-9]\\+\\\"/\\\"version\\\": \\\"${npm_package_version}\\\"/g\" swagger.json && git add swagger.json", + "version:insert": "npm run version:insert:readme && npm run version:insert:swagger", + "version:commit": "git commit -a -m \"release ${npm_package_name}@${npm_package_version}\"", + "version:tag": "git tag \"${npm_package_name}@${npm_package_version}\" -m \"${npm_package_name}@${npm_package_version}\"", + "version:push": "git push && git push --tags", + "version": "npm run version:insert && npm run version:commit && npm run version:tag", + "postversion": "npm run version:push", + "build:dev": "docker build -t cnrsinist/${npm_package_name}:latest .", + "start:dev": "npm run build:dev && docker run --name dev --rm --detach -p 31976:31976 cnrsinist/${npm_package_name}:latest", + "stop:dev": "docker stop dev", + "build": "docker build -t cnrsinist/${npm_package_name}:${npm_package_version} .", + "start": "docker run --rm -p 31976:31976 cnrsinist/${npm_package_name}:${npm_package_version}", + "publish": "docker push cnrsinist/${npm_package_name}:${npm_package_version}" + } } diff --git a/services/data-computer/swagger.json b/services/data-computer/swagger.json index 910be1e6..f5962c80 100644 --- a/services/data-computer/swagger.json +++ b/services/data-computer/swagger.json @@ -3,7 +3,7 @@ "info": { "title": "data-computer - Calculs sur fichier corpus compressé", "summary": "Calculs sur un corpus compressé", - "version": "2.15.0", + "version": "2.16.0", "termsOfService": "https://services.istex.fr/", "contact": { "name": "Inist-CNRS", @@ -15,7 +15,7 @@ "x-comment": "Will be automatically completed by the ezs server." 
}, { - "url": "http://vptdmjobs.intra.inist.fr:49191/", + "url": "http://vptdmjobs.intra.inist.fr:49196/", "description": "Latest version for production", "x-profil": "Standard" } @@ -30,4 +30,4 @@ } } ] -} +} \ No newline at end of file diff --git a/services/data-computer/tests.hurl b/services/data-computer/tests.hurl index 56aa93e4..29416a7b 100644 --- a/services/data-computer/tests.hurl +++ b/services/data-computer/tests.hurl @@ -72,8 +72,92 @@ delay: 2000 HTTP 200 [{"id":"#1","value":{"sample":2,"frequency":0.6666666666666666,"percentage":null,"sum":0,"count":5,"min":0,"max":0,"mean":0,"range":0,"midrange":0,"variance":0,"deviation":0,"population":3,"input":"a"}},{"id":"#2","value":{"sample":2,"frequency":0.6666666666666666,"percentage":null,"sum":0,"count":5,"min":0,"max":0,"mean":0,"range":0,"midrange":0,"variance":0,"deviation":0,"population":3,"input":"b"}},{"id":"#3","value":{"sample":1,"frequency":0.3333333333333333,"percentage":null,"sum":0,"count":5,"min":0,"max":0,"mean":0,"range":0,"midrange":0,"variance":0,"deviation":0,"population":3,"input":"c"}},{"id":"#4","value":{"sample":2,"frequency":0.6666666666666666,"percentage":null,"sum":0,"count":5,"min":0,"max":0,"mean":0,"range":0,"midrange":0,"variance":0,"deviation":0,"population":3,"input":"a"}},{"id":"#5","value":{"sample":2,"frequency":0.6666666666666666,"percentage":null,"sum":0,"count":5,"min":0,"max":0,"mean":0,"range":0,"midrange":0,"variance":0,"deviation":0,"population":3,"input":"b"}}] -# -# group +################################ Test for Similarity ################################ + +POST {{host}}/v1/corpus-similarity +content-type: application/x-tar +x-hook: https://webhook.site/69300b22-a251-4c16-9905-f7ba218ae7e9 +file,example-similarity-json.tar.gz; + +HTTP 200 +# Capture the computing token +[Captures] +computing_token: jsonpath "$[0].value" +[Asserts] +variable "computing_token" exists + +# There should be a waiting time, representing the time taken to process data. 
+# Fortunately, as the data is sparse, and the computing time is small, +# the need is small. + +# Version 4.1.0 of hurl added a delay option, which value is milliseconds. +# https://hurl.dev/blog/2023/09/24/announcing-hurl-4.1.0.html#add-delay-between-requests + +POST {{host}}/v1/retrieve-json?indent=true +content-type: application/json +[Options] +delay: 1000 +``` +[ + { + "value":"{{computing_token}}" + } +] +``` + +HTTP 200 +[{ + "id": "Titre 1", + "value": { + "similarity": [ + "Titre 4", + "Titre 2" + ], + "score": [ + 0.9411764705882353, + 0.9349112426035503 + ] + } +}, +{ + "id": "Titre 2", + "value": { + "similarity": [ + "Titre 1" + ], + "score": [ + 0.9349112426035503 + ] + } +}, +{ + "id": "Titre 3", + "value": { + "similarity": [ + "Titre 4" + ], + "score": [ + 0.8888888888888888 + ] + } +}, +{ + "id": "Titre 4", + "value": { + "similarity": [ + "Titre 1" + ], + "score": [ + 0.9411764705882353 + ] + } +}] + + +# TODO: ajouter les deux autres routes (v1GraphSegment, v1Lda) +# TODO: ajouter la route rapido + +##################################### group-by ###################### POST {{host}}/v1/group-by content-type: application/gzip x-hook: https://webhook.site/69300b22-a251-4c16-9905-f7ba218ae7e9 @@ -109,4 +193,3 @@ HTTP 200 [{"id":"#1","value":["#1","#4"]},{"id":"#4","value":["#1","#4"]},{"id":"#2","value":["#2","#5"]},{"id":"#5","value":["#2","#5"]},{"id":"#3","value":["#3"]}] # -# TODO: ajouter les deux autres routes (v1GraphSegment, v1Lda) diff --git a/services/data-computer/v1/corpus-similarity.ini b/services/data-computer/v1/corpus-similarity.ini new file mode 100644 index 00000000..c50fceeb --- /dev/null +++ b/services/data-computer/v1/corpus-similarity.ini @@ -0,0 +1,65 @@ +# OpenAPI Documentation - JSON format (dot notation) +mimeType = application/json + +post.operationId = post-v1-corpus-similarity +post.description = Web service de calcul de similarité entre documents d un corpus +post.summary = 3 sorties sont disponibles +post.tags.0 = 
data-computer
+post.requestBody.content.application/x-tar.schema.type = string
+post.requestBody.content.application/x-tar.schema.format = binary
+post.requestBody.required = true
+post.responses.default.description = Informations permettant de récupérer les données le moment venu
+post.parameters.0.description = Indenter le JSON résultant
+post.parameters.0.in = query
+post.parameters.0.name = indent
+post.parameters.0.schema.type = boolean
+post.parameters.1.description = URL pour signaler que le traitement est terminé
+post.parameters.1.in = header
+post.parameters.1.name = X-Webhook-Success
+post.parameters.1.schema.type = string
+post.parameters.1.schema.format = uri
+post.parameters.1.required = false
+post.parameters.2.description = URL pour signaler que le traitement a échoué
+post.parameters.2.in = header
+post.parameters.2.name = X-Webhook-Failure
+post.parameters.2.schema.type = string
+post.parameters.2.schema.format = uri
+post.parameters.2.required = false
+
+post.parameters.3.in = query
+post.parameters.3.name = output
+post.parameters.3.schema.type = integer
+post.parameters.3.description = Choix du nombre de documents similaires à afficher dans la sortie : 0 pour automatique, 1 pour tout afficher, n'importe quel autre nombre pour afficher au maximum ce nombre d'éléments.
+
+
+[env]
+path = generator
+value = corpus-similarity
+
+[use]
+plugin = basics
+plugin = spawn
+
+# Step 1 (générique): Charger le fichier corpus
+[delegate]
+file = charger.cfg
+
+# Step 2 (générique): Traiter de manière asynchrone les items reçus
+[fork]
+standalone = true
+logger = logger.cfg
+
+# Step 2.1 (spécifique): Lancer un calcul sur tous les items reçus
+[fork/exec]
+# command should be executable !
+command = ./v1/corpus-similarity.py +args = fix('-p') +args = env('output', "0") + +# Step 2.2 (générique): Enregistrer le résultat et signaler que le traitement est fini +[fork/delegate] +file = recorder.cfg + +# Step 3 : Renvoyer immédiatement un seul élément indiquant comment récupérer le résultat quand il sera prêt +[delegate] +file = recipient.cfg diff --git a/services/data-computer/v1/corpus-similarity.py b/services/data-computer/v1/corpus-similarity.py new file mode 100755 index 00000000..c5dc00cd --- /dev/null +++ b/services/data-computer/v1/corpus-similarity.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import json +import sys +from difflib import SequenceMatcher +import numpy as np + +def get_ratio(data): + currentTitle = data['value'] + currentId = data['id'] + idList = [] + ratioList = [] + + for _,line_cmp in enumerate(all_data): + data_cmp = line_cmp[0] + id,title = data_cmp["id"],data_cmp["value"] + if currentId == id: + continue + ratio = SequenceMatcher(None, currentTitle, title).ratio() + idList.append(id) + ratioList.append(ratio) + + #Sort both lists according to ratioList + ratioList,idList = (list(t) for t in zip(*sorted(zip(ratioList, idList),reverse=True))) + + return currentId, ratioList,idList + +# load all datas +all_data = [] +for line in sys.stdin: + data=json.loads(line) + all_data.append(data) + + +output = int(sys.argv[sys.argv.index('-p') + 1] if '-p' in sys.argv else 0) + +for line in all_data: + id, ratioList, idList = get_ratio(line[0]) + if output == 0: + if ratioList[0] < 0.6: + sim = [] + score = [] + else: + diff = -np.diff(ratioList) + mean = np.mean(diff) + argmx = np.argmax(diff-mean) + sim = idList[:argmx+1] + score = ratioList[:argmx+1] + elif output == 1: + sim = idList + score = ratioList + else: + sim = idList[:min(len(idList),output)] + score = ratioList[:min(len(idList),output)] + + sys.stdout.write(json.dumps({"id":id,"value":{"similarity":sim, "score":score}})) + sys.stdout.write('\n')