Content File Score Adjustment

mitodl · Oct 9, 2024 · 847fefe
1 parent 2b22483
commit 847fefe
Show file tree

Hide file tree

Showing 8 changed files with 266 additions and 69 deletions.
diff --git a/frontends/api/src/generated/v1/api.ts b/frontends/api/src/generated/v1/api.ts
diff --git a/frontends/mit-learn/src/page-components/SearchDisplay/SearchDisplay.tsx b/frontends/mit-learn/src/page-components/SearchDisplay/SearchDisplay.tsx
@@ -562,6 +562,7 @@ const SearchDisplay: React.FC<SearchDisplayProps> = ({
       max_incompleteness_penalty: searchParams.get(
         "max_incompleteness_penalty",
       ),
+      content_file_score_weight: searchParams.get("content_file_score_weight"),
       ...requestParams,
       aggregations: (facetNames || []).concat([
         "resource_category",
@@ -739,6 +740,26 @@ const SearchDisplay: React.FC<SearchDisplayProps> = ({
               the degree of incompleteness. Only affects results if there is a
               search term.
             </ExplanationContainer>
+            <AdminTitleContainer>
+              Content File Score Weight Adjustment
+            </AdminTitleContainer>
+            <SliderInput
+              currentValue={
+                searchParams.get("content_file_score_weight")
+                  ? Number(searchParams.get("content_file_score_weight"))
+                  : 1
+              }
+              setSearchParams={setSearchParams}
+              urlParam="content_file_score_weight"
+              min={0}
+              max={1}
+              step={0.1}
+            />
+            <ExplanationContainer>
+              Score weight adjustment for content file matches. 1 means no
+              adjustment. 0 means content file matches are not counted in the
+              score. Only affects the results if there is a search term.
+            </ExplanationContainer>
           </div>
         ) : null}
       </div>

diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py
@@ -199,7 +199,9 @@ def generate_content_file_text_clause(text):
     return wrap_text_clause(text_query)
 
 
-def generate_learning_resources_text_clause(text, search_mode, slop):
+def generate_learning_resources_text_clause(
+    text, search_mode, slop, content_file_score_weight
+):
     """
     Return text clause for the query
 
@@ -221,6 +223,14 @@ def generate_learning_resources_text_clause(text, search_mode, slop):
         if search_mode == "phrase" and slop:
             extra_params["slop"] = slop
 
+    if content_file_score_weight is not None:
+        resourcefile_fields = [
+            f"{field}^{content_file_score_weight}"
+            for field in RESOURCEFILE_QUERY_FIELDS
+        ]
+    else:
+        resourcefile_fields = RESOURCEFILE_QUERY_FIELDS
+
     if text:
         text_query = {
             "should": [
@@ -302,7 +312,7 @@ def generate_learning_resources_text_clause(text, search_mode, slop):
                         "query": {
                             query_type: {
                                 "query": text,
-                                "fields": RESOURCEFILE_QUERY_FIELDS,
+                                "fields": resourcefile_fields,
                                 **extra_params,
                             }
                         },
@@ -557,7 +567,10 @@ def add_text_query_to_search(search, text, search_params, query_type_query):
         text_query = generate_content_file_text_clause(text)
     else:
         text_query = generate_learning_resources_text_clause(
-            text, search_params.get("search_mode"), search_params.get("slop")
+            text,
+            search_params.get("search_mode"),
+            search_params.get("slop"),
+            search_params.get("content_file_score_weight"),
         )
 
     yearly_decay_percent = search_params.get("yearly_decay_percent")

diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py
diff --git a/learning_resources_search/constants.py b/learning_resources_search/constants.py
@@ -387,9 +387,10 @@ class FilterConfig:
 ]
 
 RESOURCEFILE_QUERY_FIELDS = [
-    "content",
-    "title.english^3",
-    "short_description.english^2",
+    "content.english",
+    "title.english",
+    "content_title.english",
+    "description.english",
     "content_feature_type",
 ]
 

diff --git a/learning_resources_search/serializers.py b/learning_resources_search/serializers.py
@@ -456,6 +456,16 @@ class LearningResourcesSearchRequestSerializer(SearchRequestSerializer):
             "search term."
         ),
     )
+    content_file_score_weight = serializers.FloatField(
+        max_value=1,
+        min_value=0,
+        required=False,
+        allow_null=True,
+        help_text=(
+            "Score weight for content file data.  1 is the default."
+            " 0 means content files are ignored"
+        ),
+    )
 
 
 class ContentFileSearchRequestSerializer(SearchRequestSerializer):

diff --git a/learning_resources_search/serializers_test.py b/learning_resources_search/serializers_test.py
@@ -929,6 +929,7 @@ def test_learning_resources_search_request_serializer():
         "slop": 2,
         "min_score": 0,
         "max_incompleteness_penalty": 25,
+        "content_file_score_weight": 0,
     }
 
     cleaned = {
@@ -955,6 +956,7 @@ def test_learning_resources_search_request_serializer():
         "slop": 2,
         "min_score": 0,
         "max_incompleteness_penalty": 25,
+        "content_file_score_weight": 0,
     }
 
     serialized = LearningResourcesSearchRequestSerializer(data=data)

diff --git a/openapi/specs/v1.yaml b/openapi/specs/v1.yaml
@@ -2320,6 +2320,16 @@ paths:
         description: "The type of certificate             \n\n* `micromasters` - Micromasters\
           \ Credential\n* `professional` - Professional Certificate\n* `completion`\
           \ - Certificate of Completion\n* `none` - No Certificate"
+      - in: query
+        name: content_file_score_weight
+        schema:
+          type: number
+          format: double
+          maximum: 1
+          minimum: 0
+          nullable: true
+        description: Score weight for content file data.  1 is the default. 0 means
+          content files are ignored
       - in: query
         name: course_feature
         schema:
@@ -2808,6 +2818,16 @@ paths:
         description: "The type of certificate             \n\n* `micromasters` - Micromasters\
           \ Credential\n* `professional` - Professional Certificate\n* `completion`\
           \ - Certificate of Completion\n* `none` - No Certificate"
+      - in: query
+        name: content_file_score_weight
+        schema:
+          type: number
+          format: double
+          maximum: 1
+          minimum: 0
+          nullable: true
+        description: Score weight for content file data.  1 is the default. 0 means
+          content files are ignored
       - in: query
         name: course_feature
         schema:
@@ -3321,6 +3341,16 @@ paths:
         description: "The type of certificate             \n\n* `micromasters` - Micromasters\
           \ Credential\n* `professional` - Professional Certificate\n* `completion`\
           \ - Certificate of Completion\n* `none` - No Certificate"
+      - in: query
+        name: content_file_score_weight
+        schema:
+          type: number
+          format: double
+          maximum: 1
+          minimum: 0
+          nullable: true
+        description: Score weight for content file data.  1 is the default. 0 means
+          content files are ignored
       - in: query
         name: course_feature
         schema:
@@ -3825,6 +3855,16 @@ paths:
         description: "The type of certificate             \n\n* `micromasters` - Micromasters\
           \ Credential\n* `professional` - Professional Certificate\n* `completion`\
           \ - Certificate of Completion\n* `none` - No Certificate"
+      - in: query
+        name: content_file_score_weight
+        schema:
+          type: number
+          format: double
+          maximum: 1
+          minimum: 0
+          nullable: true
+        description: Score weight for content file data.  1 is the default. 0 means
+          content files are ignored
       - in: query
         name: course_feature
         schema:
@@ -10340,6 +10380,14 @@ components:
             An OCW course with completeness = 0 will have this score penalty. Partially
             complete courses have a linear penalty proportional to the degree of incompleteness.
             Only affects results if there is a search term.
+        content_file_score_weight:
+          type: number
+          format: double
+          maximum: 1
+          minimum: 0
+          nullable: true
+          description: Score weight for content file data.  1 is the default. 0 means
+            content files are ignored
         source_type:
           allOf:
           - $ref: '#/components/schemas/SourceTypeEnum'