Skip to content

Commit

Permalink
Content File Score Adjustment
Browse files: browse the repository at this point in the history
  • Loading branch information
abeglova committed Oct 9, 2024
1 parent 2b22483 commit 847fefe
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 69 deletions.
82 changes: 82 additions & 0 deletions frontends/api/src/generated/v1/api.ts

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ const SearchDisplay: React.FC<SearchDisplayProps> = ({
max_incompleteness_penalty: searchParams.get(
"max_incompleteness_penalty",
),
content_file_score_weight: searchParams.get("content_file_score_weight"),
...requestParams,
aggregations: (facetNames || []).concat([
"resource_category",
Expand Down Expand Up @@ -739,6 +740,26 @@ const SearchDisplay: React.FC<SearchDisplayProps> = ({
the degree of incompleteness. Only affects results if there is a
search term.
</ExplanationContainer>
<AdminTitleContainer>
Content File Score Weight Adjustment
</AdminTitleContainer>
<SliderInput
currentValue={
searchParams.get("content_file_score_weight")
? Number(searchParams.get("content_file_score_weight"))
: 1
}
setSearchParams={setSearchParams}
urlParam="content_file_score_weight"
min={0}
max={1}
step={0.1}
/>
<ExplanationContainer>
Score weight adjustment for content file matches. 1 means no
adjustment. 0 means content file matches are not counted in the
score. Only affects the results if there is a search term.
</ExplanationContainer>
</div>
) : null}
</div>
Expand Down
19 changes: 16 additions & 3 deletions learning_resources_search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,9 @@ def generate_content_file_text_clause(text):
return wrap_text_clause(text_query)


def generate_learning_resources_text_clause(text, search_mode, slop):
def generate_learning_resources_text_clause(
text, search_mode, slop, content_file_score_weight
):
"""
Return text clause for the query
Expand All @@ -221,6 +223,14 @@ def generate_learning_resources_text_clause(text, search_mode, slop):
if search_mode == "phrase" and slop:
extra_params["slop"] = slop

if content_file_score_weight is not None:
resourcefile_fields = [
f"{field}^{content_file_score_weight}"
for field in RESOURCEFILE_QUERY_FIELDS
]
else:
resourcefile_fields = RESOURCEFILE_QUERY_FIELDS

if text:
text_query = {
"should": [
Expand Down Expand Up @@ -302,7 +312,7 @@ def generate_learning_resources_text_clause(text, search_mode, slop):
"query": {
query_type: {
"query": text,
"fields": RESOURCEFILE_QUERY_FIELDS,
"fields": resourcefile_fields,
**extra_params,
}
},
Expand Down Expand Up @@ -557,7 +567,10 @@ def add_text_query_to_search(search, text, search_params, query_type_query):
text_query = generate_content_file_text_clause(text)
else:
text_query = generate_learning_resources_text_clause(
text, search_params.get("search_mode"), search_params.get("slop")
text,
search_params.get("search_mode"),
search_params.get("slop"),
search_params.get("content_file_score_weight"),
)

yearly_decay_percent = search_params.get("yearly_decay_percent")
Expand Down
146 changes: 83 additions & 63 deletions learning_resources_search/api_test.py

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions learning_resources_search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,9 +387,10 @@ class FilterConfig:
]

RESOURCEFILE_QUERY_FIELDS = [
"content",
"title.english^3",
"short_description.english^2",
"content.english",
"title.english",
"content_title.english",
"description.english",
"content_feature_type",
]

Expand Down
10 changes: 10 additions & 0 deletions learning_resources_search/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,16 @@ class LearningResourcesSearchRequestSerializer(SearchRequestSerializer):
"search term."
),
)
content_file_score_weight = serializers.FloatField(
max_value=1,
min_value=0,
required=False,
allow_null=True,
help_text=(
"Score weight for content file data. 1 is the default."
" 0 means content files are ignored"
),
)


class ContentFileSearchRequestSerializer(SearchRequestSerializer):
Expand Down
2 changes: 2 additions & 0 deletions learning_resources_search/serializers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,7 @@ def test_learning_resources_search_request_serializer():
"slop": 2,
"min_score": 0,
"max_incompleteness_penalty": 25,
"content_file_score_weight": 0,
}

cleaned = {
Expand All @@ -955,6 +956,7 @@ def test_learning_resources_search_request_serializer():
"slop": 2,
"min_score": 0,
"max_incompleteness_penalty": 25,
"content_file_score_weight": 0,
}

serialized = LearningResourcesSearchRequestSerializer(data=data)
Expand Down
48 changes: 48 additions & 0 deletions openapi/specs/v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2320,6 +2320,16 @@ paths:
description: "The type of certificate \n\n* `micromasters` - Micromasters\
\ Credential\n* `professional` - Professional Certificate\n* `completion`\
\ - Certificate of Completion\n* `none` - No Certificate"
- in: query
name: content_file_score_weight
schema:
type: number
format: double
maximum: 1
minimum: 0
nullable: true
description: Score weight for content file data. 1 is the default. 0 means
content files are ignored
- in: query
name: course_feature
schema:
Expand Down Expand Up @@ -2808,6 +2818,16 @@ paths:
description: "The type of certificate \n\n* `micromasters` - Micromasters\
\ Credential\n* `professional` - Professional Certificate\n* `completion`\
\ - Certificate of Completion\n* `none` - No Certificate"
- in: query
name: content_file_score_weight
schema:
type: number
format: double
maximum: 1
minimum: 0
nullable: true
description: Score weight for content file data. 1 is the default. 0 means
content files are ignored
- in: query
name: course_feature
schema:
Expand Down Expand Up @@ -3321,6 +3341,16 @@ paths:
description: "The type of certificate \n\n* `micromasters` - Micromasters\
\ Credential\n* `professional` - Professional Certificate\n* `completion`\
\ - Certificate of Completion\n* `none` - No Certificate"
- in: query
name: content_file_score_weight
schema:
type: number
format: double
maximum: 1
minimum: 0
nullable: true
description: Score weight for content file data. 1 is the default. 0 means
content files are ignored
- in: query
name: course_feature
schema:
Expand Down Expand Up @@ -3825,6 +3855,16 @@ paths:
description: "The type of certificate \n\n* `micromasters` - Micromasters\
\ Credential\n* `professional` - Professional Certificate\n* `completion`\
\ - Certificate of Completion\n* `none` - No Certificate"
- in: query
name: content_file_score_weight
schema:
type: number
format: double
maximum: 1
minimum: 0
nullable: true
description: Score weight for content file data. 1 is the default. 0 means
content files are ignored
- in: query
name: course_feature
schema:
Expand Down Expand Up @@ -10340,6 +10380,14 @@ components:
An OCW course with completeness = 0 will have this score penalty. Partially
complete courses have a linear penalty proportional to the degree of incompleteness.
Only affects results if there is a search term.
content_file_score_weight:
type: number
format: double
maximum: 1
minimum: 0
nullable: true
description: Score weight for content file data. 1 is the default. 0 means
content files are ignored
source_type:
allOf:
- $ref: '#/components/schemas/SourceTypeEnum'
Expand Down

0 comments on commit 847fefe

Please sign in to comment.