Skip to content

Commit

Permalink
adds case insensitivity to the language filtering (#1432)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexBLR authored Oct 14, 2024
1 parent adac3cf commit 4144922
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 13 deletions.
11 changes: 4 additions & 7 deletions app/services/language_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class LanguageFilter
# Loads the term definitions from a YAML file and precomputes the lookup
# structures used for matching and replacement.
#
# @param filter_file_path [String, Pathname] YAML file whose top-level keys are
#   the terms to filter and whose values carry a 'replacement' entry; defaults
#   to config/language_filter.yml under Rails.root.
def initialize(filter_file_path = Rails.root.join('config', 'language_filter.yml'))
@filter_data = YAML.load_file(filter_file_path)
# Longest terms first, so a longer term is handled before any shorter term
# it contains.
@terms = @filter_data.keys.sort { |a, b| b.length <=> a.length }
# Maps a case-insensitive regex for each term to its replacement string.
# Regexp.escape makes the term match literally; the hash keeps the
# longest-first insertion order of @terms.
@term_regexes = @terms.map { |term| [Regexp.new(Regexp.escape(term), Regexp::IGNORECASE), @filter_data[term]['replacement']] }.to_h
end

# Checks if the input is valid, i.e., doesn't need replacement.
Expand All @@ -19,11 +20,7 @@ def initialize(filter_file_path = Rails.root.join('config', 'language_filter.yml
def valid?(input)
# Blank input (e.g. nil) has nothing to filter and is reported as valid.
return true if input.blank?

# NOTE(review): this view is a rendered diff without +/- markers. The
# case-sensitive loop and the bare `true` below look like the removed lines,
# and the `none?` line looks like their case-insensitive replacement --
# confirm against the actual file.
@terms.each do |term|
return false if input.include?(term)
end

true
# Valid only when none of the case-insensitive term regexes match the input.
@term_regexes.keys.none? { |regex| input.match?(regex) }
end

# Gets the filtered version of the input text.
Expand All @@ -35,8 +32,8 @@ def filter(input)

output = input.dup

@terms.each do |term|
output.gsub!(term, @filter_data[term]['replacement']) if output.include?(term)
@term_regexes.each do |regex, replacement|
output.gsub!(regex, replacement)
end

output
Expand Down
8 changes: 4 additions & 4 deletions config/language_filter.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"Gender identity disorder":
"gender identity disorder":
replacement: "Gender dysphoria"
rationale: "https://docs.google.com/spreadsheets/d/1uqiP5PPKXJt35uFLrG5ytsLbz2kgWXBjG__hyD12KlY/edit?gid=0#gid=0"
"Gender identity disorders":
"gender identity disorders":
replacement: "Gender dysphoria"
"Gender identity disorders in children":
"gender identity disorders in children":
replacement: "Gender dysphoria in children"
"Gender identity disorders in adolescence":
"gender identity disorders in adolescence":
replacement: "Gender dysphoria in adolescence"
"african american gays in literature":
replacement: "African American gay people in literature"
Expand Down
17 changes: 15 additions & 2 deletions spec/services/language_filter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,21 @@

it 'returns true when input is valid' do
input = "Georgia"

expect(filter.valid?(input)).to eq(true)
end

it 'returns false when input is invalid' do
input = "Gender identity disorder"
expect(filter.valid?(input)).to eq(false)
end

it 'returns false when input is invalid with different case' do
input = "gEnDeR iDeNtItY dIsOrDeR"
expect(filter.valid?(input)).to eq(false)
end

it 'returns true when input is nil' do
input = nil

expect(filter.valid?(input)).to eq(true)
end
end
Expand All @@ -59,7 +61,12 @@
it 'prioritizes terms with higher length during replacement' do
input = 'Gender identity disorders'
expected = 'Gender dysphoria'
expect(filter.filter(input)).to eq(expected)
end

it 'replaces harmful text with corresponding replacements regardless of case' do
input = 'gEnDeR iDeNtItY dIsOrDeR'
expected = 'Gender dysphoria'
expect(filter.filter(input)).to eq(expected)
end

Expand All @@ -70,6 +77,12 @@
expect(filter.filter(input)).to eq(expected)
end

it 'replaces harmful terms when multiple terms are combined regardless of case' do
input = 'GeNdEr IdEnTiTy DiSoRdErS--United States'
expected = 'Gender dysphoria--United States'
expect(filter.filter(input)).to eq(expected)
end

it 'returns nil when input is nil' do
input = nil
expected = nil
Expand Down

0 comments on commit 4144922

Please sign in to comment.