Skip to content

Commit

Permalink
Merge branch 'develop' into CV2-6039-add-channel-to-articles
Browse files Browse the repository at this point in the history
# Conflicts:
#	app/models/team.rb
  • Loading branch information
jayjay-w committed Feb 27, 2025
2 parents cf6adcc + 0e70b5e commit 3be5a80
Show file tree
Hide file tree
Showing 17 changed files with 396 additions and 82 deletions.
2 changes: 2 additions & 0 deletions .github/codeql/codeql-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# CodeQL scan configuration: restrict which repository paths are analyzed.
paths-ignore:
- "data/**" # Ignore the localization data directory, which contains unusual characters that cause parsing to fail.
104 changes: 104 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL Advanced"

on:
  push:
    branches: [ "develop", "master" ]
  pull_request:
    branches: [ "develop", "master" ]
  schedule:
    # Weekly scheduled scan: Sundays at 22:31 UTC.
    - cron: '31 22 * * 0'

jobs:
  analyze:
    name: Analyze (${{ matrix.language }})
    # Runner size impacts CodeQL analysis time. To learn more, please see:
    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
    #   - https://gh.io/supported-runners-and-hardware-resources
    #   - https://gh.io/using-larger-runners (GitHub.com only)
    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    permissions:
      # required for all workflows
      security-events: write

      # required to fetch internal or private CodeQL packs
      packages: read

      # only required for workflows in private repositories
      actions: read
      contents: read

    strategy:
      fail-fast: false
      matrix:
        include:
          - language: javascript-typescript
            build-mode: none
          - language: python
            build-mode: none
          - language: ruby
            build-mode: none
        # CodeQL supports the following values for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
        # Use 'c-cpp' to analyze code written in C, C++ or both
        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Add any setup steps before running the `github/codeql-action/init` action.
      # This includes steps like installing compilers or runtimes (`actions/setup-node`
      # or others). This is typically only required for manual builds.
      # - name: Setup runtime (example)
      #   uses: actions/setup-example@v1

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          build-mode: ${{ matrix.build-mode }}
          config-file: ./.github/codeql/codeql-config.yml
          # NOTE(review): debug mode uploads extra artifacts and slows the scan — confirm it is still needed.
          debug: true
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.

          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality

      # If the analyze step fails for one of the languages you are analyzing with
      # "We were unable to automatically build your code", modify the matrix above
      # to set the build mode to "manual" for that language. Then modify this step
      # to build your code.
      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
      - if: matrix.build-mode == 'manual'
        shell: bash
        run: |
          echo 'If you are using a "manual" build mode for one or more of the' \
            'languages you are analyzing, replace this with the commands to build' \
            'your code, for example:'
          echo '  make bootstrap'
          echo '  make release'
          exit 1

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{matrix.language}}"
16 changes: 8 additions & 8 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ deploy_qa:
script:
- pip install setuptools==68.0.0
- pip install urllib3==2.0.6
- pip install botocore==1.31.62
- pip install boto3==1.28.62
- pip install ecs-deploy==1.15.0
- pip install awscli==1.31.13
- pip install botocore==1.33.13
- pip install boto3==1.33.13
- pip install ecs-deploy==1.14.0
- pip install awscli==1.29.59
- alias aws='docker run -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION --rm amazon/aws-cli'
- aws ssm get-parameters-by-path --region $AWS_DEFAULT_REGION --path /qa/check-api/ --recursive --with-decryption --output text --query "Parameters[].[Name]" | sed -E 's#/qa/check-api/##' > env.qa.names
- for NAME in `cat env.qa.names`; do echo -n "-s qa-check-api-migration $NAME /qa/check-api/$NAME " >> qa-check-api-migration.env.args; done
Expand Down Expand Up @@ -109,10 +109,10 @@ deploy_live:
script:
- pip install setuptools==68.0.0
- pip install urllib3==2.0.6
- pip install botocore==1.31.62
- pip install boto3==1.28.62
- pip install ecs-deploy==1.15.0
- pip install awscli==1.31.13
- pip install botocore==1.33.13
- pip install boto3==1.33.13
- pip install ecs-deploy==1.14.0
- pip install awscli==1.29.59
- alias aws='docker run -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION --rm amazon/aws-cli'
- aws ssm get-parameters-by-path --region $AWS_DEFAULT_REGION --path /live/check-api/ --recursive --with-decryption --output text --query "Parameters[].[Name]" | sed -E 's#/live/check-api/##' > env.live.names
- for NAME in `cat env.live.names`; do echo -n "-s live-check-api-migration $NAME /live/check-api/$NAME " >> live-check-api-migration.env.args; done
Expand Down
2 changes: 1 addition & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ Metrics/CyclomaticComplexity:
A complexity metric that is strongly correlated to the number
of test cases needed to validate a method.
Enabled: true
Max: 13
Max: 14

Metrics/LineLength:
Description: 'Limit lines to 80 characters.'
Expand Down
22 changes: 15 additions & 7 deletions app/graph/types/team_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,10 @@ def tipline_requests(from_timestamp:, to_timestamp:)
end

field :articles, ::ArticleUnion.connection_type, null: true do
argument :article_type, GraphQL::Types::String, required: true, camelize: false
argument :article_type, GraphQL::Types::String, required: false, camelize: false

# Sort and pagination
argument :limit, GraphQL::Types::Int, required: false, default_value: 10
argument :offset, GraphQL::Types::Int, required: false, default_value: 0
argument :sort, GraphQL::Types::String, required: false, default_value: 'title'
argument :sort_type, GraphQL::Types::String, required: false, camelize: false, default_value: 'ASC'
Expand All @@ -332,13 +333,18 @@ def articles(**args)
sort = args[:sort].to_s
order = [:title, :language, :updated_at, :id].include?(sort.downcase.to_sym) ? sort.downcase.to_sym : :title
order_type = args[:sort_type].to_s.downcase.to_sym == :desc ? :desc : :asc
articles = Explainer.none
if args[:article_type] == 'explainer'
articles = object.filtered_explainers(args)
elsif args[:article_type] == 'fact-check'
articles = object.filtered_fact_checks(args)
if args[:article_type].blank?
limit = context[:current_arguments][:first] || args[:limit]
object.filtered_articles(args, limit.to_i, args[:offset].to_i, order, order_type)
else
articles = nil
if args[:article_type] == 'explainer'
articles = object.filtered_explainers(args)
elsif args[:article_type] == 'fact-check'
articles = object.filtered_fact_checks(args)
end
articles.offset(args[:offset].to_i).order(order => order_type)
end
articles.offset(args[:offset].to_i).order(order => order_type)
end

field :articles_count, GraphQL::Types::Int, null: true do
Expand Down Expand Up @@ -404,6 +410,8 @@ def statistics(period:, language: nil, platform: nil)
TeamStatistics.new(object, period, language, platform)
end

field :statistics_platforms, [GraphQL::Types::String], null: true, description: 'List of tipline platforms for which we have data.'

field :bot_query, [TiplineSearchResultType], null: true do
argument :search_text, GraphQL::Types::String, required: true
argument :threshold, GraphQL::Types::Float, required: false
Expand Down
2 changes: 1 addition & 1 deletion app/models/bot/alegre.rb
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,7 @@ def self.relationship_model_not_allowed(relationship_model)

def self.report_exception_if_bad_relationship(relationship, pm_id_scores, relationship_type)
if relationship.model.nil? || relationship.weight.nil? || relationship.source_field.nil? || relationship.target_field.nil? || self.relationship_model_not_allowed(relationship.model)
CheckSentry.notify(Bot::Alegre::Error.new("[Alegre] Bad relationship was stored without required metadata"), **{trace: Thread.current.backtrace.join("\n"), relationship: relationship.attributes, relationship_type: relationship_type, pm_id_scores: pm_id_scores})
CheckSentry.notify(Bot::Alegre::Error.new("[Alegre] Bad relationship with ID [#{relationship.id}] was stored without required metadata"), **{trace: Thread.current.backtrace.join("\n"), relationship: relationship.attributes, relationship_type: relationship_type, pm_id_scores: pm_id_scores})
end
end

Expand Down
54 changes: 26 additions & 28 deletions app/models/bot/tagger.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

class Bot::Tagger < BotUser
check_settings

class Error < ::StandardError
end

def self.get_tag_text(tag_id, auto_tag_prefix, ignore_autotags)
tag=TagText.find_by_id(tag_id)&.text
if tag.nil? || (ignore_autotags && tag[0]==auto_tag_prefix)
tag = TagText.find_by_id(tag_id)&.text
if tag.nil? || (ignore_autotags && tag[0] == auto_tag_prefix)
return nil
else
tag[0]==auto_tag_prefix ? tag[1..] : tag
tag[0] == auto_tag_prefix ? tag[1..] : tag
end
end

Expand All @@ -23,62 +24,60 @@ def self.log(message, pm_id = nil, level = Logger::INFO)
def self.run(body)
self.log("Received event with body of #{body}", nil, Logger::INFO)
if CheckConfig.get('alegre_host').blank?
self.log("Skipping events because `alegre_host` config is blank", nil, Logger::DEBUG)
self.log("Skipping events because alegre_host config is blank", nil, Logger::DEBUG)
return false
end

handled = false
pm = nil
begin
settings=JSON.parse(body[:settings])
auto_tag_prefix=settings["auto_tag_prefix"]
threshold=settings["threshold"]/100.0
ignore_autotags=settings["ignore_autotags"]
settings = JSON.parse(body[:settings])
auto_tag_prefix = settings["auto_tag_prefix"]
threshold = settings["threshold"] / 100.0
ignore_autotags = settings["ignore_autotags"]
pm = ProjectMedia.where(id: body.dig(:data, :dbid)).last
if body.dig(:event) == 'create_project_media' && !pm.nil?
self.log("This item was just created, processing...", pm.id, Logger::INFO)
# Search all text fields for all items in the workspace using only the configured vector model

search_texts = ['original_title', 'original_description',
'extracted_text', 'transcription', 'claim_description_content'
].map{|field| pm.send(field) if !pm.nil? && pm.respond_to?(field)}
# Search all text fields for all items in the workspace using only the configured vector model
search_texts = ['original_title', 'original_description', 'extracted_text', 'transcription', 'claim_description_content'].map{ |field| pm.send(field) if !pm.nil? && pm.respond_to?(field) }

# Remove duplicate and nil values
search_texts = search_texts.uniq.compact.reject{|q| q.length==0}
search_texts = search_texts.uniq.compact.reject{ |q| q.length == 0 }
self.log("Query values are: #{search_texts}", pm.id, Logger::INFO)

# Search for each text field in `search_texts`
# Search for each text field in search_texts
# Do not use Elasticsearch. The threshold to use comes from the Tagger bot settings.
# Method signature: get_items_with_similar_text(pm, fields, threshold, query_text, models, team_ids = [pm&.team_id])
results=[]
results = []
search_texts.each do |query|
results<<Bot::Alegre.get_items_with_similar_text(pm, Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS,
[{ value: threshold }], query, [Bot::Alegre.matching_model_to_use(pm.team_id)].flatten.reject{|m| m==Bot::Alegre::ELASTICSEARCH_MODEL})
#self.debug("Results (#{query}): #{results}", pm.id, Logger::INFO)
results << Bot::Alegre.get_items_with_similar_text(pm, Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS, [{ value: threshold }], query, [Bot::Alegre.matching_model_to_use(pm.team_id)].flatten.reject{ |m| m == Bot::Alegre::ELASTICSEARCH_MODEL })
# self.debug("Results (#{query}): #{results}", pm.id, Logger::INFO)
end
# Combine the list of hashes into one hash
results=results.reduce({}, :merge!)
results = results.reduce({}, :merge!)
self.log("#{results.length} nearest neighbors #{results.keys()}", pm.id, Logger::INFO)
self.log("Results: #{results}", pm.id, Logger::INFO)

# For each nearest neighbor, get the tags.
tag_counts=results.map{|nn_pm,_| ProjectMedia.find(nn_pm).get_annotations('tag')}.flatten
tag_counts = results.map{ |nn_pm, _| ProjectMedia.find(nn_pm).get_annotations('tag') }.flatten
# Transform from tag objects to strings
# .compact removes any nil values returned by get_tag_text
tag_counts=tag_counts.map{|t| self.get_tag_text(t[:data][:tag],auto_tag_prefix,ignore_autotags)}.compact
tag_counts = tag_counts.map{ |t| self.get_tag_text(t[:data][:tag],auto_tag_prefix, ignore_autotags) }.compact
# Convert to counts and sort by the counts (low to high)
tag_counts=tag_counts.group_by(&:itself).transform_values(&:count).sort_by{|_k,v| v}
tag_counts = tag_counts.group_by(&:itself).transform_values(&:count).sort_by{ |_k, v| v }
# tag_counts is now an array of arrays with counts e.g., [['nature', 1], ['sport', 2]]
self.log("Tag distribution #{tag_counts}", pm.id, Logger::INFO)
if tag_counts.length > 0
max_count=tag_counts.last[1]
if max_count<settings["minimum_count"]
max_count = tag_counts.last[1]
if max_count < settings["minimum_count"]
self.log("Max count #{max_count} is less than minimum required to apply a tag", pm.id, Logger::INFO)
return false
end
most_common_tags=tag_counts.reject{|_k,v| v < max_count}
most_common_tags = tag_counts.reject{ |_k, v| v < max_count }
self.log("Most common tags #{most_common_tags}", pm.id, Logger::INFO)
most_common_tags.each do |tag|
Tag.create!(annotated:pm, annotator: BotUser.get_user('tagger'), tag: auto_tag_prefix+tag[0])
Tag.create!(annotated: pm, annotator: BotUser.get_user('tagger'), tag: auto_tag_prefix + tag[0])
end
else
self.log("No most common tag", pm.id, Logger::INFO)
Expand All @@ -87,11 +86,10 @@ def self.run(body)
end
rescue StandardError => e
error = Error.new(e)
Rails.logger.error("[AutoTagger Bot] Exception for event `#{body['event']}`: #{error.class} - #{error.message}")
Rails.logger.error("[AutoTagger Bot] Exception for event #{body['event']}: #{error.class} - #{error.message}")
CheckSentry.notify(error, bot: self.name, body: body)
end

handled
end
end
18 changes: 5 additions & 13 deletions app/models/concerns/relationship_bulk.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def bulk_update(ids, updates, team)
end
relationships = Relationship.where(id: ids, source_id: source_id)
relationships.update_all(update_columns)
delete_cached_field(pm_source.id, ids)
# Run callbacks in background
extra_options = {
team_id: team&.id,
Expand All @@ -29,6 +28,7 @@ def bulk_update(ids, updates, team)
action: updates[:action]
}
self.delay.run_update_callbacks(ids.to_json, extra_options.to_json)
delete_cached_fields(pm_source.id, relationships.map(&:target_id))
{ source_project_media: pm_source }
end
end
Expand All @@ -41,7 +41,7 @@ def bulk_destroy(ids, updates, team)
relationships.find_each{ |r| relationship_target[r.id] = r.target_id}
relationships.delete_all
target_ids = relationship_target.values
delete_cached_field(pm_source.id, target_ids)
delete_cached_fields(pm_source.id, target_ids)
# Run callbacks in background
extra_options = {
team_id: team&.id,
Expand All @@ -52,17 +52,9 @@ def bulk_destroy(ids, updates, team)
{ source_project_media: pm_source }
end

def delete_cached_field(source_id, target_ids)
# Clear cached fields
# List fields with `model: Relationship`
cached_fields = [
'is_suggested', 'is_confirmed', 'linked_items_count', 'suggestions_count','report_status','related_count',
'demand', 'last_seen', 'sources_as_sentence', 'added_as_similar_by_name', 'confirmed_as_similar_by_name'
]
cached_fields.each do |name|
Rails.cache.delete("check_cached_field:ProjectMedia:#{source_id}:#{name}")
target_ids.each { |id| Rails.cache.delete("check_cached_field:ProjectMedia:#{id}:#{name}") }
end
def delete_cached_fields(source_id, target_ids)
ids = [source_id, target_ids].flatten
ProjectMedia.where(id: ids).each { |pm| pm.clear_cached_fields }
end

def run_update_callbacks(ids_json, extra_options_json)
Expand Down
Loading

0 comments on commit 3be5a80

Please sign in to comment.