Skip to content

Commit

Permalink
Merge branch 'develop' into CV2-6039-add-channel-to-articles
Browse files Browse the repository at this point in the history
# Conflicts:
#	app/models/team.rb
  • Loading branch information
jayjay-w committed Feb 27, 2025
2 parents cf6adcc + 0e70b5e commit 3be5a80
Show file tree
Hide file tree
Showing 17 changed files with 396 additions and 82 deletions.
2 changes: 2 additions & 0 deletions .github/codeql/codeql-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# CodeQL scan configuration: restrict which repository paths are analyzed.
paths-ignore:
- "data/**" # Ignore the localization data directory, which contains unusual characters that cause parsing to fail.
104 changes: 104 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL Advanced"

on:
  push:
    branches: [ "develop", "master" ]
  pull_request:
    branches: [ "develop", "master" ]
  schedule:
    # Weekly scheduled scan: Sundays at 22:31 UTC.
    - cron: '31 22 * * 0'

jobs:
  analyze:
    name: Analyze (${{ matrix.language }})
    # Runner size impacts CodeQL analysis time. To learn more, please see:
    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
    #   - https://gh.io/supported-runners-and-hardware-resources
    #   - https://gh.io/using-larger-runners (GitHub.com only)
    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    permissions:
      # required for all workflows
      security-events: write

      # required to fetch internal or private CodeQL packs
      packages: read

      # only required for workflows in private repositories
      actions: read
      contents: read

    strategy:
      fail-fast: false
      matrix:
        include:
          - language: javascript-typescript
            build-mode: none
          - language: python
            build-mode: none
          - language: ruby
            build-mode: none
        # CodeQL supports the following values for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
        # Use 'c-cpp' to analyze code written in C, C++ or both
        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Add any setup steps before running the `github/codeql-action/init` action.
      # This includes steps like installing compilers or runtimes (`actions/setup-node`
      # or others). This is typically only required for manual builds.
      # - name: Setup runtime (example)
      #   uses: actions/setup-example@v1

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          build-mode: ${{ matrix.build-mode }}
          config-file: ./.github/codeql/codeql-config.yml
          # NOTE(review): debug mode uploads extra artifacts and slows the scan — confirm it is still needed.
          debug: true
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.

          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality

      # If the analyze step fails for one of the languages you are analyzing with
      # "We were unable to automatically build your code", modify the matrix above
      # to set the build mode to "manual" for that language. Then modify this step
      # to build your code.
      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
      - if: matrix.build-mode == 'manual'
        shell: bash
        run: |
          echo 'If you are using a "manual" build mode for one or more of the' \
            'languages you are analyzing, replace this with the commands to build' \
            'your code, for example:'
          echo '  make bootstrap'
          echo '  make release'
          exit 1

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{matrix.language}}"
16 changes: 8 additions & 8 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ deploy_qa:
script:
- pip install setuptools==68.0.0
- pip install urllib3==2.0.6
- pip install botocore==1.31.62
- pip install boto3==1.28.62
- pip install ecs-deploy==1.15.0
- pip install awscli==1.31.13
- pip install botocore==1.33.13
- pip install boto3==1.33.13
- pip install ecs-deploy==1.14.0
- pip install awscli==1.29.59
- alias aws='docker run -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION --rm amazon/aws-cli'
- aws ssm get-parameters-by-path --region $AWS_DEFAULT_REGION --path /qa/check-api/ --recursive --with-decryption --output text --query "Parameters[].[Name]" | sed -E 's#/qa/check-api/##' > env.qa.names
- for NAME in `cat env.qa.names`; do echo -n "-s qa-check-api-migration $NAME /qa/check-api/$NAME " >> qa-check-api-migration.env.args; done
Expand Down Expand Up @@ -109,10 +109,10 @@ deploy_live:
script:
- pip install setuptools==68.0.0
- pip install urllib3==2.0.6
- pip install botocore==1.31.62
- pip install boto3==1.28.62
- pip install ecs-deploy==1.15.0
- pip install awscli==1.31.13
- pip install botocore==1.33.13
- pip install boto3==1.33.13
- pip install ecs-deploy==1.14.0
- pip install awscli==1.29.59
- alias aws='docker run -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION --rm amazon/aws-cli'
- aws ssm get-parameters-by-path --region $AWS_DEFAULT_REGION --path /live/check-api/ --recursive --with-decryption --output text --query "Parameters[].[Name]" | sed -E 's#/live/check-api/##' > env.live.names
- for NAME in `cat env.live.names`; do echo -n "-s live-check-api-migration $NAME /live/check-api/$NAME " >> live-check-api-migration.env.args; done
Expand Down
2 changes: 1 addition & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ Metrics/CyclomaticComplexity:
A complexity metric that is strongly correlated to the number
of test cases needed to validate a method.
Enabled: true
Max: 13
Max: 14

Metrics/LineLength:
Description: 'Limit lines to 80 characters.'
Expand Down
22 changes: 15 additions & 7 deletions app/graph/types/team_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,10 @@ def tipline_requests(from_timestamp:, to_timestamp:)
end

field :articles, ::ArticleUnion.connection_type, null: true do
argument :article_type, GraphQL::Types::String, required: true, camelize: false
argument :article_type, GraphQL::Types::String, required: false, camelize: false

# Sort and pagination
argument :limit, GraphQL::Types::Int, required: false, default_value: 10
argument :offset, GraphQL::Types::Int, required: false, default_value: 0
argument :sort, GraphQL::Types::String, required: false, default_value: 'title'
argument :sort_type, GraphQL::Types::String, required: false, camelize: false, default_value: 'ASC'
Expand All @@ -332,13 +333,18 @@ def articles(**args)
sort = args[:sort].to_s
order = [:title, :language, :updated_at, :id].include?(sort.downcase.to_sym) ? sort.downcase.to_sym : :title
order_type = args[:sort_type].to_s.downcase.to_sym == :desc ? :desc : :asc
articles = Explainer.none
if args[:article_type] == 'explainer'
articles = object.filtered_explainers(args)
elsif args[:article_type] == 'fact-check'
articles = object.filtered_fact_checks(args)
if args[:article_type].blank?
limit = context[:current_arguments][:first] || args[:limit]
object.filtered_articles(args, limit.to_i, args[:offset].to_i, order, order_type)
else
articles = nil
if args[:article_type] == 'explainer'
articles = object.filtered_explainers(args)
elsif args[:article_type] == 'fact-check'
articles = object.filtered_fact_checks(args)
end
articles.offset(args[:offset].to_i).order(order => order_type)
end
articles.offset(args[:offset].to_i).order(order => order_type)
end

field :articles_count, GraphQL::Types::Int, null: true do
Expand Down Expand Up @@ -404,6 +410,8 @@ def statistics(period:, language: nil, platform: nil)
TeamStatistics.new(object, period, language, platform)
end

field :statistics_platforms, [GraphQL::Types::String], null: true, description: 'List of tipline platforms for which we have data.'

field :bot_query, [TiplineSearchResultType], null: true do
argument :search_text, GraphQL::Types::String, required: true
argument :threshold, GraphQL::Types::Float, required: false
Expand Down
2 changes: 1 addition & 1 deletion app/models/bot/alegre.rb
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,7 @@ def self.relationship_model_not_allowed(relationship_model)

def self.report_exception_if_bad_relationship(relationship, pm_id_scores, relationship_type)
if relationship.model.nil? || relationship.weight.nil? || relationship.source_field.nil? || relationship.target_field.nil? || self.relationship_model_not_allowed(relationship.model)
CheckSentry.notify(Bot::Alegre::Error.new("[Alegre] Bad relationship was stored without required metadata"), **{trace: Thread.current.backtrace.join("\n"), relationship: relationship.attributes, relationship_type: relationship_type, pm_id_scores: pm_id_scores})
CheckSentry.notify(Bot::Alegre::Error.new("[Alegre] Bad relationship with ID [#{relationship.id}] was stored without required metadata"), **{trace: Thread.current.backtrace.join("\n"), relationship: relationship.attributes, relationship_type: relationship_type, pm_id_scores: pm_id_scores})
end
end

Expand Down
54 changes: 26 additions & 28 deletions app/models/bot/tagger.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

class Bot::Tagger < BotUser
check_settings

class Error < ::StandardError
end

def self.get_tag_text(tag_id, auto_tag_prefix, ignore_autotags)
tag=TagText.find_by_id(tag_id)&.text
if tag.nil? || (ignore_autotags && tag[0]==auto_tag_prefix)
tag = TagText.find_by_id(tag_id)&.text
if tag.nil? || (ignore_autotags && tag[0] == auto_tag_prefix)
return nil
else
tag[0]==auto_tag_prefix ? tag[1..] : tag
tag[0] == auto_tag_prefix ? tag[1..] : tag
end
end

Expand All @@ -23,62 +24,60 @@ def self.log(message, pm_id = nil, level = Logger::INFO)
def self.run(body)
self.log("Received event with body of #{body}", nil, Logger::INFO)
if CheckConfig.get('alegre_host').blank?
self.log("Skipping events because `alegre_host` config is blank", nil, Logger::DEBUG)
self.log("Skipping events because alegre_host config is blank", nil, Logger::DEBUG)
return false
end

handled = false
pm = nil
begin
settings=JSON.parse(body[:settings])
auto_tag_prefix=settings["auto_tag_prefix"]
threshold=settings["threshold"]/100.0
ignore_autotags=settings["ignore_autotags"]
settings = JSON.parse(body[:settings])
auto_tag_prefix = settings["auto_tag_prefix"]
threshold = settings["threshold"] / 100.0
ignore_autotags = settings["ignore_autotags"]
pm = ProjectMedia.where(id: body.dig(:data, :dbid)).last
if body.dig(:event) == 'create_project_media' && !pm.nil?
self.log("This item was just created, processing...", pm.id, Logger::INFO)
# Search all text fields for all items in the workspace using only the configured vector model

search_texts = ['original_title', 'original_description',
'extracted_text', 'transcription', 'claim_description_content'
].map{|field| pm.send(field) if !pm.nil? && pm.respond_to?(field)}
# Search all text fields for all items in the workspace using only the configured vector model
search_texts = ['original_title', 'original_description', 'extracted_text', 'transcription', 'claim_description_content'].map{ |field| pm.send(field) if !pm.nil? && pm.respond_to?(field) }

# Remove duplicate and nil values
search_texts = search_texts.uniq.compact.reject{|q| q.length==0}
search_texts = search_texts.uniq.compact.reject{ |q| q.length == 0 }
self.log("Query values are: #{search_texts}", pm.id, Logger::INFO)

# Search for each text field in `search_texts`
# Search for each text field in search_texts
# Do not use Elasticsearch. The threshold to use comes from the Tagger bot settings.
# Method signature: get_items_with_similar_text(pm, fields, threshold, query_text, models, team_ids = [pm&.team_id])
results=[]
results = []
search_texts.each do |query|
results<<Bot::Alegre.get_items_with_similar_text(pm, Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS,
[{ value: threshold }], query, [Bot::Alegre.matching_model_to_use(pm.team_id)].flatten.reject{|m| m==Bot::Alegre::ELASTICSEARCH_MODEL})
#self.debug("Results (#{query}): #{results}", pm.id, Logger::INFO)
results << Bot::Alegre.get_items_with_similar_text(pm, Bot::Alegre::ALL_TEXT_SIMILARITY_FIELDS, [{ value: threshold }], query, [Bot::Alegre.matching_model_to_use(pm.team_id)].flatten.reject{ |m| m == Bot::Alegre::ELASTICSEARCH_MODEL })
# self.debug("Results (#{query}): #{results}", pm.id, Logger::INFO)
end
# Combine the list of hashes into one hash
results=results.reduce({}, :merge!)
results = results.reduce({}, :merge!)
self.log("#{results.length} nearest neighbors #{results.keys()}", pm.id, Logger::INFO)
self.log("Results: #{results}", pm.id, Logger::INFO)

# For each nearest neighbor, get the tags.
tag_counts=results.map{|nn_pm,_| ProjectMedia.find(nn_pm).get_annotations('tag')}.flatten
tag_counts = results.map{ |nn_pm, _| ProjectMedia.find(nn_pm).get_annotations('tag') }.flatten
# Transform from tag objects to strings
# .compact removes any nil values returned by get_tag_text
tag_counts=tag_counts.map{|t| self.get_tag_text(t[:data][:tag],auto_tag_prefix,ignore_autotags)}.compact
tag_counts = tag_counts.map{ |t| self.get_tag_text(t[:data][:tag],auto_tag_prefix, ignore_autotags) }.compact
# Convert to counts and sort by the counts (low to high)
tag_counts=tag_counts.group_by(&:itself).transform_values(&:count).sort_by{|_k,v| v}
tag_counts = tag_counts.group_by(&:itself).transform_values(&:count).sort_by{ |_k, v| v }
# tag_counts is now an array of arrays with counts e.g., [['nature', 1], ['sport', 2]]
self.log("Tag distribution #{tag_counts}", pm.id, Logger::INFO)
if tag_counts.length > 0
max_count=tag_counts.last[1]
if max_count<settings["minimum_count"]
max_count = tag_counts.last[1]
if max_count < settings["minimum_count"]
self.log("Max count #{max_count} is less than minimum required to apply a tag", pm.id, Logger::INFO)
return false
end
most_common_tags=tag_counts.reject{|_k,v| v < max_count}
most_common_tags = tag_counts.reject{ |_k, v| v < max_count }
self.log("Most common tags #{most_common_tags}", pm.id, Logger::INFO)
most_common_tags.each do |tag|
Tag.create!(annotated:pm, annotator: BotUser.get_user('tagger'), tag: auto_tag_prefix+tag[0])
Tag.create!(annotated: pm, annotator: BotUser.get_user('tagger'), tag: auto_tag_prefix + tag[0])
end
else
self.log("No most common tag", pm.id, Logger::INFO)
Expand All @@ -87,11 +86,10 @@ def self.run(body)
end
rescue StandardError => e
error = Error.new(e)
Rails.logger.error("[AutoTagger Bot] Exception for event `#{body['event']}`: #{error.class} - #{error.message}")
Rails.logger.error("[AutoTagger Bot] Exception for event #{body['event']}: #{error.class} - #{error.message}")
CheckSentry.notify(error, bot: self.name, body: body)
end

handled
end
end
18 changes: 5 additions & 13 deletions app/models/concerns/relationship_bulk.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def bulk_update(ids, updates, team)
end
relationships = Relationship.where(id: ids, source_id: source_id)
relationships.update_all(update_columns)
delete_cached_field(pm_source.id, ids)
# Run callbacks in background
extra_options = {
team_id: team&.id,
Expand All @@ -29,6 +28,7 @@ def bulk_update(ids, updates, team)
action: updates[:action]
}
self.delay.run_update_callbacks(ids.to_json, extra_options.to_json)
delete_cached_fields(pm_source.id, relationships.map(&:target_id))
{ source_project_media: pm_source }
end
end
Expand All @@ -41,7 +41,7 @@ def bulk_destroy(ids, updates, team)
relationships.find_each{ |r| relationship_target[r.id] = r.target_id}
relationships.delete_all
target_ids = relationship_target.values
delete_cached_field(pm_source.id, target_ids)
delete_cached_fields(pm_source.id, target_ids)
# Run callbacks in background
extra_options = {
team_id: team&.id,
Expand All @@ -52,17 +52,9 @@ def bulk_destroy(ids, updates, team)
{ source_project_media: pm_source }
end

def delete_cached_field(source_id, target_ids)
# Clear cached fields
# List fields with `model: Relationship`
cached_fields = [
'is_suggested', 'is_confirmed', 'linked_items_count', 'suggestions_count','report_status','related_count',
'demand', 'last_seen', 'sources_as_sentence', 'added_as_similar_by_name', 'confirmed_as_similar_by_name'
]
cached_fields.each do |name|
Rails.cache.delete("check_cached_field:ProjectMedia:#{source_id}:#{name}")
target_ids.each { |id| Rails.cache.delete("check_cached_field:ProjectMedia:#{id}:#{name}") }
end
def delete_cached_fields(source_id, target_ids)
ids = [source_id, target_ids].flatten
ProjectMedia.where(id: ids).each { |pm| pm.clear_cached_fields }
end

def run_update_callbacks(ids_json, extra_options_json)
Expand Down
Loading

0 comments on commit 3be5a80

Please sign in to comment.