From ed377e0885082f093b8908a705298bc8cf45cca7 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 17 Feb 2025 14:40:39 -0300 Subject: [PATCH 01/23] [WIP] First part: move methods to Media I moved the methods to Media so it's easier to compare them side-by-side, and to make sure this works from Media. I also made a small change to the create_media method in ProjectAssociation, so its behavior is closer to create_original_claim. Since those two have differences, but are both basically creating a Media associated to the ProjectMedia. Hopefully those two will be turned into one method. --- app/models/concerns/project_association.rb | 4 +- app/models/concerns/project_media_creators.rb | 50 ++--------------- app/models/media.rb | 53 +++++++++++++++++++ 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/app/models/concerns/project_association.rb b/app/models/concerns/project_association.rb index c1d482507..0cf32e5be 100644 --- a/app/models/concerns/project_association.rb +++ b/app/models/concerns/project_association.rb @@ -158,8 +158,8 @@ def is_unique def set_media unless self.url.blank? && self.quote.blank? && self.file.blank? && self.media_type != 'Blank' - m = self.create_media - self.media_id = m.id unless m.nil? + self.create_media + self.media_id unless self.media_id.nil? end end end diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index 60b10b1d7..be250100f 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -57,6 +57,10 @@ def create_original_claim self.media = Media.find_or_create_from_original_claim(claim, self.team) end + def create_media + self.media = Media.create_media_associated_to(self) + end + def set_quote_metadata media = self.media case media.type @@ -114,52 +118,6 @@ def add_source_creation_log protected - def create_with_file(media_type = 'UploadedImage') - klass = media_type.constantize - m = klass.find_by(file: Media.filename(self.file)) || klass.new(file: self.file) - m.save! if m.new_record? - m - end - - def create_claim - m = Claim.new - m.quote = self.quote - m.quote_attributions = self.quote_attributions - m.save! - m - end - - def create_link - team = self.team || Team.current - pender_key = team.get_pender_key if team - url_from_pender = Link.normalized(self.url, pender_key) - Link.find_by(url: url_from_pender) || Link.create(url: self.url, pender_key: pender_key) - end - - def create_media - m = nil - self.set_media_type if self.media_type.blank? - case self.media_type - when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - m = self.create_with_file(media_type) - when 'Claim' - m = self.create_claim - when 'Link' - m = self.create_link - when 'Blank' - m = Blank.create! - end - m - end - - def set_media_type - if !self.url.blank? - self.media_type = 'Link' - elsif !self.quote.blank? - self.media_type = 'Claim' - end - end - def set_jsonld_response(task) jsonld = self.media.metadata['raw']['json+ld'] if self.media.metadata.has_key?('raw') unless jsonld.nil? diff --git a/app/models/media.rb b/app/models/media.rb index 61bd26c2b..36025424f 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -77,6 +77,25 @@ def domain '' end + # copied + def self.create_media_associated_to(project_media) + puts '******************************** HIT Media' + m = nil + Media.set_media_type(project_media) if project_media.media_type.blank? + media_type = project_media.media_type + case media_type + when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' + m = create_with_file(project_media,media_type) + when 'Claim' + m = create_claim(project_media) + when 'Link' + m = create_link(project_media) + when 'Blank' + m = Blank.create! + end + m + end + def self.find_or_create_from_original_claim(claim, project_media_team) if claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) uri = URI.parse(claim) @@ -108,6 +127,15 @@ def set_user self.user = User.current unless User.current.nil? end + # copied + def set_media_type(project_media) + if !project_media.url.blank? + project_media.media_type = 'Link' + elsif !project_media.quote.blank? + project_media.media_type = 'Claim' + end + end + def self.class_from_input(input) type = nil if input[:url].blank? @@ -130,6 +158,14 @@ def set_original_claim_hash self.original_claim_hash = Digest::MD5.hexdigest(original_claim) unless self.original_claim.blank? end + # copied + def self.create_with_file(project_media, media_type = 'UploadedImage') + klass = media_type.constantize + m = klass.find_by(file: Media.filename(project_media.file)) || klass.new(file: project_media.file) + m.save! if m.new_record? + m + end + def self.find_or_create_uploaded_file_media_from_original_claim(media_type, url, ext) klass = media_type.constantize existing_media = klass.find_by(original_claim_hash: Digest::MD5.hexdigest(url)) @@ -152,10 +188,27 @@ def self.download_file(url, ext) file end + # copied + def self.create_claim(project_media) + m = Claim.new + m.quote = project_media.quote + m.quote_attributions = project_media.quote_attributions + m.save! + m + end + def self.find_or_create_claim_media_from_original_claim(text) Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(text)) || Claim.create!(quote: text, original_claim: text) end + # copied + def self.create_link(project_media) + team = project_media.team || Team.current + pender_key = team.get_pender_key if team + url_from_pender = Link.normalized(project_media.url, pender_key) + Link.find_by(url: url_from_pender) || Link.create(url: project_media.url, pender_key: pender_key) + end + def self.find_or_create_link_media_from_original_claim(url, project_media_team) pender_key = project_media_team.get_pender_key if project_media_team url_from_pender = Link.normalized(url, pender_key) From fa98310c15d245f105a18d0989a31df78776f736 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 17 Feb 2025 14:54:51 -0300 Subject: [PATCH 02/23] [WIP] make create_media and create_original_claim similar --- app/models/concerns/project_media_creators.rb | 3 +-- app/models/media.rb | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index be250100f..ae25bbd37 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -53,8 +53,7 @@ def create_annotation end def create_original_claim - claim = self.set_original_claim.strip - self.media = Media.find_or_create_from_original_claim(claim, self.team) + self.media = Media.find_or_create_from_original_claim(self) end def create_media diff --git a/app/models/media.rb b/app/models/media.rb index 36025424f..870baa4bf 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -96,7 +96,11 @@ def self.create_media_associated_to(project_media) m end - def self.find_or_create_from_original_claim(claim, project_media_team) + def self.find_or_create_from_original_claim(project_media) + puts '******************************** HIT Media BUT make it original' + project_media_team = project_media.team + claim = project_media.set_original_claim.strip + if claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) uri = URI.parse(claim) content_type = Net::HTTP.get_response(uri)['content-type'] From ac8664ee21efd97e64406338805289060badd9a1 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 17 Feb 2025 17:15:56 -0300 Subject: [PATCH 03/23] merge methods to create claim into one --- app/models/media.rb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index 870baa4bf..d77a6e2bf 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -87,7 +87,7 @@ def self.create_media_associated_to(project_media) when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' m = create_with_file(project_media,media_type) when 'Claim' - m = create_claim(project_media) + m = find_or_create_claim_media(project_media) when 'Link' m = create_link(project_media) when 'Blank' @@ -117,7 +117,7 @@ def self.find_or_create_from_original_claim(project_media) find_or_create_link_media_from_original_claim(claim, project_media_team) end else - find_or_create_claim_media_from_original_claim(claim) + find_or_create_claim_media(project_media) end end @@ -193,18 +193,20 @@ def self.download_file(url, ext) end # copied - def self.create_claim(project_media) - m = Claim.new - m.quote = project_media.quote - m.quote_attributions = project_media.quote_attributions - m.save! - m - end + def self.find_or_create_claim_media(project_media) + claim = project_media.set_original_claim&.strip - def self.find_or_create_claim_media_from_original_claim(text) - Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(text)) || Claim.create!(quote: text, original_claim: text) + if claim + Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(claim)) || Claim.create!(quote: claim, original_claim: claim) + else + Claim.create!(quote: project_media.quote, quote_attributions: project_media.quote_attributions) + end end + # def self.find_or_create_claim_media_from_original_claim(text) + # Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(text)) || Claim.create!(quote: text, original_claim: text) + # end + # copied def self.create_link(project_media) team = project_media.team || Team.current From 574319b63d02ce1b87d39e4a33792481d1489643 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 17 Feb 2025 17:43:08 -0300 Subject: [PATCH 04/23] extract setting the media_type --- app/models/media.rb | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index d77a6e2bf..e4806ea05 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -83,6 +83,7 @@ def self.create_media_associated_to(project_media) m = nil Media.set_media_type(project_media) if project_media.media_type.blank? media_type = project_media.media_type + case media_type when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' m = create_with_file(project_media,media_type) @@ -101,22 +102,20 @@ def self.find_or_create_from_original_claim(project_media) project_media_team = project_media.team claim = project_media.set_original_claim.strip + Media.set_media_type(project_media) + media_type = project_media.media_type + if claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) uri = URI.parse(claim) - content_type = Net::HTTP.get_response(uri)['content-type'] ext = File.extname(uri.path) + end - case content_type - when /^image\// - find_or_create_uploaded_file_media_from_original_claim('UploadedImage', claim, ext) - when /^video\// - find_or_create_uploaded_file_media_from_original_claim('UploadedVideo', claim, ext) - when /^audio\// - find_or_create_uploaded_file_media_from_original_claim('UploadedAudio', claim, ext) - else - find_or_create_link_media_from_original_claim(claim, project_media_team) - end - else + case media_type + when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' + find_or_create_uploaded_file_media_from_original_claim(media_type, claim, ext) + when 'Link' + find_or_create_link_media_from_original_claim(claim, project_media_team) + when 'Claim' find_or_create_claim_media(project_media) end end @@ -132,8 +131,26 @@ def set_user end # copied - def set_media_type(project_media) - if !project_media.url.blank? + def self.set_media_type(project_media) + claim = project_media.set_original_claim&.strip + + if claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) + uri = URI.parse(claim) + content_type = Net::HTTP.get_response(uri)['content-type'] + + case content_type + when /^image\// + project_media.media_type = 'UploadedImage' + when /^video\// + project_media.media_type = 'UploadedVideo' + when /^audio\// + project_media.media_type = 'UploadedAudio' + else + project_media.media_type = 'Link' + end + elsif claim + project_media.media_type = 'Claim' + elsif !project_media.url.blank? project_media.media_type = 'Link' elsif !project_media.quote.blank? project_media.media_type = 'Claim' From 9bc9271ac25db79d7b5f785738a87eaefd43c378 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 17 Feb 2025 18:13:14 -0300 Subject: [PATCH 05/23] merge methods to create uplodaded media and fix a variable name --- app/models/media.rb | 50 +++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index e4806ea05..20cf807fc 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -86,7 +86,7 @@ def self.create_media_associated_to(project_media) case media_type when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - m = create_with_file(project_media,media_type) + m = find_or_create_uploaded_file_media(project_media, media_type) when 'Claim' m = find_or_create_claim_media(project_media) when 'Link' @@ -100,21 +100,16 @@ def self.create_media_associated_to(project_media) def self.find_or_create_from_original_claim(project_media) puts '******************************** HIT Media BUT make it original' project_media_team = project_media.team - claim = project_media.set_original_claim.strip + original_claim = project_media.set_original_claim.strip Media.set_media_type(project_media) media_type = project_media.media_type - if claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) - uri = URI.parse(claim) - ext = File.extname(uri.path) - end - case media_type when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - find_or_create_uploaded_file_media_from_original_claim(media_type, claim, ext) + find_or_create_uploaded_file_media(project_media, media_type) when 'Link' - find_or_create_link_media_from_original_claim(claim, project_media_team) + find_or_create_link_media_from_original_claim(original_claim, project_media_team) when 'Claim' find_or_create_claim_media(project_media) end @@ -132,10 +127,10 @@ def set_user # copied def self.set_media_type(project_media) - claim = project_media.set_original_claim&.strip + original_claim = project_media.set_original_claim&.strip - if claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) - uri = URI.parse(claim) + if original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) + uri = URI.parse(original_claim) content_type = Net::HTTP.get_response(uri)['content-type'] case content_type @@ -148,7 +143,7 @@ def self.set_media_type(project_media) else project_media.media_type = 'Link' end - elsif claim + elsif original_claim project_media.media_type = 'Claim' elsif !project_media.url.blank? project_media.media_type = 'Link' @@ -187,15 +182,26 @@ def self.create_with_file(project_media, media_type = 'UploadedImage') m end - def self.find_or_create_uploaded_file_media_from_original_claim(media_type, url, ext) + def self.find_or_create_uploaded_file_media(project_media, media_type) klass = media_type.constantize - existing_media = klass.find_by(original_claim_hash: Digest::MD5.hexdigest(url)) + original_claim = project_media.set_original_claim&.strip + + if original_claim + uri = URI.parse(original_claim) + ext = File.extname(uri.path) + + existing_media = klass.find_by(original_claim_hash: Digest::MD5.hexdigest(original_claim)) - if existing_media - existing_media + if existing_media + existing_media + else + file = download_file(original_claim, ext) + klass.create!(file: file, original_claim: original_claim) + end else - file = download_file(url, ext) - klass.create!(file: file, original_claim: url) + m = klass.find_by(file: Media.filename(project_media.file)) || klass.new(file: project_media.file) + m.save! if m.new_record? + m end end @@ -211,10 +217,10 @@ def self.download_file(url, ext) # copied def self.find_or_create_claim_media(project_media) - claim = project_media.set_original_claim&.strip + original_claim = project_media.set_original_claim&.strip - if claim - Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(claim)) || Claim.create!(quote: claim, original_claim: claim) + if original_claim + Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(original_claim)) || Claim.create!(quote: original_claim, original_claim: original_claim) else Claim.create!(quote: project_media.quote, quote_attributions: project_media.quote_attributions) end From f194c1f1f9ba43404dd6617c3880028fc7567d46 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 17 Feb 2025 18:31:58 -0300 Subject: [PATCH 06/23] merge create_link methods --- app/models/media.rb | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index 20cf807fc..0a9605e1a 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -90,7 +90,7 @@ def self.create_media_associated_to(project_media) when 'Claim' m = find_or_create_claim_media(project_media) when 'Link' - m = create_link(project_media) + m = find_or_create_link_media(project_media) when 'Blank' m = Blank.create! end @@ -99,9 +99,6 @@ def self.create_media_associated_to(project_media) def self.find_or_create_from_original_claim(project_media) puts '******************************** HIT Media BUT make it original' - project_media_team = project_media.team - original_claim = project_media.set_original_claim.strip - Media.set_media_type(project_media) media_type = project_media.media_type @@ -109,7 +106,7 @@ def self.find_or_create_from_original_claim(project_media) when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' find_or_create_uploaded_file_media(project_media, media_type) when 'Link' - find_or_create_link_media_from_original_claim(original_claim, project_media_team) + find_or_create_link_media(project_media) when 'Claim' find_or_create_claim_media(project_media) end @@ -238,9 +235,18 @@ def self.create_link(project_media) Link.find_by(url: url_from_pender) || Link.create(url: project_media.url, pender_key: pender_key) end - def self.find_or_create_link_media_from_original_claim(url, project_media_team) + def self.find_or_create_link_media(project_media) + project_media_team = project_media.team pender_key = project_media_team.get_pender_key if project_media_team - url_from_pender = Link.normalized(url, pender_key) - Link.find_by(url: url_from_pender) || Link.find_by(original_claim_hash: Digest::MD5.hexdigest(url)) || Link.create!(url: url, pender_key: pender_key, original_claim: url) + + original_claim = project_media.set_original_claim&.strip + + if original_claim + url_from_pender = Link.normalized(original_claim, pender_key) + Link.find_by(url: url_from_pender) || Link.find_by(original_claim_hash: Digest::MD5.hexdigest(original_claim)) || Link.create!(url: original_claim, pender_key: pender_key, original_claim: original_claim) + else + url_from_pender = Link.normalized(project_media.url, pender_key) + Link.find_by(url: url_from_pender) || Link.create(url: project_media.url, pender_key: pender_key) + end end end From 371bf03013cd874fe3ab4724928e585c165cbe49 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 17 Feb 2025 18:33:55 -0300 Subject: [PATCH 07/23] do some cleanup --- app/models/media.rb | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index 0a9605e1a..105d359aa 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -79,7 +79,6 @@ def domain # copied def self.create_media_associated_to(project_media) - puts '******************************** HIT Media' m = nil Media.set_media_type(project_media) if project_media.media_type.blank? media_type = project_media.media_type @@ -98,7 +97,6 @@ def self.create_media_associated_to(project_media) end def self.find_or_create_from_original_claim(project_media) - puts '******************************** HIT Media BUT make it original' Media.set_media_type(project_media) media_type = project_media.media_type @@ -122,7 +120,6 @@ def set_user self.user = User.current unless User.current.nil? end - # copied def self.set_media_type(project_media) original_claim = project_media.set_original_claim&.strip @@ -171,14 +168,8 @@ def set_original_claim_hash self.original_claim_hash = Digest::MD5.hexdigest(original_claim) unless self.original_claim.blank? end - # copied - def self.create_with_file(project_media, media_type = 'UploadedImage') - klass = media_type.constantize - m = klass.find_by(file: Media.filename(project_media.file)) || klass.new(file: project_media.file) - m.save! if m.new_record? - m - end - + # we set it to UploadedImage by default, should we? + # def self.create_with_file(project_media, media_type = 'UploadedImage') def self.find_or_create_uploaded_file_media(project_media, media_type) klass = media_type.constantize original_claim = project_media.set_original_claim&.strip @@ -212,7 +203,6 @@ def self.download_file(url, ext) file end - # copied def self.find_or_create_claim_media(project_media) original_claim = project_media.set_original_claim&.strip @@ -223,18 +213,6 @@ def self.find_or_create_claim_media(project_media) end end - # def self.find_or_create_claim_media_from_original_claim(text) - # Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(text)) || Claim.create!(quote: text, original_claim: text) - # end - - # copied - def self.create_link(project_media) - team = project_media.team || Team.current - pender_key = team.get_pender_key if team - url_from_pender = Link.normalized(project_media.url, pender_key) - Link.find_by(url: url_from_pender) || Link.create(url: project_media.url, pender_key: pender_key) - end - def self.find_or_create_link_media(project_media) project_media_team = project_media.team pender_key = project_media_team.get_pender_key if project_media_team From e5f58d611a8d766eda47f5b96f238be9efd967d8 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Tue, 18 Feb 2025 13:42:47 -0300 Subject: [PATCH 08/23] update methods names --- test/models/media_test.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/models/media_test.rb b/test/models/media_test.rb index 8c877c1a7..76e734462 100644 --- a/test/models/media_test.rb +++ b/test/models/media_test.rb @@ -630,7 +630,7 @@ def setup test "Claim Media: should save the original_claim and original_claim_hash when created from original claim" do claim = 'This is a claim.' - claim_media = Media.find_or_create_claim_media_from_original_claim(claim) + claim_media = Media.find_or_create_claim_media(claim) assert_not_nil claim_media.original_claim_hash assert_not_nil claim_media.original_claim @@ -645,7 +645,7 @@ def setup test "Claim Media: should not create duplicate media if media with original_claim_hash exists" do assert_difference 'Claim.count', 1 do - 2.times { Media.find_or_create_claim_media_from_original_claim('This is a claim.') } + 2.times { Media.find_or_create_claim_media('This is a claim.') } end end @@ -664,7 +664,7 @@ def setup }.to_json WebMock.stub_request(:get, pender_url).with(query: { url: link_url }).to_return(body: link_response) - link_media = Media.find_or_create_link_media_from_original_claim(link_url, team) + link_media = Media.find_or_create_link_media(link_url, team) assert_not_nil link_media.original_claim_hash assert_not_nil link_media.original_claim @@ -706,7 +706,7 @@ def setup WebMock.stub_request(:get, pender_url).with(query: { url: link_url }).to_return(body: link_response) assert_difference 'Link.count', 1 do - 2.times { Media.find_or_create_link_media_from_original_claim(link_url, team) } + 2.times { Media.find_or_create_link_media(link_url, team) } end end @@ -718,7 +718,7 @@ def setup ext = File.extname(URI.parse(audio_url).path) WebMock.stub_request(:get, audio_url).to_return(body: file.read, headers: { 'Content-Type' => 'audio/mp3' }) - uploaded_media = Media.find_or_create_uploaded_file_media_from_original_claim('UploadedAudio', audio_url, ext) + uploaded_media = Media.find_or_create_uploaded_file_media('UploadedAudio', audio_url, ext) assert_not_nil uploaded_media.original_claim_hash assert_not_nil uploaded_media.original_claim @@ -742,7 +742,7 @@ def setup WebMock.stub_request(:get, audio_url).to_return(body: file.read, headers: { 'Content-Type' => 'audio/mp3' }) assert_difference 'UploadedAudio.count', 1 do - 2.times { Media.find_or_create_uploaded_file_media_from_original_claim('UploadedAudio', audio_url, ext) } + 2.times { Media.find_or_create_uploaded_file_media('UploadedAudio', audio_url, ext) } end end end From 4442c7ba47e4ec39a2aa5bdeb19359e22d5e05b5 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Tue, 18 Feb 2025 14:07:47 -0300 Subject: [PATCH 09/23] fix undefined method `match?' failing tests: rails test test/models/project_group_test.rb:39 rails test test/models/project_media_5_test.rb:799 rails test test/controllers/graphql_controller_7_test.rb:370 rails test test/controllers/graphql_controller_5_test.rb:200 - we make sure original_claim is not nil before trying to match - after that was fixed, project_media_5_test.rb:799 was still failing: - we werent passing team - so it failed to get the pender_key, since that expects a team - questions: - why was this working before? we expected team before as well - do we need to have team? --- app/models/media.rb | 4 +++- test/models/project_media_5_test.rb | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index 105d359aa..48aeb5849 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -1,3 +1,5 @@ +require 'byebug' + class Media < ApplicationRecord include AnnotationBase::Association @@ -123,7 +125,7 @@ def set_user def self.set_media_type(project_media) original_claim = project_media.set_original_claim&.strip - if original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) + if original_claim && original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) uri = URI.parse(original_claim) content_type = Net::HTTP.get_response(uri)['content-type'] diff --git a/test/models/project_media_5_test.rb b/test/models/project_media_5_test.rb index 66f465a67..c6708c825 100644 --- a/test/models/project_media_5_test.rb +++ b/test/models/project_media_5_test.rb @@ -798,7 +798,7 @@ def setup test "should create link and account using team pender key" do t = create_team - p = create_project(team: t) + create_project(team: t) Team.stubs(:current).returns(t) url1 = random_url @@ -811,13 +811,13 @@ def setup PenderClient::Request.stubs(:get_medias).with(CheckConfig.get('pender_url_private'), { url: url2 }, 'specific_token').returns({"type" => "media","data" => {"url" => url2, "type" => "item", "title" => "Specific token", "author_url" => author_url2}}) PenderClient::Request.stubs(:get_medias).with(CheckConfig.get('pender_url_private'), { url: author_url2 }, 'specific_token').returns({"type" => "media","data" => {"url" => author_url2, "type" => "profile", "title" => "Specific token", "author_name" => 'Author with specific token'}}) - pm = ProjectMedia.create url: url1 + pm = ProjectMedia.create url: url1, team: t assert_equal 'Default token', ProjectMedia.find(pm.id).media.metadata['title'] assert_equal 'Author with default token', ProjectMedia.find(pm.id).media.account.metadata['author_name'] t.set_pender_key = 'specific_token'; t.save! - pm = ProjectMedia.create! url: url2 + pm = ProjectMedia.create! url: url2, team: t assert_equal 'Specific token', ProjectMedia.find(pm.id).media.metadata['title'] assert_equal 'Author with specific token', ProjectMedia.find(pm.id).media.account.metadata['author_name'] From d914b85079d0bcd9562072ae474d07433f08d23f Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Tue, 18 Feb 2025 18:22:49 -0300 Subject: [PATCH 10/23] move set_media_type back and make create_media and create_original_claim the same --- app/models/concerns/project_media_creators.rb | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index ae25bbd37..de392a631 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -53,11 +53,39 @@ def create_annotation end def create_original_claim - self.media = Media.find_or_create_from_original_claim(self) + self.set_media_type if self.media_type.blank? + self.media = Media.find_or_create_media_associated_to(self) end def create_media - self.media = Media.create_media_associated_to(self) + self.set_media_type if self.media_type.blank? + self.media = Media.find_or_create_media_associated_to(self) + end + + def set_media_type + original_claim = self.set_original_claim&.strip + + if original_claim && original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) + uri = URI.parse(original_claim) + content_type = Net::HTTP.get_response(uri)['content-type'] + + case content_type + when /^image\// + self.media_type = 'UploadedImage' + when /^video\// + self.media_type = 'UploadedVideo' + when /^audio\// + self.media_type = 'UploadedAudio' + else + self.media_type = 'Link' + end + elsif original_claim + self.media_type = 'Claim' + elsif !self.url.blank? + self.media_type = 'Link' + elsif !self.quote.blank? + self.media_type = 'Claim' + end end def set_quote_metadata From 007bc76d702df6011ddc76d12f79f6a4f1efa0ef Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Tue, 18 Feb 2025 18:28:16 -0300 Subject: [PATCH 11/23] trying to combine methods and remove set_media_type One big difference between how we create Media is in one case we were sending the entire project_media, but when creating from original_claim we weren't. I think since this is Media it makes more sense only to receive what we need, instead of the whole project_media. So this is a step in that direction. There is still a lot of duplication, and I think there should be better ways to access the values we want. --- app/models/media.rb | 136 +++++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 85 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index 48aeb5849..3c87967aa 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -79,39 +79,38 @@ def domain '' end - # copied - def self.create_media_associated_to(project_media) + def self.find_or_create_media_associated_to(project_media) m = nil - Media.set_media_type(project_media) if project_media.media_type.blank? + original_claim = project_media.set_original_claim&.strip media_type = project_media.media_type + team = project_media.team - case media_type - when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - m = find_or_create_uploaded_file_media(project_media, media_type) - when 'Claim' - m = find_or_create_claim_media(project_media) - when 'Link' - m = find_or_create_link_media(project_media) - when 'Blank' - m = Blank.create! + if original_claim + case media_type + when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' + m = find_or_create_uploaded_file_media(original_claim, media_type, true) + when 'Claim' + m = find_or_create_claim_media(original_claim, nil, true) + when 'Link' + m = find_or_create_link_media(original_claim, team, true) + when 'Blank' + m = Blank.create! + end + else + case media_type + when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' + m = find_or_create_uploaded_file_media(project_media.file, media_type) + when 'Claim' + m = find_or_create_claim_media(project_media.quote, project_media.quote_attributions) + when 'Link' + m = find_or_create_link_media(project_media.url, team) + when 'Blank' + m = Blank.create! + end end m end - def self.find_or_create_from_original_claim(project_media) - Media.set_media_type(project_media) - media_type = project_media.media_type - - case media_type - when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - find_or_create_uploaded_file_media(project_media, media_type) - when 'Link' - find_or_create_link_media(project_media) - when 'Claim' - find_or_create_claim_media(project_media) - end - end - private def set_url_nil_if_empty @@ -122,32 +121,6 @@ def set_user self.user = User.current unless User.current.nil? end - def self.set_media_type(project_media) - original_claim = project_media.set_original_claim&.strip - - if original_claim && original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) - uri = URI.parse(original_claim) - content_type = Net::HTTP.get_response(uri)['content-type'] - - case content_type - when /^image\// - project_media.media_type = 'UploadedImage' - when /^video\// - project_media.media_type = 'UploadedVideo' - when /^audio\// - project_media.media_type = 'UploadedAudio' - else - project_media.media_type = 'Link' - end - elsif original_claim - project_media.media_type = 'Claim' - elsif !project_media.url.blank? - project_media.media_type = 'Link' - elsif !project_media.quote.blank? - project_media.media_type = 'Claim' - end - end - def self.class_from_input(input) type = nil if input[:url].blank? @@ -170,63 +143,56 @@ def set_original_claim_hash self.original_claim_hash = Digest::MD5.hexdigest(original_claim) unless self.original_claim.blank? end + def self.download_file(url, ext) + raise "Invalid URL when creating media from original claim attribute" unless url =~ /\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/ + + file = Tempfile.new(['download', ext]) + file.binmode + file.write(URI(url).open.read) + file.rewind + file + end + # we set it to UploadedImage by default, should we? # def self.create_with_file(project_media, media_type = 'UploadedImage') - def self.find_or_create_uploaded_file_media(project_media, media_type) + def self.find_or_create_uploaded_file_media(file_media, media_type, has_original_claim = false) klass = media_type.constantize - original_claim = project_media.set_original_claim&.strip - if original_claim - uri = URI.parse(original_claim) + if has_original_claim + uri = URI.parse(file_media) ext = File.extname(uri.path) - existing_media = klass.find_by(original_claim_hash: Digest::MD5.hexdigest(original_claim)) + existing_media = klass.find_by(original_claim_hash: Digest::MD5.hexdigest(file_media)) if existing_media existing_media else - file = download_file(original_claim, ext) - klass.create!(file: file, original_claim: original_claim) + file = download_file(file_media, ext) + klass.create!(file: file, original_claim: file_media) end else - m = klass.find_by(file: Media.filename(project_media.file)) || klass.new(file: project_media.file) + m = klass.find_by(file: Media.filename(file_media)) || klass.new(file: file_media) m.save! if m.new_record? m end end - def self.download_file(url, ext) - raise "Invalid URL when creating media from original claim attribute" unless url =~ /\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/ - - file = Tempfile.new(['download', ext]) - file.binmode - file.write(URI(url).open.read) - file.rewind - file - end - - def self.find_or_create_claim_media(project_media) - original_claim = project_media.set_original_claim&.strip - - if original_claim - Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(original_claim)) || Claim.create!(quote: original_claim, original_claim: original_claim) + def self.find_or_create_claim_media(claim_media, quote_attributions = nil, has_original_claim = false) + if has_original_claim + Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(claim_media)) || Claim.create!(quote: claim_media, original_claim: claim_media) else - Claim.create!(quote: project_media.quote, quote_attributions: project_media.quote_attributions) + Claim.create!(quote: claim_media, quote_attributions: quote_attributions) end end - def self.find_or_create_link_media(project_media) - project_media_team = project_media.team + def self.find_or_create_link_media(link_media, project_media_team, has_original_claim = false) pender_key = project_media_team.get_pender_key if project_media_team + url_from_pender = Link.normalized(link_media, pender_key) - original_claim = project_media.set_original_claim&.strip - - if original_claim - url_from_pender = Link.normalized(original_claim, pender_key) - Link.find_by(url: url_from_pender) || Link.find_by(original_claim_hash: Digest::MD5.hexdigest(original_claim)) || Link.create!(url: original_claim, pender_key: pender_key, original_claim: original_claim) + if has_original_claim + Link.find_by(url: url_from_pender) || Link.find_by(original_claim_hash: Digest::MD5.hexdigest(link_media)) || Link.create!(url: link_media, pender_key: pender_key, original_claim: link_media) else - url_from_pender = Link.normalized(project_media.url, pender_key) - Link.find_by(url: url_from_pender) || Link.create(url: project_media.url, pender_key: pender_key) + Link.find_by(url: url_from_pender) || Link.create(url: link_media, pender_key: pender_key) end end end From fe841ae81808a53090e2db9447cdc8526a2e61c4 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Wed, 19 Feb 2025 09:53:54 -0300 Subject: [PATCH 12/23] Fix failing tests The method signature changed, so I updated the tests. however: should it change? is there a better way to deal with this? --- test/models/media_test.rb | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/test/models/media_test.rb b/test/models/media_test.rb index 76e734462..f6c2ce0f3 100644 --- a/test/models/media_test.rb +++ b/test/models/media_test.rb @@ -630,7 +630,7 @@ def setup test "Claim Media: should save the original_claim and original_claim_hash when created from original claim" do claim = 'This is a claim.' - claim_media = Media.find_or_create_claim_media(claim) + claim_media = Media.find_or_create_claim_media(claim, nil, true) assert_not_nil claim_media.original_claim_hash assert_not_nil claim_media.original_claim @@ -645,7 +645,7 @@ def setup test "Claim Media: should not create duplicate media if media with original_claim_hash exists" do assert_difference 'Claim.count', 1 do - 2.times { Media.find_or_create_claim_media('This is a claim.') } + 2.times { Media.find_or_create_claim_media('This is a claim.', nil, true) } end end @@ -664,7 +664,7 @@ def setup }.to_json WebMock.stub_request(:get, pender_url).with(query: { url: link_url }).to_return(body: link_response) - link_media = Media.find_or_create_link_media(link_url, team) + link_media = Media.find_or_create_link_media(link_url, team, true) assert_not_nil link_media.original_claim_hash assert_not_nil link_media.original_claim @@ -715,10 +715,9 @@ def setup file.write(File.read(File.join(Rails.root, 'test', 'data', 'rails.mp3'))) file.rewind audio_url = "http://example.com/#{file.path.split('/').last}" - ext = File.extname(URI.parse(audio_url).path) WebMock.stub_request(:get, audio_url).to_return(body: file.read, headers: { 'Content-Type' => 'audio/mp3' }) - uploaded_media = Media.find_or_create_uploaded_file_media('UploadedAudio', audio_url, ext) + uploaded_media = Media.find_or_create_uploaded_file_media(audio_url, 'UploadedAudio', true) assert_not_nil uploaded_media.original_claim_hash assert_not_nil uploaded_media.original_claim @@ -738,11 +737,10 @@ def setup file.write(File.read(File.join(Rails.root, 'test', 'data', 'rails.mp3'))) file.rewind audio_url = "http://example.com/#{file.path.split('/').last}" - ext = File.extname(URI.parse(audio_url).path) WebMock.stub_request(:get, audio_url).to_return(body: file.read, headers: { 'Content-Type' => 'audio/mp3' }) assert_difference 'UploadedAudio.count', 1 do - 2.times { Media.find_or_create_uploaded_file_media('UploadedAudio', audio_url, ext) } + 2.times { Media.find_or_create_uploaded_file_media(audio_url, 'UploadedAudio', true) } end end end From da90f9e25384a1224d5f43abb7e004ba026f9386 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Wed, 19 Feb 2025 13:51:23 -0300 Subject: [PATCH 13/23] update condition on when to set_media_type create_media checked if media_type.blank? before running, however, the graphql request might send original_claim and media_type set to Blank. Which means in the case of original_claim present we do want to check and re-set media_type if needed. --- app/models/concerns/project_media_creators.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index de392a631..00768df71 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -53,18 +53,18 @@ def create_annotation end def create_original_claim - self.set_media_type if self.media_type.blank? + self.set_media_type if self.set_original_claim || self.media_type.blank? self.media = Media.find_or_create_media_associated_to(self) end def create_media - self.set_media_type if self.media_type.blank? + self.set_media_type if self.set_original_claim || self.media_type.blank? self.media = Media.find_or_create_media_associated_to(self) end def set_media_type original_claim = self.set_original_claim&.strip - +# byebug if original_claim && original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) uri = URI.parse(original_claim) content_type = Net::HTTP.get_response(uri)['content-type'] From 4a2be9ece55726b09da3d6e23c27f06a4707d612 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Wed, 19 Feb 2025 13:56:12 -0300 Subject: [PATCH 14/23] update to use ProjectMedia.create instead of create_project_media The issue here, I think, is create_project_media creates a bunch of stuff by default. So it was creating an extra Link Media, which caused some confusion on when debugging a different issue. So maybe it's a good idea to be more conservative in using sample_data's methods? --- test/controllers/graphql_controller_12_test.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/controllers/graphql_controller_12_test.rb b/test/controllers/graphql_controller_12_test.rb index 112d6e637..44e317ee3 100644 --- a/test/controllers/graphql_controller_12_test.rb +++ b/test/controllers/graphql_controller_12_test.rb @@ -745,7 +745,7 @@ def teardown t = create_team p = create_project team: t - pm = create_project_media team: t, set_original_claim: url + pm = ProjectMedia.create(team: t, set_original_claim: url) assert_not_nil pm @@ -807,7 +807,7 @@ def teardown t.settings[:languages] << 'pt' t.save! p = create_project team: t - pm = create_project_media team: t, set_original_claim: url + pm = ProjectMedia.create(team: t, set_original_claim: url) c = create_claim_description project_media: pm fc_1 = create_fact_check claim_description: c @@ -870,7 +870,7 @@ def teardown WebMock.stub_request(:get, pender_url).with({ query: { url: url } }).to_return(body: response_body) t = create_team - pm = create_project_media team: t, set_original_claim: url + pm = ProjectMedia.create(team: t, set_original_claim: url) cd = create_claim_description project_media: pm fc = create_fact_check claim_description: cd From 3d2f767b8f95eafd1d2f209e24eb6f4e66838446 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Wed, 19 Feb 2025 13:59:16 -0300 Subject: [PATCH 15/23] update to keep create_media only, instead of create_original_claim They are now both doing the same thing. And I think conceptually they both create media, even if it is a media from an original claim. --- app/models/concerns/project_media_creators.rb | 5 ----- app/models/project_media.rb | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index 00768df71..c27224980 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -52,11 +52,6 @@ def create_annotation end end - def create_original_claim - self.set_media_type if self.set_original_claim || self.media_type.blank? - self.media = Media.find_or_create_media_associated_to(self) - end - def create_media self.set_media_type if self.set_original_claim || self.media_type.blank? self.media = Media.find_or_create_media_associated_to(self) diff --git a/app/models/project_media.rb b/app/models/project_media.rb index 357ce0c1e..769f37c6d 100644 --- a/app/models/project_media.rb +++ b/app/models/project_media.rb @@ -32,7 +32,7 @@ class ProjectMedia < ApplicationRecord validates_presence_of :custom_title, if: proc { |pm| pm.title_field == 'custom_title' } before_validation :set_team_id, :set_channel, :set_project_id, on: :create - before_validation :create_original_claim, if: proc { |pm| pm.set_original_claim.present? }, on: :create + before_validation :create_media, if: proc { |pm| pm.set_original_claim.present? }, on: :create after_create :create_annotation, :create_metrics_annotation, :send_slack_notification, :create_relationship, :create_team_tasks, :create_claim_description_and_fact_check, :create_tags_in_background after_create :add_source_creation_log, unless: proc { |pm| pm.source_id.blank? } after_commit :apply_rules_and_actions_on_create, :set_quote_metadata, :notify_team_bots_create, on: [:create] From a2a97db37153fabe1455525a1c1c74c5bfa33bb6 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Thu, 20 Feb 2025 12:43:52 -0300 Subject: [PATCH 16/23] add ! to method name To make clear this method mutates the original object. --- app/models/concerns/project_association.rb | 2 +- app/models/concerns/project_media_creators.rb | 2 +- app/models/project_media.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/concerns/project_association.rb b/app/models/concerns/project_association.rb index 0cf32e5be..6e71cc2c3 100644 --- a/app/models/concerns/project_association.rb +++ b/app/models/concerns/project_association.rb @@ -158,7 +158,7 @@ def is_unique def set_media unless self.url.blank? && self.quote.blank? && self.file.blank? && self.media_type != 'Blank' - self.create_media + self.create_media! self.media_id unless self.media_id.nil? end end diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index c27224980..0849bef2f 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -52,7 +52,7 @@ def create_annotation end end - def create_media + def create_media! self.set_media_type if self.set_original_claim || self.media_type.blank? self.media = Media.find_or_create_media_associated_to(self) end diff --git a/app/models/project_media.rb b/app/models/project_media.rb index 769f37c6d..cd8ceba63 100644 --- a/app/models/project_media.rb +++ b/app/models/project_media.rb @@ -32,7 +32,7 @@ class ProjectMedia < ApplicationRecord validates_presence_of :custom_title, if: proc { |pm| pm.title_field == 'custom_title' } before_validation :set_team_id, :set_channel, :set_project_id, on: :create - before_validation :create_media, if: proc { |pm| pm.set_original_claim.present? }, on: :create + before_validation :create_media!, if: proc { |pm| pm.set_original_claim.present? }, on: :create after_create :create_annotation, :create_metrics_annotation, :send_slack_notification, :create_relationship, :create_team_tasks, :create_claim_description_and_fact_check, :create_tags_in_background after_create :add_source_creation_log, unless: proc { |pm| pm.source_id.blank? } after_commit :apply_rules_and_actions_on_create, :set_quote_metadata, :notify_team_bots_create, on: [:create] From 9c49d01ad52562b2ee783110f258f311d978e99a Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Thu, 20 Feb 2025 15:09:33 -0300 Subject: [PATCH 17/23] Update so we only send the necessary data to media Depending on the type of Media we try to create, it needs different data from the ProjectMedia object. Still, Media shouldn't need to know too much about ProjectMedia. So, we are deciding on what to send in ProjectMediaCreators, and only sending what is necessary: - I created a method that gets those media_arguments - Moved that and set_media_type under protected - Inside media it only gets that data and forwards it - There were a few changes to method's signatures (so tests were also updated) - Because there are a few differences between arguments needed, we are sending those as an additional_args hash --- app/models/concerns/project_media_creators.rb | 81 +++++++++++++------ app/models/media.rb | 60 ++++++-------- test/models/media_test.rb | 12 +-- 3 files changed, 85 insertions(+), 68 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index 0849bef2f..405d366aa 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -54,33 +54,9 @@ def create_annotation def create_media! self.set_media_type if self.set_original_claim || self.media_type.blank? - self.media = Media.find_or_create_media_associated_to(self) - end - - def set_media_type - original_claim = self.set_original_claim&.strip -# byebug - if original_claim && original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) - uri = URI.parse(original_claim) - content_type = Net::HTTP.get_response(uri)['content-type'] - case content_type - when /^image\// - self.media_type = 'UploadedImage' - when /^video\// - self.media_type = 'UploadedVideo' - when /^audio\// - self.media_type = 'UploadedAudio' - else - self.media_type = 'Link' - end - elsif original_claim - self.media_type = 'Claim' - elsif !self.url.blank? - self.media_type = 'Link' - elsif !self.quote.blank? - self.media_type = 'Claim' - end + media_type, media_content, additional_args = self.media_arguments + self.media = Media.find_or_create_media_from_content(media_type, media_content, additional_args) end def set_quote_metadata @@ -140,6 +116,59 @@ def add_source_creation_log protected + def set_media_type + original_claim = self.set_original_claim&.strip + + if original_claim && original_claim.match?(/\A#{URI::DEFAULT_PARSER.make_regexp(['http', 'https'])}\z/) + uri = URI.parse(original_claim) + content_type = Net::HTTP.get_response(uri)['content-type'] + + case content_type + when /^image\// + self.media_type = 'UploadedImage' + when /^video\// + self.media_type = 'UploadedVideo' + when /^audio\// + self.media_type = 'UploadedAudio' + else + self.media_type = 'Link' + end + elsif original_claim + self.media_type = 'Claim' + elsif !self.url.blank? + self.media_type = 'Link' + elsif !self.quote.blank? + self.media_type = 'Claim' + end + end + + def media_arguments + media_type = self.media_type + original_claim = self.set_original_claim&.strip + + if original_claim + case media_type + when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' + return media_type, original_claim, { has_original_claim: true } + when 'Claim' + return media_type, original_claim, { has_original_claim: true } + when 'Link' + return media_type, original_claim, { team: self.team, has_original_claim: true } + end + else + case media_type + when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' + return media_type, self.file + when 'Claim' + return media_type, self.quote, { quote_attributions: self.quote_attributions } + when 'Link' + return media_type, self.url, { team: self.team } + when 'Blank' + return media_type + end + end + end + def set_jsonld_response(task) jsonld = self.media.metadata['raw']['json+ld'] if self.media.metadata.has_key?('raw') unless jsonld.nil? diff --git a/app/models/media.rb b/app/models/media.rb index 3c87967aa..81d66c911 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -79,36 +79,17 @@ def domain '' end - def self.find_or_create_media_associated_to(project_media) - m = nil - original_claim = project_media.set_original_claim&.strip - media_type = project_media.media_type - team = project_media.team - - if original_claim - case media_type - when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - m = find_or_create_uploaded_file_media(original_claim, media_type, true) - when 'Claim' - m = find_or_create_claim_media(original_claim, nil, true) - when 'Link' - m = find_or_create_link_media(original_claim, team, true) - when 'Blank' - m = Blank.create! - end - else - case media_type - when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - m = find_or_create_uploaded_file_media(project_media.file, media_type) - when 'Claim' - m = find_or_create_claim_media(project_media.quote, project_media.quote_attributions) - when 'Link' - m = find_or_create_link_media(project_media.url, team) - when 'Blank' - m = Blank.create! - end + def self.find_or_create_media_from_content(media_type, media_content = nil, additional_args = {}) + case media_type + when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' + find_or_create_uploaded_file_media(media_content, media_type, additional_args) + when 'Claim' + find_or_create_claim_media(media_content, additional_args) + when 'Link' + find_or_create_link_media(media_content, additional_args) + when 'Blank' + Blank.create! end - m end private @@ -155,19 +136,20 @@ def self.download_file(url, ext) # we set it to UploadedImage by default, should we? # def self.create_with_file(project_media, media_type = 'UploadedImage') - def self.find_or_create_uploaded_file_media(file_media, media_type, has_original_claim = false) + def self.find_or_create_uploaded_file_media(file_media, media_type, additional_args = {}) + has_original_claim = additional_args&.fetch(:has_original_claim, nil) klass = media_type.constantize if has_original_claim - uri = URI.parse(file_media) - ext = File.extname(uri.path) - existing_media = klass.find_by(original_claim_hash: Digest::MD5.hexdigest(file_media)) if existing_media existing_media else + uri = URI.parse(file_media) + ext = File.extname(uri.path) file = download_file(file_media, ext) + klass.create!(file: file, original_claim: file_media) end else @@ -177,7 +159,10 @@ def self.find_or_create_uploaded_file_media(file_media, media_type, has_original end end - def self.find_or_create_claim_media(claim_media, quote_attributions = nil, has_original_claim = false) + def self.find_or_create_claim_media(claim_media, additional_args = {}) + has_original_claim = additional_args.fetch(:has_original_claim, nil) + quote_attributions = additional_args.fetch(:quote_attributions, nil) + if has_original_claim Claim.find_by(original_claim_hash: Digest::MD5.hexdigest(claim_media)) || Claim.create!(quote: claim_media, original_claim: claim_media) else @@ -185,8 +170,11 @@ def self.find_or_create_claim_media(claim_media, quote_attributions = nil, has_o end end - def self.find_or_create_link_media(link_media, project_media_team, has_original_claim = false) - pender_key = project_media_team.get_pender_key if project_media_team + def self.find_or_create_link_media(link_media, additional_args = {}) + has_original_claim = additional_args.fetch(:has_original_claim, nil) + project_media_team = additional_args.fetch(:team, nil) + + pender_key = project_media_team&.get_pender_key if project_media_team url_from_pender = Link.normalized(link_media, pender_key) if has_original_claim diff --git a/test/models/media_test.rb b/test/models/media_test.rb index f6c2ce0f3..36e4ca31f 100644 --- a/test/models/media_test.rb +++ b/test/models/media_test.rb @@ -630,7 +630,7 @@ def setup test "Claim Media: should save the original_claim and original_claim_hash when created from original claim" do claim = 'This is a claim.' - claim_media = Media.find_or_create_claim_media(claim, nil, true) + claim_media = Media.find_or_create_claim_media(claim, { has_original_claim: true }) assert_not_nil claim_media.original_claim_hash assert_not_nil claim_media.original_claim @@ -645,7 +645,7 @@ def setup test "Claim Media: should not create duplicate media if media with original_claim_hash exists" do assert_difference 'Claim.count', 1 do - 2.times { Media.find_or_create_claim_media('This is a claim.', nil, true) } + 2.times { Media.find_or_create_claim_media('This is a claim.', { has_original_claim: true }) } end end @@ -664,7 +664,7 @@ def setup }.to_json WebMock.stub_request(:get, pender_url).with(query: { url: link_url }).to_return(body: link_response) - link_media = Media.find_or_create_link_media(link_url, team, true) + link_media = Media.find_or_create_link_media(link_url, { team: team, has_original_claim: true }) assert_not_nil link_media.original_claim_hash assert_not_nil link_media.original_claim @@ -706,7 +706,7 @@ def setup WebMock.stub_request(:get, pender_url).with(query: { url: link_url }).to_return(body: link_response) assert_difference 'Link.count', 1 do - 2.times { Media.find_or_create_link_media(link_url, team) } + 2.times { Media.find_or_create_link_media(link_url, { team: team, has_original_claim: true }) } end end @@ -717,7 +717,7 @@ def setup audio_url = "http://example.com/#{file.path.split('/').last}" WebMock.stub_request(:get, audio_url).to_return(body: file.read, headers: { 'Content-Type' => 'audio/mp3' }) - uploaded_media = Media.find_or_create_uploaded_file_media(audio_url, 'UploadedAudio', true) + uploaded_media = Media.find_or_create_uploaded_file_media(audio_url, 'UploadedAudio', { has_original_claim: true }) assert_not_nil uploaded_media.original_claim_hash assert_not_nil uploaded_media.original_claim @@ -740,7 +740,7 @@ def setup WebMock.stub_request(:get, audio_url).to_return(body: file.read, headers: { 'Content-Type' => 'audio/mp3' }) assert_difference 'UploadedAudio.count', 1 do - 2.times { Media.find_or_create_uploaded_file_media(audio_url, 'UploadedAudio', true) } + 2.times { Media.find_or_create_uploaded_file_media(audio_url, 'UploadedAudio', { has_original_claim: true }) } end end end From d4ebac058e18bfaecca8673e04940bdc0bc77a09 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Fri, 21 Feb 2025 09:40:53 -0300 Subject: [PATCH 18/23] try to deal with codeclimate complains --- app/models/concerns/project_media_creators.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index 405d366aa..4833ba65f 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -55,7 +55,7 @@ def create_annotation def create_media! self.set_media_type if self.set_original_claim || self.media_type.blank? - media_type, media_content, additional_args = self.media_arguments + media_type, media_content, additional_args = *self.media_arguments self.media = Media.find_or_create_media_from_content(media_type, media_content, additional_args) end @@ -149,22 +149,22 @@ def media_arguments if original_claim case media_type when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - return media_type, original_claim, { has_original_claim: true } + [media_type, original_claim, { has_original_claim: true }] when 'Claim' - return media_type, original_claim, { has_original_claim: true } + [media_type, original_claim, { has_original_claim: true }] when 'Link' - return media_type, original_claim, { team: self.team, has_original_claim: true } + [media_type, original_claim, { team: self.team, has_original_claim: true }] end else case media_type when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - return media_type, self.file + [media_type, self.file] when 'Claim' - return media_type, self.quote, { quote_attributions: self.quote_attributions } + [media_type, self.quote, { quote_attributions: self.quote_attributions }] when 'Link' - return media_type, self.url, { team: self.team } + [media_type, self.url, { team: self.team }] when 'Blank' - return media_type + [media_type] end end end From 7507a0a2462b41aa8871b20d661dc76fce2ba473 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Fri, 21 Feb 2025 09:47:39 -0300 Subject: [PATCH 19/23] fix indentation --- app/models/concerns/project_media_creators.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index 4833ba65f..e0679852a 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -149,22 +149,22 @@ def media_arguments if original_claim case media_type when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - [media_type, original_claim, { has_original_claim: true }] + [media_type, original_claim, { has_original_claim: true }] when 'Claim' - [media_type, original_claim, { has_original_claim: true }] + [media_type, original_claim, { has_original_claim: true }] when 'Link' - [media_type, original_claim, { team: self.team, has_original_claim: true }] + [media_type, original_claim, { team: self.team, has_original_claim: true }] end else case media_type when 'UploadedImage', 'UploadedVideo', 'UploadedAudio' - [media_type, self.file] + [media_type, self.file] when 'Claim' - [media_type, self.quote, { quote_attributions: self.quote_attributions }] + [media_type, self.quote, { quote_attributions: self.quote_attributions }] when 'Link' - [media_type, self.url, { team: self.team }] + [media_type, self.url, { team: self.team }] when 'Blank' - [media_type] + [media_type] end end end From 00dd298035bad702dd6fdfaadd24e3c6034791e9 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Fri, 21 Feb 2025 09:51:45 -0300 Subject: [PATCH 20/23] try to make this a bit clearer --- app/models/concerns/project_media_creators.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/models/concerns/project_media_creators.rb b/app/models/concerns/project_media_creators.rb index e0679852a..53da50cf0 100644 --- a/app/models/concerns/project_media_creators.rb +++ b/app/models/concerns/project_media_creators.rb @@ -133,12 +133,10 @@ def set_media_type else self.media_type = 'Link' end - elsif original_claim + elsif original_claim || self.quote.present? self.media_type = 'Claim' - elsif !self.url.blank? + elsif self.url.present? self.media_type = 'Link' - elsif !self.quote.blank? - self.media_type = 'Claim' end end From 09625ae37d883aa022f9473a760d4b723aa5187e Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Mon, 24 Feb 2025 10:27:37 -0300 Subject: [PATCH 21/23] remove deprecated tests create_link is no longer in ProjectMediaCreators, only in Media. I think these two tests are already covered in media_test.rb --- .../concerns/project_media_creators_test.rb | 48 ------------------- 1 file changed, 48 deletions(-) delete mode 100644 test/models/concerns/project_media_creators_test.rb diff --git a/test/models/concerns/project_media_creators_test.rb b/test/models/concerns/project_media_creators_test.rb deleted file mode 100644 index 1cbf3a7a1..000000000 --- a/test/models/concerns/project_media_creators_test.rb +++ /dev/null @@ -1,48 +0,0 @@ -require 'test_helper' - -class FakeProjectMediaCreators - include ProjectMediaCreators - - def initialize(url) - @url = url - end - - def url - @url - end - - def team - nil - end -end - -class ProjectMediaCreatorsTest < ActiveSupport::TestCase - test "#create_link calls find with normalized URL" do - Team.current = nil - - Link.stubs(:normalized).returns('https://example.com/new') - Link.expects(:find_by).with(url: 'https://example.com/new').returns('fake link') - - fpc = FakeProjectMediaCreators.new('https://example.com/original') - assert_equal 'fake link', fpc.send(:create_link) - - Link.unstub(:normalized) - Link.unstub(:find_by) - end - - test "#create_link calls create with original URL, to let Pender normalize it" do - Team.current = nil - create_url = nil - - Link.stubs(:find_by).returns(nil) - Link.stubs(:normalized).returns('https://example.com/new') - Link.expects(:create).with(url: 'https://example.com/original', pender_key: nil).returns('fake link') - - fpc = FakeProjectMediaCreators.new('https://example.com/original') - assert_equal 'fake link', fpc.send(:create_link) - - Link.unstub(:find_by) - Link.unstub(:normalized) - Link.unstub(:create) - end -end From d47e1efe66a4f1aa7beed26f98f5c9708fd0e4d4 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Wed, 26 Feb 2025 14:52:23 -0300 Subject: [PATCH 22/23] clean up --- app/models/media.rb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/app/models/media.rb b/app/models/media.rb index 81d66c911..094927f58 100644 --- a/app/models/media.rb +++ b/app/models/media.rb @@ -1,5 +1,3 @@ -require 'byebug' - class Media < ApplicationRecord include AnnotationBase::Association @@ -134,8 +132,6 @@ def self.download_file(url, ext) file end - # we set it to UploadedImage by default, should we? - # def self.create_with_file(project_media, media_type = 'UploadedImage') def self.find_or_create_uploaded_file_media(file_media, media_type, additional_args = {}) has_original_claim = additional_args&.fetch(:has_original_claim, nil) klass = media_type.constantize From 5b9944003d74ae2d452abf0ed718c91022e94645 Mon Sep 17 00:00:00 2001 From: manu vasconcelos Date: Wed, 26 Feb 2025 15:07:44 -0300 Subject: [PATCH 23/23] replace ProjectMedia.create by ProjectMedia.create! To make it more verbose if it fails. --- test/controllers/graphql_controller_12_test.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/controllers/graphql_controller_12_test.rb b/test/controllers/graphql_controller_12_test.rb index 44e317ee3..dc72eb70b 100644 --- a/test/controllers/graphql_controller_12_test.rb +++ b/test/controllers/graphql_controller_12_test.rb @@ -745,7 +745,7 @@ def teardown t = create_team p = create_project team: t - pm = ProjectMedia.create(team: t, set_original_claim: url) + pm = ProjectMedia.create!(team: t, set_original_claim: url) assert_not_nil pm @@ -807,7 +807,7 @@ def teardown t.settings[:languages] << 'pt' t.save! p = create_project team: t - pm = ProjectMedia.create(team: t, set_original_claim: url) + pm = ProjectMedia.create!(team: t, set_original_claim: url) c = create_claim_description project_media: pm fc_1 = create_fact_check claim_description: c @@ -870,7 +870,7 @@ def teardown WebMock.stub_request(:get, pender_url).with({ query: { url: url } }).to_return(body: response_body) t = create_team - pm = ProjectMedia.create(team: t, set_original_claim: url) + pm = ProjectMedia.create!(team: t, set_original_claim: url) cd = create_claim_description project_media: pm fc = create_fact_check claim_description: cd