Skip to content

Commit

Permalink
Merge branch 'master' into feature/sense_scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
fbacall authored Sep 12, 2023
2 parents 3ded2cf + daf0f1f commit 4620758
Show file tree
Hide file tree
Showing 11 changed files with 856 additions and 13 deletions.
2 changes: 1 addition & 1 deletion app/helpers/collections_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ def item_order_badge(collection_item)
end

# Renders a collection item's comment as a blockquote.
#
# @param collection_item [#comment, nil] the item whose comment should be shown
# @return [Object, nil] whatever +content_tag+ returns, or nil when there is no
#   item or its comment is blank
def item_comment(collection_item)
  # `collection_item&.comment.blank?` short-circuits to nil for a nil item, which
  # lets an `unless` guard pass and then raises on `collection_item.comment`.
  # Resolve the comment first so that a missing item is treated as blank.
  comment = collection_item&.comment
  return if comment.blank?

  content_tag(:blockquote, comment, class: 'collection-item-comment')
end
end
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
<h4 class="nav-heading">Type</h4>
<div class="nav-block">
<%= content_provider.content_provider_type %>
</div>
<% unless content_provider.keywords.blank? %>
<% unless TeSS::Config.feature['content_providers_disabled'].include?('type') %>
<h4 class="nav-heading">Type</h4>
<div class="nav-block">
<%= content_provider.content_provider_type %>
</div>
<% end %>
<% unless content_provider.keywords.blank? || TeSS::Config.feature['content_providers_disabled'].include?('keywords') %>
<h4 class="nav-heading">Keywords</h4>
<div class="nav-block">
<%= content_provider.keywords.join(', ') %>
Expand All @@ -21,7 +23,7 @@
</p>
</div>
<% end %>
<% unless @content_provider.contact.blank? %>
<% unless @content_provider.contact.blank? || TeSS::Config.feature['content_providers_disabled'].include?('contact') %>
<h4 class="nav-heading">Contact</h4>
<div class="nav-block">
<%= content_provider.contact %>
Expand All @@ -33,11 +35,13 @@
<%= render partial: 'content_providers/partials/twitter_card',
locals: { content_provider: @content_provider } %>
<h4 class="nav-heading">Owner</h4>
<div class="nav-block">
<%= link_to content_provider.user.name, content_provider.user %>
</div>
<% if content_provider.editors.any? %>
<% unless TeSS::Config.feature['content_providers_disabled'].include?('owner') %>
<h4 class="nav-heading">Owner</h4>
<div class="nav-block">
<%= link_to content_provider.user.name, content_provider.user %>
</div>
<% end %>
<% unless !content_provider.editors.any? || TeSS::Config.feature['content_providers_disabled'].include?('editors') %>
<h4 class="nav-heading"><%= ContentProvider.human_attribute_name(:editors) %></h4>
<% content_provider.editors.each do |editor| %>
<div class="nav-block">
Expand Down
1 change: 1 addition & 0 deletions config/tess.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ default: &default
# other_types, subsets, syllabus, approved_editors address_finder
disabled: ['ardc_fields_of_research', 'other_types', 'subsets', 'syllabus', 'approved_editors']
materials_disabled: []
content_providers_disabled: []
restrict_content_provider_selection: false
user_ingestion_methods: ['bioschemas']
placeholder:
Expand Down
52 changes: 52 additions & 0 deletions lib/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
require 'open-uri'
require 'csv'
require 'nokogiri'

module Ingestors
  # Scrapes events from an HTML listing page and registers each one via +add_event+.
  class DccIngestor < Ingestor
    # @return [Hash] ingestor metadata: key, display title and category
    def self.config
      {
        key: 'dcc_event',
        title: 'DCC Events API',
        category: :events
      }
    end

    # Reads events from the page at +url+. Errors are recorded in @messages
    # instead of being raised to the caller.
    #
    # @param url [#to_s] address of the page to scrape
    # @return [nil]
    def read(url)
      begin
        process_dcc(url)
      rescue StandardError => e
        # StandardError rather than Exception, so that signals and SystemExit
        # still propagate.
        @messages << "#{self.class.name} failed with: #{e.message}"
      end

      # finished
      nil
    end

    private

    # Fetches and parses the page, then builds one event per listing column.
    # A failure while extracting a single event is logged to @messages and the
    # remaining events are still processed.
    #
    # @param url [#to_s] address of the page to scrape
    def process_dcc(url)
      page = Nokogiri::HTML5.parse(open_url(url.to_s, raise: true))
      listing = page.css("div[class='archive__content grid']")[0]
      event_nodes = listing.css("div[class='column span-4-sm span-8-md span-6-lg']")

      event_nodes.each do |event_data|
        event = OpenStruct.new

        title_link = event_data.css("h2[class='post-item__title h5']")[0].css('a')[0]
        event.url = title_link.get_attribute('href')
        event.title = title_link.text.strip

        meta_items = event_data.css("ul[class='post-item__meta']")[0].css('li')

        # The first meta item holds "<start>—<end>"; the end part is applied as a
        # time of day on the start date.
        start_text, end_text = meta_items[0].text.strip.split('—')
        start_time = Time.zone.parse(start_text)
        event.start = start_time
        event.end = start_time.beginning_of_day + Time.zone.parse(end_text).seconds_since_midnight.seconds

        event.venue = meta_items[1].text.strip

        event.source = 'DCC'
        event.timezone = 'Amsterdam'
        # NOTE(review): event is an OpenStruct here, so this call looks like a
        # no-op unless OpenStruct is extended elsewhere in the project - confirm.
        event.set_default_times

        add_event(event)
      rescue StandardError => e
        @messages << "Extract event fields failed with: #{e.message}"
      end
    end
  end
end
4 changes: 3 additions & 1 deletion lib/ingestors/ingestor_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ def self.ingestors
Ingestors::UhasseltIngestor,
Ingestors::OdisseiIngestor,
Ingestors::RstIngestor,
Ingestors::SenseIngestor,
Ingestors::OsciIngestor,
Ingestors::DccIngestor,
Ingestors::SenseIngestor
]
end

Expand Down
79 changes: 79 additions & 0 deletions lib/ingestors/osci_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
require 'open-uri'
require 'csv'
require 'nokogiri'

module Ingestors
  # Scrapes events from the osc-international.com monthly calendar pages and
  # registers each one via +add_event+.
  class OsciIngestor < Ingestor
    # @return [Hash] ingestor metadata: key, display title and category
    def self.config
      {
        key: 'osci',
        title: 'OSCI Events API',
        category: :events
      }
    end

    # Reads events for the coming twelve months. Errors are recorded in
    # @messages instead of being raised to the caller.
    #
    # @param url [Object] passed through to +process_osci+
    # @return [nil]
    def read(url)
      begin
        process_osci(url)
      rescue StandardError => e
        # StandardError rather than Exception, so that signals and SystemExit
        # still propagate.
        @messages << "#{self.class.name} failed with: #{e.message}"
      end

      # finished
      nil
    end

    private

    # Requests one calendar page per month (1..12) and extracts the events in
    # every calendar cell. Months earlier than the current month are requested
    # for next year, so the pages together cover the next twelve months.
    #
    # @param url [Object] currently unused; the calendar address is hard-coded below
    def process_osci(url)
      now = Time.zone.now
      month = now.month
      year = now.year

      (1..12).each do |i|
        # Pause between requests, except in the test environment when the
        # recorded cassette file exists.
        sleep(1) unless Rails.env.test? && File.exist?('test/vcr_cassettes/ingestors/osci.yml')

        target_year = i >= month ? year : year + 1
        scrape_url = "https://osc-international.com/my-calendar/?format=calendar&month=#{i}&yr=#{target_year}"
        calendar = Nokogiri::HTML5.parse(open_url(scrape_url, raise: true)).css("div[id='my-calendar']")[0]
        day_cells = calendar.css('tbody')[0].css('td')

        day_cells.each do |day_cell|
          next if day_cell.get_attribute('class').include?('no-events')

          day_cell.css('div[id*=calendar-my-calendar]').each do |entry|
            event = OpenStruct.new

            # The title link is an in-page anchor ("#...") that is also the id
            # of the element holding the event details.
            anchor = entry.css("h3[class='event-title summary']")[0].css('a')[0].get_attribute('href')
            event.url = scrape_url + anchor

            details = entry.css("div[id='#{anchor.delete('#')}']")[0]
            event.title = details.css("h4[class='mc-title']")[0].text.strip
            event.venue = details.css("div[class='mc-location']")[0].css("strong[class='location-link']")[0].text.strip

            event.start, event.end = osci_event_times(details.css("div[class='time-block']")[0])

            # parsed datetimes are always 2 hours off
            # NOTE(review): a fixed offset presumably compensates for time zone
            # handling; confirm it is still correct outside summer time.
            event.start += 2.hours
            event.end += 2.hours

            event.source = 'OSCI'
            event.timezone = 'Amsterdam'
            # NOTE(review): event is an OpenStruct here, so this call looks like
            # a no-op unless OpenStruct is extended elsewhere - confirm.
            event.set_default_times

            add_event(event)
          end
        rescue StandardError => e
          # A failure abandons the rest of the current calendar cell only.
          @messages << "Extract event fields failed with: #{e.message}"
        end
      end
    end

    # Works out the start and end of an event from its "time-block" element.
    #
    # @param time_block [#css] the element containing the event's time markup
    # @return [Array] two elements: the parsed start and the parsed end
    def osci_event_times(time_block)
      timed_start = time_block.css("span[class='event-time dtstart']")

      if timed_start.any?
        starts_at = Time.zone.parse(timed_start[0].css('time')[0].get_attribute('datetime'))
        ends_at = Time.zone.parse(
          time_block.css("span[class='end-time dtend']")[0].css('time')[0].get_attribute('datetime')
        )
      else
        starts_at = Time.zone.parse(time_block.css("span[class='mc-start-date dtstart']")[0].get_attribute('content'))
        timed_end = time_block.css("span[class='event-time dtend']")
        # Without an explicit end time the event ends when it starts.
        ends_at = timed_end.any? ? Time.zone.parse(timed_end[0].text.strip) : starts_at
      end

      [starts_at, ends_at]
    end
  end
end
31 changes: 31 additions & 0 deletions test/controllers/content_providers_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -498,4 +498,35 @@ class ContentProvidersControllerTest < ActionController::TestCase
assert_select '.masonry-brick .markdown-description strong', count: 1
assert_select '.masonry-brick .markdown-description p'
end

test 'should hide disabled fields' do
  sidebar_headings = ['Type', 'ELIXIR node', 'Keywords', 'Contact', 'Owner', 'Editors']
  disabled_fields = ['type', 'keywords', 'contact', 'owner', 'editors']

  sign_in users(:regular_user)
  @content_provider.add_editor(users(:another_regular_user))
  assert @content_provider.update(keywords: ['Science'])

  # Under the default settings each heading appears exactly once in the sidebar.
  get :show, params: { id: @content_provider }
  assert_response :success
  assert_select '#sidebar' do
    sidebar_headings.each do |heading|
      assert_select '.nav-heading', text: heading, count: 1
    end
  end

  # With nodes switched off and the fields disabled, none of the headings appear.
  with_settings(feature: { nodes: false, content_providers_disabled: disabled_fields }) do
    get :show, params: { id: @content_provider }
    assert_response :success
    assert_select '#sidebar' do
      sidebar_headings.each do |heading|
        assert_select '.nav-heading', text: heading, count: 0
      end
    end
  end
end
end
58 changes: 58 additions & 0 deletions test/unit/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
require 'test_helper'

# Exercises Ingestors::DccIngestor against the recorded "ingestors/dcc" cassette.
class DccIngestorTest < ActiveSupport::TestCase
  setup do
    @user = users(:regular_user)
    @content_provider = content_providers(:another_portal_provider)
    mock_ingestions
    mock_timezone # System time zone should not affect test result
  end

  teardown do
    reset_timezone
  end

  test 'can ingest events from dcc' do
    # The method is the key declared by Ingestors::DccIngestor.config.
    source = @content_provider.sources.build(
      url: 'https://dcc-po.nl/agenda/',
      method: 'dcc_event',
      enabled: true
    )

    ingestor = Ingestors::DccIngestor.new

    # check the event doesn't exist yet
    new_title = "DCC-PO dag"
    new_url = 'https://dcc-po.nl/agenda/dcc-po-dag/'
    refute Event.where(title: new_title, url: new_url).any?

    # run task
    assert_difference 'Event.count', 1 do
      freeze_time(2019) do
        VCR.use_cassette("ingestors/dcc") do
          ingestor.read(source.url)
          ingestor.write(@user, @content_provider)
        end
      end
    end

    assert_equal 1, ingestor.events.count
    assert ingestor.materials.empty?
    assert_equal 1, ingestor.stats[:events][:added]
    assert_equal 0, ingestor.stats[:events][:updated]
    assert_equal 0, ingestor.stats[:events][:rejected]

    # check the event now exists
    event = Event.where(title: new_title, url: new_url).first
    assert event
    assert_equal new_title, event.title
    assert_equal new_url, event.url

    # check other fields (9 October 2019 was a Wednesday)
    assert_equal 'DCC', event.source
    assert_equal 'Amsterdam', event.timezone
    assert_equal Time.zone.parse('Wed, 09 Oct 2019 10:00:00.000000000 UTC +00:00'), event.start
    assert_equal Time.zone.parse('Wed, 09 Oct 2019 16:30:00.000000000 UTC +00:00'), event.end
    assert_equal 'Domstad, Utrecht', event.venue
  end
end
59 changes: 59 additions & 0 deletions test/unit/ingestors/osci_ingestor_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
require 'test_helper'

# Exercises Ingestors::OsciIngestor against the recorded "ingestors/osci" cassette.
class OsciIngestorTest < ActiveSupport::TestCase
  setup do
    @user = users(:regular_user)
    @content_provider = content_providers(:another_portal_provider)
    mock_ingestions
    mock_timezone # keeps the result independent of the system time zone
  end

  teardown { reset_timezone }

  test 'can ingest events from osci' do
    source = @content_provider.sources.build(url: 'https://osc-international.com/my-calendar/',
                                             method: 'osci',
                                             enabled: true)
    ingestor = Ingestors::OsciIngestor.new

    expected_title = "14:00: Open Science Coffee: Assessing robustness through multiverse analysis – Applications in research and education"
    expected_url = 'https://osc-international.com/my-calendar/?format=calendar&month=9&yr=2023#mc_calendar_03_2-calendar-details-my-calendar'

    # The event must not be present before the ingestion runs.
    refute Event.where(title: expected_title, url: expected_url).any?

    # Run the ingestion against the recorded responses.
    assert_difference 'Event.count', 12 do
      freeze_time(2023) do
        VCR.use_cassette("ingestors/osci") do
          ingestor.read(source.url)
          ingestor.write(@user, @content_provider)
        end
      end
    end

    assert_equal 18, ingestor.events.count
    assert ingestor.materials.empty?
    { added: 12, updated: 6, rejected: 0 }.each do |outcome, expected_count|
      assert_equal expected_count, ingestor.stats[:events][outcome]
    end

    # The event must be present afterwards.
    ingested = Event.where(title: expected_title, url: expected_url).first
    assert ingested
    assert_equal expected_title, ingested.title
    assert_equal expected_url, ingested.url

    # Remaining fields.
    assert_equal 'OSCI', ingested.source
    assert_equal 'Amsterdam', ingested.timezone
    assert_equal Time.zone.parse('Sun, 03 Sep 2023 14:00:00.000000000 UTC +00:00'), ingested.start
    assert_equal Time.zone.parse('Sun, 03 Sep 2023 15:00:00.000000000 UTC +00:00'), ingested.end
    assert_equal 'OSC Leiden', ingested.venue
  end
end
Loading

0 comments on commit 4620758

Please sign in to comment.