Skip to content

Commit

Permalink
Merge pull request #1024 from DaanVanVugt/bugfix/osci_ingestor
Browse files Browse the repository at this point in the history
osci ingestor fix
  • Loading branch information
fbacall authored Sep 16, 2024
2 parents 5dd11ff + dcb79da commit 15cfa29
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 138 deletions.
37 changes: 17 additions & 20 deletions lib/ingestors/osci_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,39 +25,36 @@ def read(url)

private

def process_osci(url)
def process_osci(_url)
month = Time.zone.now.month
year = Time.zone.now.year
(1..12).each do |i|
unless Rails.env.test? and File.exist?('test/vcr_cassettes/ingestors/osci.yml')
sleep(1)
end
sleep(1) unless Rails.env.test? and File.exist?('test/vcr_cassettes/ingestors/osci.yml')
scrape_url = "https://osc-international.com/my-calendar/?format=calendar&month=#{i}&yr=#{i >= month ? year : year + 1}"
event_page = Nokogiri::HTML5.parse(open_url(scrape_url.to_s, raise: true)).css("div[id='my-calendar']")[0].css("tbody")[0].css("td")
event_page = Nokogiri::HTML5.parse(open_url(scrape_url.to_s, raise: true)).css('#my-calendar > .mc-content > table.my-calendar-table > tbody > tr > td')
event_page.each do |event_data|
next if event_data.get_attribute('class').include?('no-events')

event_cal = event_data.css("div[id*=calendar-my-calendar]")
event_cal = event_data.css('article.calendar-event')
event_cal.each do |boop|
event = OpenStruct.new
el = boop.css("h3[class='event-title summary']")[0]
url_str = el.css("a")[0].get_attribute('href')
el = boop.css('div.details')
url_str = el.css('a')[0].get_attribute('href')
event.url = scrape_url + url_str

el2 = boop.css("div[id='#{url_str.gsub('#', '')}']")[0]
event.title = el2.css("h4[class='mc-title']")[0].text.strip
event.venue = el2.css("div[class='mc-location']")[0].css("strong[class='location-link']")[0].text.strip
event.title = el.css('h4.mc-title')[0].text.strip
event.venue = el.css('.mc-location')[0].css('strong.location-link')[0].text.strip

if el2.css("div[class='time-block']")[0].css("span[class='event-time dtstart']").count.positive?
event.start = Time.zone.parse(el2.css("div[class='time-block']")[0].css("span[class='event-time dtstart']")[0].css("time")[0].get_attribute('datetime'))
event.end = Time.zone.parse(el2.css("div[class='time-block']")[0].css("span[class='end-time dtend']")[0].css("time")[0].get_attribute('datetime'))
if el.css('.time-block > span.event-time.dtstart').count.positive?
event.start = Time.zone.parse(el.css(".time-block']")[0].css('span.event-time.dtstart')[0].css('time')[0].get_attribute('datetime'))
event.end = Time.zone.parse(el.css('.time-block')[0].css('span.end-time.dtend')[0].css('time')[0].get_attribute('datetime'))
else
event.start = Time.zone.parse(el2.css("div[class='time-block']")[0].css("span[class='mc-start-date dtstart']")[0].get_attribute('content'))
if el2.css("div[class='time-block']")[0].css("span[class='event-time dtend']").count.positive?
event.end = Time.zone.parse(el2.css("div[class='time-block']")[0].css("span[class='event-time dtend']")[0].text.strip)
else
event.end = event.start
end
event.start = Time.zone.parse(el.css('.time-block')[0].css('span.mc-start-date.dtstart')[0].get_attribute('content'))
event.end = if el.css('.time-block')[0].css('span.event-time.dtend').count.positive?
Time.zone.parse(el.css('.time-block')[0].css('span.event-time.dtend')[0].text.strip)
else
event.start
end
end

# parsed datetimes are always 2 hours off
Expand Down
18 changes: 9 additions & 9 deletions test/unit/ingestors/osci_ingestor_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ class OsciIngestorTest < ActiveSupport::TestCase
ingestor = Ingestors::OsciIngestor.new

# check event doesn't exist
new_title = '14:00: Open Science Coffee: Assessing robustness through multiverse analysis – Applications in research and education'
new_url = 'https://osc-international.com/my-calendar/?format=calendar&month=9&yr=2023#mc_calendar_03_2-calendar-details-my-calendar'
new_title = '12:00: Community Building for Citizen Science'
new_url = 'https://osc-international.com/my-calendar/?format=calendar&month=9&yr=2024https://osc-international.com/mc-locations/vu-library-main-building/'

refute Event.where(title: new_title, url: new_url).any?

# run task
assert_difference 'Event.count', 17 do
freeze_time(2023) do
assert_difference 'Event.count', 18 do
freeze_time(2024) do
VCR.use_cassette('ingestors/osci') do
ingestor.read(source.url)
ingestor.write(@user, @content_provider)
Expand All @@ -39,8 +39,8 @@ class OsciIngestorTest < ActiveSupport::TestCase

assert_equal 18, ingestor.events.count
assert ingestor.materials.empty?
assert_equal 17, ingestor.stats[:events][:added]
assert_equal 1, ingestor.stats[:events][:updated]
assert_equal 18, ingestor.stats[:events][:added]
assert_equal 0, ingestor.stats[:events][:updated]
assert_equal 0, ingestor.stats[:events][:rejected]

# check event does exist
Expand All @@ -52,8 +52,8 @@ class OsciIngestorTest < ActiveSupport::TestCase
# check other fields
assert_equal 'OSCI', event.source
assert_equal 'Amsterdam', event.timezone
assert_equal Time.zone.parse('Sun, 03 Sep 2023 14:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Sun, 03 Sep 2023 15:00:00.000000000 UTC +00:00'), event.end
assert_equal 'OSC Leiden', event.venue
assert_equal Time.zone.parse('Thu, 26 Sep 2024 12:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Thu, 26 Sep 2024 12:00:00.000000000 UTC +00:00'), event.end
assert_equal 'VU Library, Main building', event.venue
end
end
170 changes: 61 additions & 109 deletions test/vcr_cassettes/ingestors/osci.yml

Large diffs are not rendered by default.

0 comments on commit 15cfa29

Please sign in to comment.