Skip to content

Commit

Permalink
Merge pull request #888 from DaanVanVugt/feature/dcc_ingestor
Browse files Browse the repository at this point in the history
dcc ingestor
  • Loading branch information
fbacall authored Sep 11, 2023
2 parents 9c20460 + 866976b commit 4bb1319
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 0 deletions.
52 changes: 52 additions & 0 deletions lib/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
require 'open-uri'
require 'csv'
require 'nokogiri'

module Ingestors
  # Ingestor that scrapes an HTML agenda page (the DCC events listing) and
  # registers every listed item as an event via `add_event`.
  class DccIngestor < Ingestor
    # Static description of this ingestor (key, display title, category).
    def self.config
      {
        key: 'dcc_event',
        title: 'DCC Events API',
        category: :events
      }
    end

    # Fetches and parses the agenda page at +url+.
    #
    # Failures are recorded in @messages instead of being raised. Only
    # StandardError is rescued so that interrupts, `exit` and out-of-memory
    # conditions still propagate instead of being logged as scrape failures.
    #
    # @param url [#to_s] address of the agenda page
    # @return [nil] always
    def read(url)
      process_dcc(url)
      nil
    rescue StandardError => e
      @messages << "#{self.class.name} failed with: #{e.message}"
      nil
    end

    private

    # Downloads the page, locates the event cards inside the archive grid and
    # adds one event per card. A card that cannot be parsed is skipped and
    # reported in @messages; the remaining cards are still processed.
    def process_dcc(url)
      page = Nokogiri::HTML5.parse(open_url(url.to_s, raise: true))
      grid = page.css("div[class='archive__content grid']")[0]
      cards = grid.css("div[class='column span-4-sm span-8-md span-6-lg']")

      cards.each do |event_data|
        add_event(build_event(event_data))
      rescue StandardError => e
        @messages << "Extract event fields failed with: #{e.message}"
      end
    end

    # Builds an event from a single card node.
    #
    # The first meta item holds "<start> — <end>" separated by an em dash: the
    # left part is parsed as the full start timestamp, the right part is parsed
    # only for its time of day and applied to the start date.
    # NOTE(review): assumes every card has both parts; a card without an end
    # part raises here and is skipped by the caller — confirm that is intended.
    def build_event(event_data)
      event = OpenStruct.new

      link = event_data.css("h2[class='post-item__title h5']")[0].css("a")[0]
      event.url = link.get_attribute('href')
      event.title = link.text.strip

      meta_items = event_data.css("ul[class='post-item__meta']")[0].css("li")
      start_part, end_part = meta_items[0].text.strip.split('—')
      starts_at = Time.zone.parse(start_part)
      event.start = starts_at
      event.end = starts_at.beginning_of_day + Time.zone.parse(end_part).seconds_since_midnight.seconds

      event.venue = meta_items[1].text.strip

      event.source = 'DCC'
      event.timezone = 'Amsterdam'
      event.set_default_times

      event
    end
  end
end
1 change: 1 addition & 0 deletions lib/ingestors/ingestor_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def self.ingestors
Ingestors::UhasseltIngestor,
Ingestors::OdisseiIngestor,
Ingestors::RstIngestor,
Ingestors::DccIngestor,
]
end

Expand Down
58 changes: 58 additions & 0 deletions test/unit/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
require 'test_helper'

# Exercises Ingestors::DccIngestor against the recorded "ingestors/dcc" cassette.
class DccIngestorTest < ActiveSupport::TestCase
  setup do
    @user = users(:regular_user)
    @content_provider = content_providers(:another_portal_provider)
    mock_ingestions
    mock_timezone # System time zone should not affect test result
  end

  teardown { reset_timezone }

  test 'can ingest events from dcc' do
    dcc_source = @content_provider.sources.build(
      url: 'https://dcc-po.nl/agenda/',
      method: 'dcc',
      enabled: true
    )
    dcc_ingestor = Ingestors::DccIngestor.new

    expected_title = "DCC-PO dag"
    expected_url = 'https://dcc-po.nl/agenda/dcc-po-dag/'
    matching_events = -> { Event.where(title: expected_title, url: expected_url) }

    # the event must not be present before ingestion
    refute matching_events.call.any?

    # ingest from the recorded response; exactly one event should be created
    assert_difference 'Event.count', 1 do
      freeze_time(2019) do
        VCR.use_cassette("ingestors/dcc") do
          dcc_ingestor.read(dcc_source.url)
          dcc_ingestor.write(@user, @content_provider)
        end
      end
    end

    assert_equal 1, dcc_ingestor.events.count
    assert dcc_ingestor.materials.empty?
    { added: 1, updated: 0, rejected: 0 }.each do |outcome, expected_count|
      assert_equal expected_count, dcc_ingestor.stats[:events][outcome]
    end

    # the event is now stored under the expected title and URL
    stored = matching_events.call.first
    assert stored
    assert_equal expected_title, stored.title
    assert_equal expected_url, stored.url

    # remaining scraped fields
    assert_equal 'DCC', stored.source
    assert_equal 'Amsterdam', stored.timezone
    assert_equal Time.zone.parse('Mon, 09 Oct 2019 10:00:00.000000000 UTC +00:00'), stored.start
    assert_equal Time.zone.parse('Mon, 09 Oct 2019 16:30:00.000000000 UTC +00:00'), stored.end
    assert_equal 'Domstad, Utrecht', stored.venue
  end
end
50 changes: 50 additions & 0 deletions test/vcr_cassettes/ingestors/dcc.yml

Large diffs are not rendered by default.

0 comments on commit 4bb1319

Please sign in to comment.