Skip to content

Commit 3c8739c

Browse files
committed
finish import global transparency data
1 parent c7fa6ad commit 3c8739c

File tree

1 file changed

+51
-4
lines changed

1 file changed

+51
-4
lines changed

lib/tasks/load_transparencyreport_data.rake

+51-4
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ namespace :load_transparencyreport_data do
22
require 'csv'
33
require 'open-uri'
44

5-
desc "Loading transparencyreport data"
5+
desc "Loading transparencyreport data: content removal request"
66
task :load_content_removal_requests => :environment do
7-
#csv_text = File.read('https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests-by-product-and-reason.csv')
7+
# Read in CSV data
88
c = CSV.parse(open('https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests.csv'), :headers => true)
99
c.each do |r|
1010
period_end = Date.strptime(r[0], '%m/%d/%Y')
@@ -13,14 +13,61 @@ namespace :load_transparencyreport_data do
1313
:period_end => period_end,
1414
:percentage_complied => r[4]
1515
)
16-
#puts r[2], Country.find_by_code(r[2])
1716
e.country_id = Country.find_by_code(r[2]).id
1817
e.save!
1918
end
2019
end
2120

21+
desc "Loading transparencyreport data: content removal request by product"
task :load_content_removal_requests_by_product => :environment do
  # Imports per-product content removal requests from the by-product CSV.
  # This dataset has no reason column, so every imported request is filed
  # under one shared placeholder reason named 'Undefined'.
  placeholder_reason = Reason.find_or_create_by_name('Undefined')

  # Rows whose period ends on or after this date are skipped (strict "<" below).
  cutoff = Date.new(2010,12,31)

  # Download and parse the CSV export; the first row is treated as headers.
  # NOTE(review): a bare open() on a URL relies on open-uri patching Kernel#open;
  # newer Rubies expect URI.open instead - confirm the Ruby version in use.
  source = open('https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests-by-product.csv')
  rows = CSV.parse(source, :headers => true)

  rows.each do |row|
    # Column 0 holds the period end date in month/day/year form.
    period_end = Date.strptime(row[0], '%m/%d/%Y')
    next unless period_end < cutoff

    request = ContentRemovalRequest.new(
      :court_orders => row[4],
      :executive => row[5],
      :items => row[6]
    )

    # Attach the request to the period with this end date for the country
    # whose code is in column 2.
    # NOTE(review): if no such period exists the finder presumably returns nil
    # and the request is saved without a period - confirm this is acceptable.
    periods_for_country = ContentRemovalRequestPeriod.joins(:country).where('countries.code' => row[2])
    request.content_removal_request_period = periods_for_country.find_by_period_end(period_end)

    # Column 3 holds the product name; products are created on first sight.
    request.product = Product.find_or_create_by_name(row[3])
    request.reason = placeholder_reason
    request.save!
  end
end
42+
43+
desc "Loading transparencyreport data: content removal request by product and reason"
task :load_content_removal_requests_by_product_and_reason => :environment do
  # Imports content removal requests broken down by product and reason.
  # Download and parse the CSV export; the first row is treated as headers.
  # NOTE(review): a bare open() on a URL relies on open-uri patching Kernel#open;
  # newer Rubies expect URI.open instead - confirm the Ruby version in use.
  source = open('https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests-by-product-and-reason.csv')
  rows = CSV.parse(source, :headers => true)

  rows.each do |row|
    # Column 0 is the period end date (month/day/year); column 2 is the country code.
    period_end = Date.strptime(row[0], '%m/%d/%Y')
    periods_for_country = ContentRemovalRequestPeriod.joins(:country).where('countries.code' => row[2])
    period = periods_for_country.find_by_period_end(period_end)

    # Deleting superseded per-product rows is currently disabled; the original
    # statements are kept below for reference.
    #puts period.content_removal_requests.joins(:product).where('products.name' => r[3]).count
    #period.content_removal_requests.joins(:product).where('products.name' => r[3]).delete_all

    # Build the new request from the count columns.
    request = ContentRemovalRequest.new(
      :court_orders => row[5],
      :executive => row[6],
      :items => row[7]
    )

    # NOTE(review): period is presumably nil when no matching period exists,
    # in which case the request is saved without one - confirm this is acceptable.
    request.content_removal_request_period = period
    # Column 3 is the product name and column 4 the reason name; both are
    # created on first sight.
    request.product = Product.find_or_create_by_name(row[3])
    request.reason = Reason.find_or_create_by_name(row[4])
    request.save!
  end
end
64+
2265
desc "Run all transparencyreport data tasks"
23-
task :all => [:load_content_removal_requests]
66+
# Prerequisites of the aggregate task, listed with the base dataset first.
# The two per-product tasks look up ContentRemovalRequestPeriod rows, which
# are presumably created by :load_content_removal_requests - confirm.
import_steps = [
  :load_content_removal_requests,
  :load_content_removal_requests_by_product,
  :load_content_removal_requests_by_product_and_reason
]
task :all => import_steps
2471

2572
end
2673

0 commit comments

Comments
 (0)