@@ -2,9 +2,9 @@ namespace :load_transparencyreport_data do
2
2
require 'csv'
3
3
require 'open-uri'
4
4
5
- desc "Loading transparencyreport data"
5
+ desc "Loading transparencyreport data: content removal request "
6
6
task :load_content_removal_requests => :environment do
7
- #csv_text = File.read('https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests-by-product-and-reason.csv')
7
+ # Read in CSV data
8
8
c = CSV . parse ( open ( 'https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests.csv' ) , :headers => true )
9
9
c . each do |r |
10
10
period_end = Date . strptime ( r [ 0 ] , '%m/%d/%Y' )
@@ -13,14 +13,61 @@ namespace :load_transparencyreport_data do
13
13
:period_end => period_end ,
14
14
:percentage_complied => r [ 4 ]
15
15
)
16
- #puts r[2], Country.find_by_code(r[2])
17
16
e . country_id = Country . find_by_code ( r [ 2 ] ) . id
18
17
e . save!
19
18
end
20
19
end
21
20
21
# Imports the per-product content removal CSV.
#
# CSV columns used (0-based): 0 = period end date (m/d/Y), 2 = country code,
# 3 = product name, 4 = court orders, 5 = executive, 6 = items.
# Only rows whose period ends strictly before 2010-12-31 are stored, and each
# stored row is attached to the shared 'Undefined' reason because this dataset
# has no reason column.
desc "Loading transparencyreport data: content removal request by product"
task :load_content_removal_requests_by_product => :environment do
  source_url = 'https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests-by-product.csv'
  cutoff = Date.new(2010, 12, 31)

  # Placeholder reason shared by every row imported from this dataset.
  placeholder_reason = Reason.find_or_create_by_name('Undefined')

  # Read in CSV data (first line is treated as the header row).
  CSV.parse(open(source_url), :headers => true).each do |row|
    period_end = Date.strptime(row[0], '%m/%d/%Y')
    # Skip anything whose period does not end before the cutoff date.
    next unless period_end < cutoff

    request = ContentRemovalRequest.new(
      :court_orders => row[4],
      :executive => row[5],
      :items => row[6]
    )

    # Match the existing period by country code and period end date.
    # NOTE(review): the lookup yields nil when no period matches; whether
    # save! then rejects the record depends on model validations - confirm.
    periods_for_country = ContentRemovalRequestPeriod.joins(:country).where('countries.code' => row[2])
    request.content_removal_request_period = periods_for_country.find_by_period_end(period_end)
    request.product = Product.find_or_create_by_name(row[3])
    request.reason = placeholder_reason
    request.save!
  end
end
42
+
43
# Imports the per-product-and-reason content removal CSV.
#
# CSV columns used (0-based): 0 = period end date (m/d/Y), 2 = country code,
# 3 = product name, 4 = reason name, 5 = court orders, 6 = executive, 7 = items.
# Every row becomes one ContentRemovalRequest attached to the period found by
# country code and period end date.
desc "Loading transparencyreport data: content removal request by product and reason"
task :load_content_removal_requests_by_product_and_reason => :environment do
  source_url = 'https://www.google.com/transparencyreport/governmentrequests/google-content-removal-requests-by-product-and-reason.csv'

  # Read in CSV data (first line is treated as the header row).
  rows = CSV.parse(open(source_url), :headers => true)

  rows.each do |row|
    period_end = Date.strptime(row[0], '%m/%d/%Y')
    periods_for_country = ContentRemovalRequestPeriod.joins(:country).where('countries.code' => row[2])
    # NOTE(review): nil when no period matches this country/date - confirm
    # that the model rejects requests without a period.
    reporting_period = periods_for_country.find_by_period_end(period_end)

    # Deleting superseded data is currently disabled:
    #reporting_period.content_removal_requests.joins(:product).where('products.name' => row[3]).delete_all

    # Create new data
    request = ContentRemovalRequest.new(
      :court_orders => row[5],
      :executive => row[6],
      :items => row[7]
    )
    request.content_removal_request_period = reporting_period
    request.product = Product.find_or_create_by_name(row[3])
    request.reason = Reason.find_or_create_by_name(row[4])
    request.save!
  end
end
64
+
22
65
# Aggregate task: depends on every loader in this namespace.
# The by-product loaders look up existing periods by country code and period
# end date, so the plain loader is listed first - presumably it is the one
# that creates those periods; confirm before reordering.
loaders_in_order = [
  :load_content_removal_requests,
  :load_content_removal_requests_by_product,
  :load_content_removal_requests_by_product_and_reason
]

desc "Run all transparencyreport data tasks"
task :all => loaders_in_order
24
71
25
72
end
26
73
0 commit comments