@@ -34,6 +34,15 @@ def self.crawl( api_key = nil )
34
34
end
35
35
36
36
37
# Re-crawls every product stored with the Rust language constant.
#
# Obtains the API key via `fetch_api_key` and does nothing (returns nil)
# when that key is nil or blank. Otherwise each matching product is passed
# to `crawl_package` with `false` as the third argument (the
# `ignore_existing` flag), so versions that already exist are processed
# again rather than skipped.
#
# Returns the array of products that were iterated, or nil when no API
# key is available.
def self.recrawl
  key = fetch_api_key
  return if key.to_s.empty?

  # Materialise the result set before iterating, as the original code does.
  rust_products = Product.where(language: Product::A_LANGUAGE_RUST).to_a
  rust_products.each { |product| crawl_package(product[:prod_key], key, false) }
end
45
+
37
46
def self . crawl_product_list ( api_key , page_nr = 1 , per_page = 100 )
38
47
logger . info "going to crawl product list from #{ page_nr } , per_page: #{ per_page } "
39
48
@@ -86,9 +95,22 @@ def self.crawl_package(product_id, api_key = nil, ignore_existing = true)
86
95
87
96
def self . process_versions ( product_db , product_doc , api_key , ignore_existing )
88
97
owners = fetch_product_owners ( api_key , product_db . prod_key )
89
- product_license = product_doc [ :crate ] [ :license ] . to_s
98
+ if product_doc . nil?
99
+ logger . error "process_versions: product json response cant be empty"
100
+ return
101
+ end
102
+
103
+ if product_doc . has_key? ( :versions ) == false
104
+ logger . error "process_versions: product json has no :verisons field\n #{ product_doc } "
105
+ end
106
+
90
107
product_doc [ :versions ] . each do |version_doc |
91
108
version_num = version_doc [ :num ] . to_s . strip
109
+ if version_num . empty?
110
+ log . error "process_versions: version_doc has no :num field\n #{ version_doc } "
111
+ next
112
+ end
113
+
92
114
if !product_db . version_by_number ( version_num ) . nil? && ignore_existing
93
115
logger . info "process_versions: #{ product_db . prod_key } :#{ version_num } exist already"
94
116
next
@@ -100,7 +122,7 @@ def self.process_versions( product_db, product_doc, api_key, ignore_existing )
100
122
next
101
123
end
102
124
103
- upsert_version_licenses ( product_db , version_db . version , product_license )
125
+ upsert_version_licenses ( product_db , version_db . version , version_doc [ :license ] )
104
126
upsert_version_links ( product_db , version_db . version , product_doc [ :crate ] )
105
127
upsert_version_archive ( product_db , version_db . version , version_doc [ :dl_path ] )
106
128
upset_version_devs ( product_db , version_db , owners )
@@ -123,7 +145,7 @@ def self.crawl_dependencies( product_db, version_db, api_key )
123
145
end
124
146
125
147
logger . info "crawl_dependencies: fetching version details for #{ product_db . prod_key } - #{ version } "
126
- dep_docs = fetch_version_dependencies ( api_key , product_db [ : prod_key] , version )
148
+ dep_docs = fetch_version_dependencies ( api_key , product_db . prod_key , version )
127
149
dep_docs . to_a . each do |dep_doc |
128
150
upsert_product_dependency ( product_db , version , dep_doc )
129
151
end
@@ -133,6 +155,11 @@ def self.crawl_dependencies( product_db, version_db, api_key )
133
155
#-- persistance helpers
134
156
135
157
def self . upsert_product ( product_doc )
158
+ if product_doc . nil?
159
+ logger . error "upsert_product: API response had no :crate subdocument"
160
+ return
161
+ end
162
+
136
163
prod_key = product_doc [ :id ] . to_s . strip
137
164
prod_key_dc = prod_key . downcase
138
165
product_db = Product . where (
@@ -216,6 +243,11 @@ def self.upsert_product_owner(product_db, owner_doc, version_num)
216
243
217
244
218
245
def self . upsert_version_licenses ( product_db , version_label , license_label )
246
+ if license_label . nil? or license_label . empty?
247
+ logger . error "upsert_version_licenses: missing license of #{ product_db } /#{ version_label } "
248
+ return
249
+ end
250
+
219
251
licenses = license_label . to_s . strip . split ( '/' )
220
252
licenses . to_a . each do |license |
221
253
self . upsert_version_license ( product_db , version_label , license )
@@ -228,12 +260,12 @@ def self.upsert_version_licenses(product_db, version_label, license_label)
228
260
def self . upsert_version_license ( product_db , version_label , license_name )
229
261
license_name = license_name . to_s . strip
230
262
231
- lic_db = License . where (
263
+ lic_db = License . find_or_create_by (
232
264
language : product_db [ :language ] ,
233
265
prod_key : product_db [ :prod_key ] ,
234
266
version : version_label ,
235
267
name : license_name
236
- ) . first_or_create
268
+ )
237
269
238
270
lic_db . update ( source : 'crates' )
239
271
lic_db . save
@@ -243,14 +275,6 @@ def self.upsert_version_license(product_db, version_label, license_name)
243
275
244
276
245
277
def self . upsert_product_dependency ( product_db , version_id , dep_doc )
246
- dep_db = Dependency . where (
247
- prod_type : A_TYPE_CARGO ,
248
- language : product_db . language ,
249
- prod_key : product_db . prod_key ,
250
- prod_version : version_id ,
251
- dep_prod_key : dep_doc [ :crate_id ] ,
252
- ) . first_or_create
253
-
254
278
scope = if dep_doc [ :optional ]
255
279
Dependency ::A_SCOPE_OPTIONAL
256
280
elsif dep_doc [ :target ] == 'test'
@@ -259,12 +283,18 @@ def self.upsert_product_dependency(product_db, version_id, dep_doc)
259
283
Dependency ::A_SCOPE_COMPILE
260
284
end
261
285
262
- dep_db . update (
286
+ dep_db = Dependency . find_or_create_by (
287
+ prod_type : A_TYPE_CARGO ,
288
+ language : product_db . language ,
289
+ prod_key : product_db . prod_key ,
290
+ prod_version : version_id ,
291
+
292
+ dep_prod_key : dep_doc [ :crate_id ] ,
263
293
version : dep_doc [ :req ] ,
264
- name : dep_doc [ :crate_id ] ,
265
294
scope : scope
266
295
)
267
-
296
+ dep_db . name = dep_doc [ :crate_id ]
297
+ dep_db . save
268
298
dep_db
269
299
end
270
300
@@ -284,12 +314,12 @@ def self.upsert_version_links(product_db, version_id, product_doc)
284
314
285
315
286
316
def self . upsert_version_link ( product_db , version_id , name , url )
287
- url_db = Versionlink . where (
317
+ url_db = Versionlink . find_or_create_by (
288
318
language : product_db [ :language ] ,
289
319
prod_key : product_db [ :prod_key ] ,
290
320
version_id : version_id ,
291
321
link : url . to_s . strip
292
- ) . first_or_create
322
+ )
293
323
294
324
url_db . update ( name : name . to_s . strip )
295
325
url_db . save
@@ -301,12 +331,12 @@ def self.upsert_version_archive(product_db, version_id, dl_path)
301
331
pkg_name = "#{ product_db [ :prod_key ] } -#{ version_id } .crate"
302
332
url = "#{ API_HOST } /#{ dl_path } "
303
333
304
- url_db = Versionarchive . where (
334
+ url_db = Versionarchive . find_or_create_by (
305
335
language : product_db [ :language ] ,
306
336
prod_key : product_db [ :prod_key ] ,
307
337
version_id : version_id ,
308
338
name : pkg_name
309
- ) . first_or_create
339
+ )
310
340
311
341
url_db . update ( link : url )
312
342
url_db . save
0 commit comments