# -*- coding: utf-8 -*-
"""Functions for listing collection information."""

__copyright__ = 'Copyright (c) 2019-2021, Utrecht University'
__license__   = 'GPLv3, see LICENSE'

import re
from collections import OrderedDict

from genquery import AS_DICT, Query

from util import *

__all__ = ['api_browse_folder',
           'api_browse_collections',
           'api_search']


@api.make()
def api_browse_folder(ctx,
                      coll='/',
                      sort_on='name',
                      sort_order='asc',
                      offset=0,
                      limit=10,
                      space=pathutil.Space.OTHER.value):
    """Get paginated collection contents, including size/modify date information.

    :param ctx:        Combined type of a callback and rei struct
    :param coll:       Collection to get paginated contents of
    :param sort_on:    Column to sort on ('name', 'modified' or 'size')
    :param sort_order: Column sort order ('asc' or 'desc')
    :param offset:     Offset to start browsing from
    :param limit:      Limit number of results
    :param space:      Space the collection is in

    :returns: Dict with paginated collection contents
    """
    def transform(row):
        # Remove ORDER_BY etc. wrappers from column names.
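        # For example, a key such as 'ORDER(COLL_NAME)' or 'MAX(DATA_MODIFY_TIME)' is
        # reduced to 'COLL_NAME' / 'DATA_MODIFY_TIME', so the lookups below stay uniform.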
        x = {re.sub(r'.*\((.*)\)', r'\1', k): v for k, v in row.items()}

        if 'DATA_NAME' in x:
            return {'name':        x['DATA_NAME'],
                    'type':        'data',
                    'size':        int(x['DATA_SIZE']),
                    'modify_time': int(x['DATA_MODIFY_TIME'])}
        else:
            return {'name':        x['COLL_NAME'].split('/')[-1],
                    'type':        'coll',
                    'modify_time': int(x['COLL_MODIFY_TIME'])}
    if sort_on == 'modified':
        # FIXME: Sorting on modify date is borked: There appears to be no
        # reliable way to filter out replicas this way - multiple entries for
        # the same file may be returned when replication takes place on a
        # minute boundary, for example.
        # We would want to take the max modify time *per* data name.
        # (or not? replication may take place a long time after a modification,
        #  resulting in a 'too new' date)
        ccols = ['COLL_NAME', 'ORDER(COLL_MODIFY_TIME)']
        dcols = ['DATA_NAME', 'MIN(DATA_CREATE_TIME)', 'ORDER(DATA_MODIFY_TIME)', 'DATA_SIZE']
    elif sort_on == 'size':
        ccols = ['COLL_NAME', 'COLL_MODIFY_TIME']
        dcols = ['DATA_NAME', 'MIN(DATA_CREATE_TIME)', 'MAX(DATA_MODIFY_TIME)', 'ORDER(DATA_SIZE)']
    else:
        ccols = ['ORDER(COLL_NAME)', 'COLL_MODIFY_TIME']
        dcols = ['ORDER(DATA_NAME)', 'MIN(DATA_CREATE_TIME)', 'MAX(DATA_MODIFY_TIME)', 'DATA_SIZE']

    if sort_order == 'desc':
        ccols = [x.replace('ORDER(', 'ORDER_DESC(') for x in ccols]
        dcols = [x.replace('ORDER(', 'ORDER_DESC(') for x in dcols]

    zone = user.zone(ctx)
    # We make offset/limit act on two queries at once, placing qdata right after qcoll.
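    # Worked example (illustrative numbers): with 3 subcollections in total,
    # offset=0 and limit=10, qcoll yields 3 rows, so qdata below starts at
    # offset max(0, 0 - 3) = 0 with a limit of 10 - 3 = 7 data objects.
    # With offset=10, qcoll yields nothing and qdata starts at offset 10 - 3 = 7
    # with the full limit of 10.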
    if space == str(pathutil.Space.RESEARCH):
        qcoll = Query(ctx, ccols,
                      "COLL_PARENT_NAME = '{}' AND COLL_NAME not like '/{}/home/vault-%' AND COLL_NAME not like '/{}/home/grp-vault-%'".format(coll, zone, zone),
                      offset=offset, limit=limit, output=AS_DICT)
    elif space == str(pathutil.Space.VAULT):
        qcoll = Query(ctx, ccols,
                      "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%'".format(coll, zone),
                      offset=offset, limit=limit, output=AS_DICT)
    else:
        qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll),
                      offset=offset, limit=limit, output=AS_DICT)
    colls = map(transform, list(qcoll))

    qdata = Query(ctx, dcols, "COLL_NAME = '{}'".format(coll),
                  offset=max(0, offset - qcoll.total_rows()), limit=limit - len(colls), output=AS_DICT)
    datas = map(transform, list(qdata))
    if len(colls) + len(datas) == 0:
        # No results at all?
        # Make sure the collection actually exists.
        if not collection.exists(ctx, coll):
            return api.Error('nonexistent', 'The given path does not exist')
        # (checking this beforehand would waste a query in the most common situation)

    return OrderedDict([('total', qcoll.total_rows() + qdata.total_rows()),
                        ('items', colls + datas)])


@api.make()
def api_browse_collections(ctx,
                           coll='/',
                           sort_on='name',
                           sort_order='asc',
                           offset=0,
                           limit=10,
                           space=pathutil.Space.OTHER.value):
    """Get paginated collection contents, including size/modify date information.

    This function browses a folder and only looks at the collections in it, not at data objects.
    It is intended for folder selection, for instance when copying data from the vault to the research area.

    :param ctx:        Combined type of a callback and rei struct
    :param coll:       Collection to get paginated contents of
    :param sort_on:    Column to sort on ('name', 'modified' or 'size')
    :param sort_order: Column sort order ('asc' or 'desc')
    :param offset:     Offset to start browsing from
    :param limit:      Limit number of results
    :param space:      Space the collection is in

    :returns: Dict with paginated collection contents
    """
    def transform(row):
        # Remove ORDER_BY etc. wrappers from column names.
        x = {re.sub(r'.*\((.*)\)', r'\1', k): v for k, v in row.items()}

        if 'DATA_NAME' in x:
            return {'name':        x['DATA_NAME'],
                    'type':        'data',
                    'size':        int(x['DATA_SIZE']),
                    'modify_time': int(x['DATA_MODIFY_TIME'])}
        else:
            return {'name':        x['COLL_NAME'].split('/')[-1],
                    'type':        'coll',
                    'modify_time': int(x['COLL_MODIFY_TIME'])}
    if sort_on == 'modified':
        # FIXME: Sorting on modify date is borked: There appears to be no
        # reliable way to filter out replicas this way - multiple entries for
        # the same file may be returned when replication takes place on a
        # minute boundary, for example.
        # We would want to take the max modify time *per* data name.
        # (or not? replication may take place a long time after a modification,
        #  resulting in a 'too new' date)
        ccols = ['COLL_NAME', 'ORDER(COLL_MODIFY_TIME)']
    elif sort_on == 'size':
        ccols = ['COLL_NAME', 'COLL_MODIFY_TIME']
    else:
        ccols = ['ORDER(COLL_NAME)', 'COLL_MODIFY_TIME']

    if sort_order == 'desc':
        ccols = [x.replace('ORDER(', 'ORDER_DESC(') for x in ccols]

    zone = user.zone(ctx)
    # Only collections are queried here, so offset/limit apply directly to the single collection query.
    if space == str(pathutil.Space.RESEARCH):
        qcoll = Query(ctx, ccols,
                      "COLL_PARENT_NAME = '{}' AND COLL_NAME not like '/{}/home/vault-%' AND COLL_NAME not like '/{}/home/grp-vault-%'".format(coll, zone, zone),
                      offset=offset, limit=limit, output=AS_DICT)
    elif space == str(pathutil.Space.VAULT):
        qcoll = Query(ctx, ccols,
                      "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%'".format(coll, zone),
                      offset=offset, limit=limit, output=AS_DICT)
    else:
        qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll),
                      offset=offset, limit=limit, output=AS_DICT)
    colls = map(transform, list(qcoll))
    if len(colls) == 0:
        # No results at all?
        # Make sure the collection actually exists.
        if not collection.exists(ctx, coll):
            return api.Error('nonexistent', 'The given path does not exist')
        # (checking this beforehand would waste a query in the most common situation)

    return OrderedDict([('total', qcoll.total_rows()),
                        ('items', colls)])


@api.make()
def api_search(ctx,
               search_string,
               search_type='filename',
               sort_on='name',
               sort_order='asc',
               offset=0,
               limit=10):
    """Get paginated search results, including size/modify date/location information.

    :param ctx:           Combined type of a callback and rei struct
    :param search_string: String used to search
    :param search_type:   Search type ('filename', 'folder', 'metadata', 'status')
    :param sort_on:       Column to sort on ('name', 'modified' or 'size')
    :param sort_order:    Column sort order ('asc' or 'desc')
    :param offset:        Offset to start browsing from
    :param limit:         Limit number of results

    :returns: Dict with paginated search results
    """
    def transform(row):
        # Remove ORDER_BY etc. wrappers from column names.
        x = {re.sub(r'.*\((.*)\)', r'\1', k): v for k, v in row.items()}

        if 'DATA_NAME' in x:
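            # pathutil.info is assumed to split a collection path into
            # (space, zone, group, subpath); here the group part is used as the
            # base of the displayed path, e.g. (illustrative)
            # '/tempZone/home/research-initial/sub' -> 'research-initial', 'sub'.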
            _, _, path, subpath = pathutil.info(x['COLL_NAME'])
            if subpath != '':
                path = path + "/" + subpath

            return {'name':        "/{}/{}".format(path, x['DATA_NAME']),
                    'type':        'data',
                    'size':        int(x['DATA_SIZE']),
                    'modify_time': int(x['DATA_MODIFY_TIME'])}

        if 'COLL_NAME' in x:
            _, _, path, subpath = pathutil.info(x['COLL_NAME'])
            if subpath != '':
                path = path + "/" + subpath

            return {'name':        "/{}".format(path),
                    'type':        'coll',
                    'modify_time': int(x['COLL_MODIFY_TIME'])}
    # Escape %, _ and \ since iRODS does not handle those correctly.
    # HdR: this can only be done when search_type is NOT 'status'!
    # The status description must be kept intact.
    if not search_type == 'status':
        search_string = search_string.replace("\\", "\\\\")
        search_string = search_string.replace("%", "\\%")
        search_string = search_string.replace("_", "\\_")
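    # For example (illustrative), a search string of '50%_done' is rewritten to
    # '50\%\_done' before being embedded in the where clause below, so '%' and '_'
    # are treated as literal characters rather than wildcards.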
    zone = user.zone(ctx)

    if search_type == 'filename':
        cols = ['ORDER(DATA_NAME)', 'COLL_NAME', 'MIN(DATA_CREATE_TIME)', 'MAX(DATA_MODIFY_TIME)', 'DATA_SIZE']
        where = "COLL_NAME like '{}%%' AND DATA_NAME like '%%{}%%'".format("/" + zone + "/home", search_string)
    elif search_type == 'folder':
        cols = ['ORDER(COLL_NAME)', 'COLL_PARENT_NAME', 'MIN(COLL_CREATE_TIME)', 'MAX(COLL_MODIFY_TIME)']
        where = "COLL_PARENT_NAME like '{}%%' AND COLL_NAME like '%%{}%%'".format("/" + zone + "/home", search_string)
    elif search_type == 'metadata':
        cols = ['ORDER(COLL_NAME)', 'MIN(COLL_CREATE_TIME)', 'MAX(COLL_MODIFY_TIME)']
        where = "META_COLL_ATTR_UNITS like '{}%%' AND META_COLL_ATTR_VALUE like '%%{}%%' AND COLL_NAME like '{}%%'".format(
            constants.UUUSERMETADATAROOT + "_", search_string, "/" + zone + "/home"
        )
    elif search_type == 'status':
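        # The search string is expected as '<area>:<status>', e.g. (illustrative)
        # 'research:SUBMITTED' or 'vault:UNPUBLISHED'; the part before the colon
        # selects which status attribute is queried.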
        status = search_string.split(":")
        status_value = status[1]
        if status[0] == "research":
            status_name = constants.IISTATUSATTRNAME
        else:
            status_name = constants.IIVAULTSTATUSATTRNAME
        cols = ['ORDER(COLL_NAME)', 'MIN(COLL_CREATE_TIME)', 'MAX(COLL_MODIFY_TIME)']
        where = "META_COLL_ATTR_NAME = '{}' AND META_COLL_ATTR_VALUE = '{}' AND COLL_NAME like '{}%%'".format(
            status_name, status_value, "/" + zone + "/home"
        )
    else:
        # Guard against unknown search types, which would otherwise leave cols/where unbound.
        return api.Error('invalid_search_type', 'Unknown search type: {}'.format(search_type))
    if sort_order == 'desc':
        cols = [x.replace('ORDER(', 'ORDER_DESC(') for x in cols]

    qdata = Query(ctx, cols, where, offset=max(0, int(offset)),
                  limit=int(limit), case_sensitive=False, output=AS_DICT)
    datas = map(transform, list(qdata))

    return OrderedDict([('total', qdata.total_rows()),
                        ('items', datas)])