From a3feba4d342d81c9fbf5e77054eb0ff3957f0ceb Mon Sep 17 00:00:00 2001 From: Andy Casey Date: Sun, 8 Oct 2023 15:58:43 -0600 Subject: [PATCH 1/7] add sdss_id_groups --- python/sdss_access/path/path.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/python/sdss_access/path/path.py b/python/sdss_access/path/path.py index dcc2649..2cea031 100644 --- a/python/sdss_access/path/path.py +++ b/python/sdss_access/path/path.py @@ -345,6 +345,8 @@ def extract(self, name, example): template = re.sub('@component_default[|]', '{component_default}', template) if re.search('@cat_id_groups[|]', template): template = re.sub('@cat_id_groups[|]', '{cat_id_groups}', template) + if re.search('@sdss_id_groups[|]', template): + template = re.sub('@sdss_id_groups[|]', '{sdss_id_groups}', template) # check if template has any brackets haskwargs = re.search('[{}]', template) @@ -1257,6 +1259,29 @@ def cat_id_groups(self, filetype, **kwargs): cat_id = int(kwargs['cat_id']) return f"{(cat_id // k) % k:0>2.0f}/{cat_id % k:0>2.0f}" + def sdss_id_groups(self, filetype, **kwargs): + ''' + Return a folder structure to group data together based on their SDSS + identifier so that we don't have too many files in any one folder. + + Parameters + ---------- + filetype : str + File type parameter. + sdss_id : int or str + SDSS-V identifier + + Returns + ------- + sdssid_groups : str + A set of folders. + ''' + # with k = 100 then even with 10 M sources, each folder will have ~1,000 files + k = 100 + sdss_id = int(kwargs["sdss_id"]) + return f"{(sdss_id // k) % k:0>2.0f}/{sdss_id % k:0>2.0f}" + + def component_default(self, filetype, **kwargs): ''' Return the component name, if given. From 44b3f5ee843b17466aa0b9c3ea9e4ab09592a042 Mon Sep 17 00:00:00 2001 From: Sean-Morrison Date: Wed, 18 Oct 2023 10:01:22 -0500 Subject: [PATCH 2/7] update pad_fieldid function Updates to pad_fieldid function to allow for wildcard and string fieldids --- python/sdss_access/path/path.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/sdss_access/path/path.py b/python/sdss_access/path/path.py index dcc2649..2795c42 100644 --- a/python/sdss_access/path/path.py +++ b/python/sdss_access/path/path.py @@ -1407,7 +1407,7 @@ def isplate(self, filetype, **kwargs): Parameters --------- filetype : str - File type paramter + File type parameter run2d : str BOSS idlspec2d run2d version @@ -1430,7 +1430,7 @@ def pad_fieldid(self, filetype, **kwargs): Parameters --------- filetype : str - File type paramter + File type parameter run2d : str BOSS idlspec2d run2d version fieldid : str or int @@ -1449,7 +1449,10 @@ def pad_fieldid(self, filetype, **kwargs): return '' if run2d in ['v6_0_1','v6_0_2', 'v6_0_3', 'v6_0_4']: return str(fieldid) - return str(fieldid).zfill(6) + if fieldid.isnumeric(): + return str(fieldid).zfill(6) + else: + return fieldid class AccessError(Exception): From cbac02c94950aec3ed25a9b9ee6d35a439905bde Mon Sep 17 00:00:00 2001 From: Sean-Morrison Date: Wed, 18 Oct 2023 10:16:00 -0500 Subject: [PATCH 3/7] bug fix in pad_fieldid --- python/sdss_access/path/path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sdss_access/path/path.py b/python/sdss_access/path/path.py index 2795c42..87aa47a 100644 --- a/python/sdss_access/path/path.py +++ b/python/sdss_access/path/path.py @@ -1442,7 +1442,7 @@ def pad_fieldid(self, filetype, **kwargs): padd_fieldid in the form of N*'0' where N is the number of necessary zeros to pad fieldid ''' - fieldid = kwargs.get('fieldid', None) + fieldid = str(kwargs.get('fieldid', None)) run2d = kwargs.get('run2d', None) if (not run2d) & (not fieldid): From eb61d0cefc528b69f36a6e93a24d378be4bc1043 Mon Sep 17 00:00:00 2001 From: Sean-Morrison Date: Wed, 18 Oct 2023 10:19:57 -0500 Subject: [PATCH 4/7] bug fix in pad_fieldid --- python/sdss_access/path/path.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/sdss_access/path/path.py b/python/sdss_access/path/path.py index 87aa47a..ffe599a 100644 --- a/python/sdss_access/path/path.py +++ b/python/sdss_access/path/path.py @@ -1442,11 +1442,12 @@ def pad_fieldid(self, filetype, **kwargs): padd_fieldid in the form of N*'0' where N is the number of necessary zeros to pad fieldid ''' - fieldid = str(kwargs.get('fieldid', None)) + fieldid = kwargs.get('fieldid', None) run2d = kwargs.get('run2d', None) if (not run2d) & (not fieldid): return '' + fieldid = str(fieldid) if run2d in ['v6_0_1','v6_0_2', 'v6_0_3', 'v6_0_4']: return str(fieldid) if fieldid.isnumeric(): From cd56b2640c2a0de465f67a12e6861adea6b429f6 Mon Sep 17 00:00:00 2001 From: Sean-Morrison Date: Wed, 18 Oct 2023 10:25:04 -0500 Subject: [PATCH 5/7] Add test for pad_fieldid --- tests/path/test_sdss5.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/path/test_sdss5.py b/tests/path/test_sdss5.py index fd78596..e0411c4 100644 --- a/tests/path/test_sdss5.py +++ b/tests/path/test_sdss5.py @@ -57,8 +57,10 @@ def test_apogee_paths(self, path, name, special, keys, exp): 'v6_0_8/001234/spFrame-b1-00005432.fits.gz'), ('spFrame', '@pad_fieldid', {'run2d': 'v6_0_4', 'br': 'b', 'id': '1', 'frame': '5432', 'fieldid':'1234'}, 'v6_0_4/1234p/spFrame-b1-00005432.fits.gz')], + ('spField', '@pad_fieldid', {'run2d': 'v6_0_8', 'mjd': '59630', 'fieldid': '*'}, + 'v6_1_1/*/spField-*-59630.fits'), ids=['configgrp', 'apgprefix-apo', 'apgprefix-lco', 'apgprefix-ins', - 'isplate-v6_0_4','pad_fieldid-5','pad_fieldid-6', 'frame-pad', 'frame-nopadp']) + 'isplate-v6_0_4','pad_fieldid-5','pad_fieldid-6', 'frame-pad', 'frame-nopadp', 'pad_fieldid-*']) def test_special_function(self, path, name, special, keys, exp): assert special in path.templates[name] full = path.full(name, **keys) From 63886200d74fdf6bbf901549a461a101a57db971 Mon Sep 17 00:00:00 2001 From: Sean-Morrison Date: Wed, 18 Oct 2023 10:28:57 -0500 Subject: [PATCH 6/7] Update test_sdss5.py --- tests/path/test_sdss5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/path/test_sdss5.py b/tests/path/test_sdss5.py index e0411c4..497197a 100644 --- a/tests/path/test_sdss5.py +++ b/tests/path/test_sdss5.py @@ -56,9 +56,9 @@ def test_apogee_paths(self, path, name, special, keys, exp): ('spFrame', '@pad_fieldid', {'run2d': 'v6_0_8', 'br': 'b', 'id': '1', 'frame': '5432', 'fieldid':'1234'}, 'v6_0_8/001234/spFrame-b1-00005432.fits.gz'), ('spFrame', '@pad_fieldid', {'run2d': 'v6_0_4', 'br': 'b', 'id': '1', 'frame': '5432', 'fieldid':'1234'}, - 'v6_0_4/1234p/spFrame-b1-00005432.fits.gz')], + 'v6_0_4/1234p/spFrame-b1-00005432.fits.gz'), ('spField', '@pad_fieldid', {'run2d': 'v6_0_8', 'mjd': '59630', 'fieldid': '*'}, - 'v6_1_1/*/spField-*-59630.fits'), + 'v6_1_1/*/spField-*-59630.fits')], ids=['configgrp', 'apgprefix-apo', 'apgprefix-lco', 'apgprefix-ins', 'isplate-v6_0_4','pad_fieldid-5','pad_fieldid-6', 'frame-pad', 'frame-nopadp', 'pad_fieldid-*']) def test_special_function(self, path, name, special, keys, exp): From 6c7e32ab3a10c35145167a76eda18cdabb3befbb Mon Sep 17 00:00:00 2001 From: Sean-Morrison Date: Wed, 18 Oct 2023 10:31:33 -0500 Subject: [PATCH 7/7] Update test_sdss5.py --- tests/path/test_sdss5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/path/test_sdss5.py b/tests/path/test_sdss5.py index 497197a..7309363 100644 --- a/tests/path/test_sdss5.py +++ b/tests/path/test_sdss5.py @@ -57,7 +57,7 @@ def test_apogee_paths(self, path, name, special, keys, exp): 'v6_0_8/001234/spFrame-b1-00005432.fits.gz'), ('spFrame', '@pad_fieldid', {'run2d': 'v6_0_4', 'br': 'b', 'id': '1', 'frame': '5432', 'fieldid':'1234'}, 'v6_0_4/1234p/spFrame-b1-00005432.fits.gz'), - ('spField', '@pad_fieldid', {'run2d': 'v6_0_8', 'mjd': '59630', 'fieldid': '*'}, + ('spField', '@pad_fieldid', {'run2d': 'v6_1_1', 'mjd': '59630', 'fieldid': '*'}, 'v6_1_1/*/spField-*-59630.fits')], ids=['configgrp', 'apgprefix-apo', 'apgprefix-lco', 'apgprefix-ins', 'isplate-v6_0_4','pad_fieldid-5','pad_fieldid-6', 'frame-pad', 'frame-nopadp', 'pad_fieldid-*'])