From 13404bc0b92339dc39027c269ab95248c43ca2fc Mon Sep 17 00:00:00 2001
From: Thomas Holder
Date: Fri, 21 Dec 2018 16:10:34 +0100
Subject: [PATCH] skip encoding for strings with len > 4

https://github.com/rcsb/mmtf/issues/37
---
 simplemmtf.py | 18 +++++++++++++++++-
 test.py       | 23 +++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/simplemmtf.py b/simplemmtf.py
index a674b97..a53adc8 100644
--- a/simplemmtf.py
+++ b/simplemmtf.py
@@ -437,6 +437,22 @@ def _PackedIntBufStrategy(nbytes=1, dectype='i4'):
 
 ########## MEDIUM LEVEL ARRAY ENCODE/DECODE API #################
 
+def check_encodable(arr, codec, param=0):
+    '''Check if an array is encodable with the requested strategy.
+
+    Example of type 5 "fixed-length string array" with string length 4:
+
+    >>> check_encodable(["ABCD"], 5, 4)
+    True
+    >>> check_encodable(["ABCDE"], 5, 4)
+    False
+    '''
+    if codec == 5:
+        return all(len(s) <= param for s in arr)
+
+    return True
+
+
 def encode_array(arr, codec, param=0):
     strategy = strategies[codec](param)
 
@@ -757,7 +773,7 @@ def set(self, key, value, codec=-1, param=0):
         if codec == -1:
             codec, param = encodingrules.get(key, (0, 0))
 
-        if codec != 0:
+        if codec != 0 and check_encodable(value, codec, param):
             value = encode_array(value, codec, param)
 
         self._data[mmtfstr(key)] = value
diff --git a/test.py b/test.py
index 8983d4a..f1403d0 100644
--- a/test.py
+++ b/test.py
@@ -110,11 +110,34 @@ def test_encode_recode():
             raise UserWarning(key)
 
 
+def test_encodable():
+    atoms = [{
+        u'atomName': u'N',
+        u'chainId': u'A',
+        u'coords': (12.284, 42.763, 10.037),
+        u'element': u'N',
+        u'groupName': u'MET',
+    }]
+
+    # len('A') <= 4: encodable -> binary
+    d = simplemmtf.from_atoms(atoms)
+    assert d.get(u'chainIdList') == ['A']
+    assert isinstance(d._data[u'chainIdList'], bytes)
+
+    atoms[0][u'chainId'] = u'ABCDE'
+
+    # len('ABCDE') > 4: not encodable -> array
+    d = simplemmtf.from_atoms(atoms)
+    assert d.get(u'chainIdList') == ['ABCDE']
+    assert isinstance(d._data[u'chainIdList'], list)
+
+
 def test():
     for fn in sys.argv[1:]:
         test_file(fn)
 
     test_encode_recode()
+    test_encodable()
 
 
 if __name__ == '__main__':