From 37d23cd8715bff066f8b2c8389ef15f18f20b0de Mon Sep 17 00:00:00 2001 From: Jarrett J Date: Sun, 31 Mar 2024 15:09:01 -0400 Subject: [PATCH 01/15] bcif wip impl --- layer2/CifFile.cpp | 445 ++++++++++++++++++++++++++++++++--- layer2/CifFile.h | 186 +++++++++++---- layer2/CifMoleculeReader.cpp | 39 ++- layer2/ObjectMolecule.h | 3 + layer3/Executive.cpp | 7 + layer3/Executive.h | 2 + modules/pymol/importing.py | 13 +- 7 files changed, 613 insertions(+), 82 deletions(-) diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp index 4ef48e5f9..6bfca7a2c 100644 --- a/layer2/CifFile.cpp +++ b/layer2/CifFile.cpp @@ -12,14 +12,18 @@ #include #include +#include #include #include +#include #include "CifFile.h" #include "File.h" #include "MemoryDebug.h" #include "strcasecmp.h" +#include + namespace pymol { namespace _cif_detail { @@ -125,11 +129,18 @@ const char * cif_loop::get_value_raw(int row, int col) const { // get the number of elements in this array unsigned cif_array::size() const { - return (col == NOT_IN_LOOP) ? 1 : pointer.loop->nrows; + if (auto arr = std::get_if(&m_array)) { + return (arr->col == cif_detail::cif_str_array::NOT_IN_LOOP) + ? 1 + : arr->pointer.loop->nrows; + } else if (auto arr = std::get_if(&m_array)) { + return arr->m_arr.size(); + } + return 0; } /// Get array value, return NULL if `pos >= size()` or value in ['.', '?'] -const char* cif_array::get_value_raw(unsigned pos) const +const char* cif_detail::cif_str_array::get_value_raw(unsigned pos) const { if (col == NOT_IN_LOOP) return (pos > 0) ? 
nullptr : pointer.value; @@ -157,41 +168,84 @@ bool cif_array::is_missing_all() const { * @param key data name, must be lower case */ const cif_array * cif_data::get_arr(const char * key) const { - const char* p = strchr(key, '?'); - decltype(m_dict)::const_iterator it; + if (auto data = std::get_if(&m_data)) { + const auto& dict = data->m_dict; + const char* p = strchr(key, '?'); + std::remove_reference_t::const_iterator it; #ifndef NDEBUG - for (const char* q = key; *q; ++q) { - assert("key must be lower case" && !('Z' >= *q && *q >= 'A')); - } + for (const char* q = key; *q; ++q) { + assert("key must be lower case" && !('Z' >= *q && *q >= 'A')); + } #endif - // support alias shortcut: '?' matches '.' and '_' - if (p != nullptr) { - std::string tmp(key); - // replace '?' by '.' or '_' - tmp[p - key] = '.'; - if ((it = m_dict.find(tmp.c_str())) != m_dict.end()) - return &it->second; - tmp[p - key] = '_'; - if ((it = m_dict.find(tmp.c_str())) != m_dict.end()) - return &it->second; - } else { - if ((it = m_dict.find(key)) != m_dict.end()) - return &it->second; + // support alias shortcut: '?' matches '.' and '_' + if (p != nullptr) { + std::string tmp(key); + // replace '?' by '.' or '_' + tmp[p - key] = '.'; + if ((it = dict.find(tmp.c_str())) != dict.end()) + return &it->second; + tmp[p - key] = '_'; + if ((it = dict.find(tmp.c_str())) != dict.end()) + return &it->second; + } else { + if ((it = dict.find(key)) != dict.end()) + return &it->second; + } + } else if (auto data = std::get_if(&m_data)) { + + const auto& dict = data->m_dict; + + std::string_view keyView(key); + auto split_key = [](const char c) { + return c == '.' 
/*|| c == '_'*/ || c == '?'; + }; + auto splitTokenIt = std::find_if(keyView.begin(), keyView.end(), split_key); + if (splitTokenIt == keyView.end()) { + return nullptr; + } + auto dist = std::distance(keyView.begin(), splitTokenIt); + auto categoryView = keyView.substr(0, dist); + auto categoryStr = std::string(categoryView); + auto categoryIt = dict.find(categoryStr.c_str()); + if (categoryIt == dict.end()) { + return nullptr; + } + auto& category = categoryIt->second; + auto columnView = keyView.substr(dist + 1); + auto columnStr = std::string(columnView); + auto columnIt = category.find(columnStr.c_str()); + if (columnIt == category.end()) { + return nullptr; + } + auto arr = &columnIt->second; + auto& arrPtr = std::get(arr->m_array); + return &columnIt->second; } return nullptr; } +const char* cif_data::code() const +{ + if (auto data = std::get_if(&m_data)) { + return data->m_code ? data->m_code : ""; + } + return ""; +} + const cif_array* cif_data::empty_array() { return &EMPTY_ARRAY; } -const cif_data* cif_data::get_saveframe(const char* code) const { - auto it = m_saveframes.find(code); - if (it != m_saveframes.end()) - return &it->second; +const cif_detail::cif_str_data* cif_data::get_saveframe(const char* code) const { + if (auto data = std::get_if(&m_data)) { + const auto& saveframes = data->m_saveframes; + auto it = saveframes.find(code); + if (it != saveframes.end()) + return &it->second; + } return nullptr; } @@ -302,8 +356,8 @@ bool cif_file::parse(char*&& p) { } } - cif_data* current_frame = nullptr; - std::vector frame_stack; + cif_detail::cif_str_data* current_frame = nullptr; + std::vector frame_stack; std::unique_ptr global_block; decltype(m_datablocks) datablocksnew; @@ -324,7 +378,10 @@ bool cif_file::parse(char*&& p) { } tolowerinplace(tokens[i]); - current_frame->m_dict[tokens[i]].set_value(tokens[i + 1]); + current_frame->m_dict[tokens[i]].m_array = cif_detail::cif_str_array{}; + auto& cif_arr = std::get( + 
current_frame->m_dict[tokens[i]].m_array); + cif_arr.set_value(tokens[i + 1]); i++; } else if (strcasecmp("loop_", tokens[i]) == 0) { @@ -344,8 +401,10 @@ bool cif_file::parse(char*&& p) { // columns while (++i < n && keypossible[i] && tokens[i][0] == '_') { tolowerinplace(tokens[i]); - - current_frame->m_dict[tokens[i]].set_loop(loop, ncols); + current_frame->m_dict[tokens[i]].m_array = cif_detail::cif_str_array{}; + auto& cif_arr = std::get( + current_frame->m_dict[tokens[i]].m_array); + cif_arr.set_loop(loop, ncols); ncols++; } @@ -376,15 +435,18 @@ bool cif_file::parse(char*&& p) { i--; } else if (strncasecmp("data_", tokens[i], 5) == 0) { - datablocksnew.emplace_back(); - current_frame = &datablocksnew.back(); + auto& new_data = datablocksnew[tokens[i] + 5]; + new_data.m_data = cif_detail::cif_str_data(); + current_frame = &std::get(new_data.m_data); current_frame->m_code = tokens[i] + 5; frame_stack = {current_frame}; } else if (strncasecmp("global_", tokens[i], 5) == 0) { // STAR feature, not supported in CIF - current_frame = new cif_data; - global_block.reset(current_frame); + auto new_data = new cif_data; + new_data->m_data = cif_detail::cif_str_data{}; + current_frame = &std::get(new_data->m_data); + global_block.reset(new_data); frame_stack = {current_frame}; } else if (strncasecmp("save_", tokens[i], 5) == 0) { @@ -419,6 +481,323 @@ bool cif_file::parse(char*&& p) { return true; } +enum class DataTypes +{ + Int8 = 1, + Int16 = 2, + Int32 = 3, + UInt8 = 4, + UInt16 = 5, + UInt32 = 6, + Float32 = 32, + Float64 = 33, +}; + +template +void decodeAndPushBack(const std::vector& bytes, std::size_t& i, + std::size_t size, std::vector& result) +{ + T value; + std::memcpy(&value, &bytes[i], size); + result.push_back(value); +} + +static std::vector byte_array_decode(const std::vector& bytes, DataTypes dataType) +{ + std::vector result; + std::unordered_map dataTypeSize = { + {DataTypes::Int8, sizeof(std::int8_t)}, + {DataTypes::Int16, sizeof(std::int16_t)}, + 
{DataTypes::Int32, sizeof(std::int32_t)}, + {DataTypes::UInt8, sizeof(std::uint8_t)}, + {DataTypes::UInt16, sizeof(std::uint16_t)}, + {DataTypes::UInt32, sizeof(std::uint32_t)}, + {DataTypes::Float32, sizeof(float)}, + {DataTypes::Float64, sizeof(double)}, + }; + + auto size = dataTypeSize[dataType]; + for (std::size_t i = 0; i < bytes.size(); i += size) { + CifArrayElement valueVar; + switch (dataType) { + case DataTypes::Int8: + decodeAndPushBack(bytes, i, size, result); + break; + case DataTypes::Int16: + decodeAndPushBack(bytes, i, size, result); + break; + case DataTypes::Int32: + decodeAndPushBack(bytes, i, size, result); + break; + case DataTypes::UInt8: + decodeAndPushBack(bytes, i, size, result); + break; + case DataTypes::UInt16: + decodeAndPushBack(bytes, i, size, result); + break; + case DataTypes::UInt32: + decodeAndPushBack(bytes, i, size, result); + break; + case DataTypes::Float32: + decodeAndPushBack(bytes, i, size, result); + break; + case DataTypes::Float64: + decodeAndPushBack(bytes, i, size, result); + break; + } + } + return result; +} + +static std::vector integer_packing_decode( + const std::vector& packedInts, int byteCount, int srcSize, + bool isUnsigned) +{ + std::vector result(srcSize); + std::int32_t upperLimit; + if (isUnsigned) { + upperLimit = byteCount == 1 ? std::numeric_limits::max() + : std::numeric_limits::max(); + } else { + upperLimit = byteCount == 1 ? 
std::numeric_limits::max() + : std::numeric_limits::max(); + } + std::int32_t lowerLimit = -upperLimit - 1; + + auto as_int = [isUnsigned, byteCount](auto&& elem) -> std::int32_t { + if (isUnsigned) { + if (byteCount == 1) { + return static_cast(std::get(elem)); + } else { + return static_cast(std::get(elem)); + } + } else { + if (byteCount == 1) { + return static_cast(std::get(elem)); + } else { + return static_cast(std::get(elem)); + } + } + }; + + auto at_limit = [isUnsigned, upperLimit, lowerLimit](std::int32_t t) -> bool { + if (isUnsigned) { + return t == upperLimit; + } else { + return t == upperLimit || t == lowerLimit; + } + }; + + int i = 0; + int j = 0; + int n = packedInts.size(); + while (i < n) { + std::int32_t value = 0; + std::int32_t t = as_int(packedInts[i]); + while (at_limit(t)) { + value += t; + i++; + t = as_int(packedInts[i]); + } + value += t; + result[j] = value; + i++; + j++; + } + return result; +} + +static std::vector delta_decode( + std::vector& data, std::int32_t origin, DataTypes srcType) +{ + std::vector result = data; + result[0] = origin; + auto add_int32_t = [](auto&& a, auto&& b) -> std::int32_t { + return std::get(a) + std::get(b); + }; + std::inclusive_scan(result.begin(), result.end(), result.begin(), add_int32_t); + return result; +} + +static std::vector run_length_decode( + std::vector& data, DataTypes srcType, int srcSize) +{ + std::vector result; + std::int32_t value = 0; + for (std::size_t i = 0; i < data.size(); i += 2) { + auto item = std::get(data[i]); + auto count = std::get(data[i + 1]); + for (std::int32_t j = 0; j < count; j++) { + result.push_back(item); + } + } + return result; +} + +static std::vector fixed_array_decode( + std::vector& data, int factor, DataTypes srcType) +{ + std::vector result = data; + if (srcType == DataTypes::Float32) { + auto div_int32_t = [factor](auto&& a) -> float { + return std::get(a) / static_cast(factor); + }; + std::transform(data.begin(), data.end(), result.begin(), 
div_int32_t); + } else { + auto div_int32_t = [factor](auto&& a) -> double { + return std::get(a) / static_cast(factor); + }; + std::transform(data.begin(), data.end(), result.begin(), div_int32_t); + } + return result; +} + +static std::vector interval_quant_decode( + std::vector& data, double min, double max, int numSteps, + DataTypes srcType) +{ + std::vector result = data; + auto delta = (max - min) / (numSteps - 1); + std::transform(data.begin(), data.end(), result.begin(), + [min, delta](auto&& a) -> double { + return min + std::get(a) * delta; + }); + return result; +} + +static std::vector parse_bcif_decode( + const std::vector& rawData, + std::vector>& dataEncoding); + +static std::vector string_array_decode( + const std::vector& data, + std::vector>& indicesEncoding, + const std::string& stringData, const std::vector& offsets, + std::vector>& offsetEncoding) +{ + auto decodedOffsets = parse_bcif_decode(offsets, offsetEncoding); + auto indices = parse_bcif_decode(data, indicesEncoding); + + std::vector result; + result.reserve(indices.size()); + + std::vector strings = {""}; + strings.reserve(decodedOffsets.size()); + for (int i = 1; i < decodedOffsets.size(); i++) { + auto start = std::get(decodedOffsets[i - 1]); + auto end = std::get(decodedOffsets[i]); + auto str = stringData.substr(start, end - start); + strings.push_back(str); + } + + for (int i = 0; i < indices.size(); i++) { + auto index = std::get(indices[i]); + result.push_back(strings[index + 1]); + } + return result; +} + +static void parse_bcif_decode_kind(const std::string& kind, + const std::vector& rawData, + std::vector& result, + std::map& dataEncoding) +{ + if (kind == "ByteArray") { + auto type = dataEncoding["type"].as(); + result = byte_array_decode(rawData, static_cast(type)); + } else if (kind == "FixedPoint") { + auto factor = dataEncoding["factor"].as(); + auto srcType = dataEncoding["srcType"].as(); + result = fixed_array_decode(result, factor, static_cast(srcType)); + } else if 
(kind == "IntervalQuantization") { + auto min = dataEncoding["min"].as(); + auto max = dataEncoding["max"].as(); + auto numSteps = dataEncoding["numSteps"].as(); + auto srcType = dataEncoding["srcType"].as(); + result = interval_quant_decode(result, min, max, numSteps, static_cast(srcType)); + } else if (kind == "RunLength") { + auto srcType = dataEncoding["srcType"].as(); + auto srcSize = dataEncoding["srcSize"].as(); + result = run_length_decode(result, static_cast(srcType), srcSize); + } else if (kind == "Delta") { + auto origin = dataEncoding["origin"].as(); + auto srcType = dataEncoding["srcType"].as(); + result = delta_decode(result, origin, static_cast(srcType)); + } else if (kind == "IntegerPacking") { + auto byteCount = dataEncoding["byteCount"].as(); + auto srcSize = dataEncoding["srcSize"].as(); + auto isUnsigned = dataEncoding["isUnsigned"].as(); + result = integer_packing_decode(result, byteCount, srcSize, isUnsigned); + } else if (kind == "StringArray") { + auto indicesEncoding = dataEncoding["dataEncoding"].as>>(); + auto stringData = dataEncoding["stringData"].as(); + auto offsets = dataEncoding["offsets"].as>(); + auto offsetEncoding = dataEncoding["offsetEncoding"].as>>(); + result = string_array_decode(rawData, indicesEncoding, stringData, offsets, offsetEncoding); + } +} + +static std::vector parse_bcif_decode(const std::vector& rawData, + std::vector>& dataEncoding) +{ + std::vector result; + for (auto begin = std::rbegin(dataEncoding), end = std::rend(dataEncoding); + begin != end; ++begin) { + auto& dataEncode = *begin; + parse_bcif_decode_kind( + dataEncode["kind"].as(), rawData, result, dataEncode); + } + return result; +} + + +bool cif_file::parse_bcif(const char* bytes, std::size_t size) +{ + m_datablocks.clear(); + m_tokens.clear(); + + auto oh = msgpack::unpack(bytes, size); + auto msgobj = oh.get(); + auto dict = msgobj.as>(); + + auto dataBlocksRaw = dict["dataBlocks"].as>(); + pymol::cif_detail::bcif_data* currentFrame{}; + auto& 
dataDict = m_datablocks; + for (const auto& block : dataBlocksRaw) { + auto blockMap = block.as>(); + auto header = blockMap["header"].as(); + auto categoriesRaw = blockMap["categories"].as>(); + auto& new_block = m_datablocks[header]; + new_block.m_data = pymol::cif_detail::bcif_data{}; + currentFrame = &std::get(new_block.m_data); + pymol::cif_data& categories = dataDict[header]; + categories.m_data = pymol::cif_detail::bcif_data{}; + auto& categoriesData = std::get(categories.m_data); + for (const auto& category : categoriesRaw) { + auto categoryMap = category.as>(); + auto categoryName = categoryMap["name"].as(); + std::transform(categoryName.begin(), categoryName.end(), + categoryName.begin(), ::tolower); + auto rowCount = categoryMap["rowCount"].as(); + auto columnsRaw = categoryMap["columns"].as>(); + auto& columns = categoriesData.m_dict[categoryName]; + for (const auto& column : columnsRaw) { + auto columnMap = column.as>(); + auto columnName = columnMap["name"].as(); + std::transform(columnName.begin(), columnName.end(), + columnName.begin(), ::tolower); + auto dataRaw = columnMap["data"].as>(); + auto dataData = dataRaw["data"].as>(); + auto dataEncoding = dataRaw["encoding"].as>>(); + auto vec = parse_bcif_decode(dataData, dataEncoding); + columns[columnName] = std::move(vec); + } + } + dataDict[header] = std::move(categories); + } + return true; +} + } // namespace pymol // vi:sw=2:ts=2 diff --git a/layer2/CifFile.h b/layer2/CifFile.h index cbd79976e..760da68de 100644 --- a/layer2/CifFile.h +++ b/layer2/CifFile.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include // for pymol::default_free #include "MemoryDebug.h" @@ -44,6 +46,11 @@ template T raw_to_typed(const char*); class cif_data; class cif_loop; class cif_array; +namespace cif_detail { + struct cif_str_data; + struct bcif_data; +}; +using CIFData = std::variant; /** * Class for reading CIF files. 
@@ -57,7 +64,7 @@ class cif_array; * * Iterate over data blocks: * @verbatim - for (auto& block : cf.datablocks()) { + for (auto& [code, block] : cf.datablocks()) { // data_ const char* code = block->code(); @@ -80,8 +87,9 @@ class cif_array; @endverbatim */ class cif_file { +public: // std::vector m_tokens; - std::vector m_datablocks; + std::map m_datablocks; std::unique_ptr m_contents; /** @@ -98,6 +106,14 @@ class cif_file { /// Parse CIF string bool parse_string(const char*); + /** + * Parse BinaryCIF blob + * @param bytes BinaryCIF blob + * @param size Blob size + * @post datablocks() is valid + */ + bool parse_bcif(const char* bytes, std::size_t size); + protected: /// Report a parsing error virtual void error(const char*); @@ -114,54 +130,85 @@ class cif_file { cif_file(const char* filename, const char* contents = nullptr); /// Data blocks - const std::vector& datablocks() const { return m_datablocks; } + const std::map& datablocks() const { return m_datablocks; } }; -/** - * View on a CIF data array. 
The viewed data is owned by the cif_file - */ -class cif_array { - friend class cif_file; -private: - enum { NOT_IN_LOOP = -1 }; +using CifArrayElement = std::variant; - // column index, -1 if not in loop - short col; +namespace cif_detail { + struct cif_str_array { + enum { NOT_IN_LOOP = -1 }; - // pointer to either loop or single value - union { - const cif_loop * loop; - const char * value; - } pointer; + // column index, -1 if not in loop + short col; - // Raw data value or NULL for unknown/inapplicable and `pos >= size()` - const char* get_value_raw(unsigned pos = 0) const; + // pointer to either loop or single value + union { + const cif_loop * loop; + const char * value; + } pointer; - // point this array to a loop (only for parsing) - void set_loop(const cif_loop * loop, short col_) { - col = col_; - pointer.loop = loop; - }; + // Raw data value or NULL for unknown/inapplicable and `pos >= size()` + const char* get_value_raw(unsigned pos = 0) const; - // point this array to a single value (only for parsing) - void set_value(const char * value) { - col = NOT_IN_LOOP; - pointer.value = value; + // point this array to a loop (only for parsing) + void set_loop(const cif_loop * loop, short col_) { + col = col_; + pointer.loop = loop; + }; + + // point this array to a single value (only for parsing) + void set_value(const char * value) { + col = NOT_IN_LOOP; + pointer.value = value; + }; + }; + struct bcif_array { + std::vector m_arr{}; }; +} + +/** + * View on a CIF data array. 
The viewed data is owned by the cif_file + */ +class cif_array { + friend class cif_file; + +private: +public: // + mutable std::string m_internal_str_cache; + std::variant m_array; public: // constructor cif_array() = default; // constructor (only needed for EMPTY_ARRAY) - cif_array(std::nullptr_t) { set_value(nullptr); } + cif_array(std::nullptr_t) { + if (auto arr = std::get_if(&m_array)) { + arr->set_value(nullptr); + } else if (auto arr = std::get_if(&m_array)) { + arr->m_arr.clear(); + } + } + + cif_array(std::vector&& arr) { + m_array = cif_detail::bcif_array{std::move(arr)}; + } /// Number of elements in this array (= number of rows in loop) unsigned size() const; /// True if value in ['.', '?'] - bool is_missing(unsigned pos = 0) const { return !get_value_raw(pos); } + bool is_missing(unsigned pos = 0) const { + if (auto arr = std::get_if(&m_array)) { + return !arr->get_value_raw(pos); + } else { + return false; + } + } /// True if all values in ['.', '?'] bool is_missing_all() const; @@ -172,8 +219,25 @@ class cif_array { * @param d default value for unknown/inapplicable elements */ template T as(unsigned pos = 0, T d = T()) const { - const char* s = get_value_raw(pos); - return s ? _cif_detail::raw_to_typed(s) : d; + if (auto arr = std::get_if(&m_array)) { + const char* s = arr->get_value_raw(pos); + return s ? _cif_detail::raw_to_typed(s) : d; + } else if (auto arr = std::get_if(&m_array)) { + if (pos >= arr->m_arr.size()) + return d; + if constexpr(std::is_same_v) { + auto& str = std::get(arr->m_arr[pos]); + return !str.empty() ? 
str.c_str() : d; + } else { + if (auto ptr = std::get_if(&arr->m_arr[pos])) { + if (ptr->empty()) { + return d; + } + } + return std::get(arr->m_arr[pos]); + } + } + return d; } /** @@ -184,7 +248,26 @@ class cif_array { * @param d default value for unknown/inapplicable elements */ const char* as_s(unsigned pos = 0, const char* d = "") const { - return as(pos, d); + if (auto arr = std::get_if(&m_array)) { + return as(pos, d); + } else if (auto arr = std::get_if(&m_array)) { + if (pos >= arr->m_arr.size()) + return d; + if (auto str_ptr = std::get_if(&arr->m_arr[pos])) { + return str_ptr->c_str(); + } + auto to_string_visitor = [](auto&& arg) -> std::string { + if constexpr (std::is_same_v, + std::string>) { + return arg; + } else { + return std::to_string(arg); + } + }; + m_internal_str_cache = std::visit(to_string_visitor, arr->m_arr[pos]); + return m_internal_str_cache.c_str(); + } + return d; } /// Alias for as() @@ -210,17 +293,34 @@ class cif_array { /** * CIF data block. The viewed data is owned by the cif_file. 
*/ -class cif_data { - friend class cif_file; - // data_ - const char* m_code = nullptr; +namespace cif_detail { + struct cif_str_data { + // data_ + const char* m_code = nullptr; + + std::map<_cif_detail::zstring_view, cif_array> m_dict; + std::map m_dict_str; + std::map<_cif_detail::zstring_view, cif_detail::cif_str_data> m_saveframes; + + // only needed for freeing + std::vector> m_loops; + }; + + using ColumnMap = std::map>; + using CategoryMap = std::map; + using DataBlockMap = std::map; + struct bcif_data { + std::string m_code; + std::map> m_dict; + }; +} - std::map<_cif_detail::zstring_view, cif_array> m_dict; - std::map<_cif_detail::zstring_view, cif_data> m_saveframes; +class cif_data { + friend class cif_file; - // only needed for freeing - std::vector> m_loops; +public: // + CIFData m_data; // generic default value static const cif_array* empty_array(); @@ -234,7 +334,7 @@ class cif_data { cif_data& operator=(cif_data&&) = default; /// Block code (never NULL) - const char* code() const { return m_code ? 
m_code : ""; } + const char* code() const; // Get a pointer to array or NULL if not found const cif_array* get_arr(const char* key) const; @@ -253,7 +353,7 @@ class cif_data { } /// Get a pointer to a save frame or NULL if not found - const cif_data* get_saveframe(const char* code) const; + const cif_detail::cif_str_data* get_saveframe(const char* code) const; }; } // namespace pymol diff --git a/layer2/CifMoleculeReader.cpp b/layer2/CifMoleculeReader.cpp index 84194d5a4..d1230339b 100644 --- a/layer2/CifMoleculeReader.cpp +++ b/layer2/CifMoleculeReader.cpp @@ -435,7 +435,7 @@ static bond_dict_t * get_global_components_bond_dict(PyMOLGlobals * G) { return nullptr; } - for (const auto& datablock : cif.datablocks()) { + for (const auto& [code, datablock] : cif.datablocks()) { read_chem_comp_bond_dict(&datablock, bond_dict); } } @@ -2264,7 +2264,7 @@ pymol::Result ObjectMoleculeReadCifStr(PyMOLGlobals * G, Object return pymol::make_error("Parsing CIF file failed: ", cif->m_error_msg); } - for (const auto& datablock : cif->datablocks()) { + for (const auto& [code, datablock] : cif->datablocks()) { ObjectMolecule * obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet); if (!obj) { @@ -2330,7 +2330,7 @@ const bond_dict_t::mapped_type * bond_dict_t::get(PyMOLGlobals * G, const char * return nullptr; } - for (auto& item : cif.datablocks()) + for (auto& [code, item] : cif.datablocks()) read_chem_comp_bond_dict(&item, *this); } } @@ -2352,4 +2352,37 @@ const bond_dict_t::mapped_type * bond_dict_t::get(PyMOLGlobals * G, const char * return nullptr; } + +/////////////////////////////////////// + +pymol::Result ObjectMoleculeReadBCif(PyMOLGlobals* G, + ObjectMolecule* I, const char* bytes, std::size_t size, int frame, + int discrete, int quiet, int multiplex, int zoom) +{ + if (I) { + return pymol::Error("loading BCIF into existing object not supported, " + "please use 'create' to append to an existing object."); + } + + if (multiplex > 0) { + return 
pymol::Error("loading BCIF with multiplex=1 not supported, please " + "use 'split_states' after loading the object."); + } + + auto cif = std::make_unique(); + cif->parse_bcif(bytes, size); + + for (const auto& [code, datablock] : cif->datablocks()) { + auto obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet); + if (!obj) { + PRINTFB(G, FB_ObjectMolecule, FB_Warnings) + " mmCIF-Warning: no coordinates found in data_%s\n", datablock.code() ENDFB(G); + continue; + } + if (cif->datablocks().size() == 1 || multiplex == 0) + return obj; + } + return nullptr; +} + // vi:sw=2:ts=2:expandtab diff --git a/layer2/ObjectMolecule.h b/layer2/ObjectMolecule.h index 08230c5c8..0a7ecd732 100644 --- a/layer2/ObjectMolecule.h +++ b/layer2/ObjectMolecule.h @@ -518,6 +518,9 @@ ObjectMolecule *ObjectMoleculeReadMmtfStr(PyMOLGlobals * G, ObjectMolecule * I, const char *st, int st_len, int frame, int discrete, int quiet, int multiplex, int zoom); pymol::Result ObjectMoleculeReadCifStr(PyMOLGlobals * G, ObjectMolecule * I, const char *st, int frame, int discrete, int quiet, int multiplex, int zoom); +pymol::Result ObjectMoleculeReadBCif(PyMOLGlobals* G, + ObjectMolecule* I, const char* bytes, std::size_t size, int frame, + int discrete, int quiet, int multiplex, int zoom); std::unique_ptr LoadTrajSeleHelper( const ObjectMolecule* obj, CoordSet* cs, const char* selection); diff --git a/layer3/Executive.cpp b/layer3/Executive.cpp index 4a8371166..b7565abe2 100644 --- a/layer3/Executive.cpp +++ b/layer3/Executive.cpp @@ -3883,6 +3883,13 @@ pymol::Result<> ExecutiveLoad(PyMOLGlobals* G, ExecutiveLoadArgs const& args) p_return_if_error(res); obj = res.result(); } break; + case cLoadTypeBCIF: + case cLoadTypeBCIFStr: { + auto res = ObjectMoleculeReadBCif(G, static_cast(origObj), + content, size, state, discrete, quiet, multiplex, zoom); + p_return_if_error(res); + obj = res.result(); + } break; case cLoadTypeMMTF: case cLoadTypeMMTFStr: obj = ObjectMoleculeReadMmtfStr(G, 
(ObjectMolecule *) origObj, diff --git a/layer3/Executive.h b/layer3/Executive.h index 4491a47d7..59cf034d0 100644 --- a/layer3/Executive.h +++ b/layer3/Executive.h @@ -127,6 +127,8 @@ enum cLoadType_t : int { cLoadTypeCCP4UnspecifiedStr = 76, cLoadTypeMRCStr = 77, + cLoadTypeBCIF = 78, + cLoadTypeBCIFStr = 79, }; /* NOTE: if you add new content/object type above, then be sure to add diff --git a/modules/pymol/importing.py b/modules/pymol/importing.py index 318ad6696..b6abd61cf 100644 --- a/modules/pymol/importing.py +++ b/modules/pymol/importing.py @@ -89,6 +89,8 @@ def filename_to_format(filename): format = 'pdbml' elif ext in ('mmcif',): format = 'cif' + elif ext in ('bcif',): + format = 'bcif' elif re.match(r'pdb\d+$', ext): format = 'pdb' elif re.match(r'xyz_\d+$', ext): @@ -1134,6 +1136,9 @@ def finish_object(name, *, _self=cmd): "/data/structures/divided/mmCIF/{mid}/{code}.cif.gz", "https://files-versioned.wwpdb.org/pdb_versioned/views/latest/coordinates/mmcif/{mid}/pdb_{code:0>8}/pdb_{code:0>8}_xyz.cif.gz", ], + "bcif" : [ + "https://models.rcsb.org/{code}.{type}.gz", + ], "2fofc" : "https://www.ebi.ac.uk/pdbe/coordinates/files/{code}.ccp4", "fofc" : "https://www.ebi.ac.uk/pdbe/coordinates/files/{code}_diff.ccp4", "pubchem": [ @@ -1182,6 +1187,8 @@ def _fetch(code, name, state, finish, discrete, multiplex, zoom, type, path, nameFmt = '{type}_{code}.sdf' elif type == 'cif': pass + elif type == 'bcif': + pass elif type == 'mmtf': pass elif type == 'cc': @@ -1300,7 +1307,7 @@ def _multifetch(code,name,state,finish,discrete,multiplex,zoom,type,path,file,qu obj_name = 'emd_' + obj_code chain = None - if (len(obj_code) > 4 and type in ('pdb', 'cif', 'mmtf') and + if (len(obj_code) > 4 and type in ('pdb', 'cif', 'mmtf', 'bcif') and # "Extended PDB accession codes" have 8 characters, # try to distinguish by leading non-zero digit '1' <= obj_code[0] <= '9'): @@ -1344,8 +1351,8 @@ def fetch(code, name='', state=0, finish=1, discrete=-1, state = the state number into 
which the file should loaded. - type = str: cif, pdb, pdb1, 2fofc, fofc, emd, cid, sid {default: cif - (default was "pdb" up to 1.7.6)} + type = str: cif, bcif, pdb, pdb1, 2fofc, fofc, emd, cid, sid + {default: cif (default was "pdb" up to 1.7.6)} async_ = 0/1: download in the background and do not block the PyMOL command line {default: 0 -- changed in PyMOL 2.3} From 542fbd0ec3cfdf0536acc6c7838ddb983a48d186 Mon Sep 17 00:00:00 2001 From: Jarrett J Date: Sun, 31 Mar 2024 15:15:03 -0400 Subject: [PATCH 02/15] cif tests --- layerCTest/Test_CifFile.cpp | 302 ++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 151 deletions(-) diff --git a/layerCTest/Test_CifFile.cpp b/layerCTest/Test_CifFile.cpp index 67a05c8b0..e173dcafa 100644 --- a/layerCTest/Test_CifFile.cpp +++ b/layerCTest/Test_CifFile.cpp @@ -1,151 +1,151 @@ -#include "Test.h" - -#include "CifFile.h" - -using namespace pymol::test; - -const char* SAMPLE_CIF_STR = R"""( -data_FOO -_cat1.key1 noquotes -_cat1.key2 "two words" -_cat1.key3 ? # unknown -_cat1.key4 . # inapplicable -_cat1.KEY5 "UPPER CASE key" -loop_ -_cat2.key1 -_cat2.key2 -_cat2.key3 -_cat2.key4 -10 0.1 . foo -11 0.2 ? "TWO WORDS" -12 ? ? -;multi -line -value -; . 0.4 . . 
-data_bar -data_baz -_undotted_key "why not" -_typed_float1 1.23(45)e3 -_typed_float2 1.234(5)e1 -_typed_float3 1.23456789 -)"""; - -TEST_CASE("misc", "[CifFile]") -{ - // syntax 1 - pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR); - // syntax 2 (requires move constructor) - auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); - // move assign - pymol::cif_file cf3; - cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); - - // check all three instances have same data - REQUIRE(cf1.datablocks().size() == 3); - REQUIRE(cf2.datablocks().size() == 3); - REQUIRE(cf3.datablocks().size() == 3); - REQUIRE(cf1.datablocks()[2].get_opt("_undotted_key")->as_s() == std::string("why not")); - REQUIRE(cf2.datablocks()[2].get_opt("_undotted_key")->as_s() == std::string("why not")); - REQUIRE(cf3.datablocks()[2].get_opt("_undotted_key")->as_s() == std::string("why not")); - - auto& blocks = cf1.datablocks(); - - REQUIRE(blocks[0].code() == std::string("FOO")); - REQUIRE(blocks[1].code() == std::string("bar")); - REQUIRE(blocks[2].code() == std::string("baz")); - - auto* data = &blocks.front(); - - REQUIRE(data->get_arr("_cat1.key3") != nullptr); - REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3")); - REQUIRE(data->get_arr("_cat1.key6") == nullptr); - - REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false); - REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false); - REQUIRE(data->get_opt("_cat1.key3")->is_missing()); - REQUIRE(data->get_opt("_cat1.key4")->is_missing()); - REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false); - - REQUIRE(data->get_opt("_cat1.key4")->is_missing_all()); - REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false); - - // looped data - - REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false); - REQUIRE(data->get_opt("_cat2.key3")->is_missing_all()); - - // template getters - - std::vector vec1{10, 11, 12, 0}; - std::vector vec2{0.1f, 0.2f, 99.f, 0.4f}; - - REQUIRE(data->get_opt("_cat2.key1")->to_vector() == 
vec1); - REQUIRE(data->get_opt("_cat2.key2")->to_vector(99.f) == vec2); - - REQUIRE(data->get_opt("_cat2.key4")->as(0) == "foo"); - REQUIRE(data->get_opt("_cat2.key4")->as(1) == "TWO WORDS"); - REQUIRE(data->get_opt("_cat2.key4")->as(2) == "multi\nline\nvalue"); - REQUIRE(data->get_opt("_cat2.key4")->as(3) == ""); - - REQUIRE(data->get_opt("_cat2.key4")->as(0) == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->as(3) == nullptr); - - REQUIRE(data->get_opt("_cat2.key4")->to_vector()[0] == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->to_vector()[3] == nullptr); - REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[0] == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[3] == std::string("ABC")); - - // type deducted from default value - - REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int - REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int - REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. / 3.)); // double - REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1); - REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f); - REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo"); - REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted"); - - // as_X getters - - REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing - - REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10); - REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11); - REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing - - REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.); - REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.); - REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) == 99.); // missing - - REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) 
== 0.1); - REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing - REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) == 0.4); - - // out of bounds is default - - REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99); - - // alternate names - - REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10); - REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10); - REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99); - - // wildcard lookup - - REQUIRE(data->get_arr("_cat2_key1") == nullptr); - REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10); - REQUIRE(blocks[2].get_arr("_undotted.key") == nullptr); - REQUIRE(blocks[2].get_opt("_undotted?key")->as_s() == std::string("why not")); - - // float parsing - - REQUIRE(blocks[2].get_opt("_typed_float1")->as() == Approx(1230.f)); - REQUIRE(blocks[2].get_opt("_typed_float1")->as() == Approx(1230.00000)); - REQUIRE(blocks[2].get_opt("_typed_float2")->as() == Approx(12.3400000)); - REQUIRE(blocks[2].get_opt("_typed_float3")->as() == Approx(1.23456789)); -} - -// vi:sw=2:expandtab +#include "Test.h" + +#include "CifFile.h" + +using namespace pymol::test; + +const char* SAMPLE_CIF_STR = R"""( +data_FOO +_cat1.key1 noquotes +_cat1.key2 "two words" +_cat1.key3 ? # unknown +_cat1.key4 . # inapplicable +_cat1.KEY5 "UPPER CASE key" +loop_ +_cat2.key1 +_cat2.key2 +_cat2.key3 +_cat2.key4 +10 0.1 . foo +11 0.2 ? "TWO WORDS" +12 ? ? +;multi +line +value +; . 0.4 . . 
+data_bar +data_baz +_undotted_key "why not" +_typed_float1 1.23(45)e3 +_typed_float2 1.234(5)e1 +_typed_float3 1.23456789 +)"""; + +TEST_CASE("misc", "[CifFile]") +{ + // syntax 1 + pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR); + // syntax 2 (requires move constructor) + auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); + // move assign + pymol::cif_file cf3; + cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); + + // check all three instances have same data + REQUIRE(cf1.datablocks().size() == 3); + REQUIRE(cf2.datablocks().size() == 3); + REQUIRE(cf3.datablocks().size() == 3); + REQUIRE(cf1.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); + REQUIRE(cf2.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); + REQUIRE(cf3.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); + + auto& blocks = cf1.datablocks(); + + REQUIRE(blocks.find("FOO")->second.code() == std::string("FOO")); + REQUIRE(blocks.find("bar")->second.code() == std::string("bar")); + REQUIRE(blocks.find("baz")->second.code() == std::string("baz")); + + auto* data = &blocks.find("FOO")->second; + + REQUIRE(data->get_arr("_cat1.key3") != nullptr); + REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3")); + REQUIRE(data->get_arr("_cat1.key6") == nullptr); + + REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false); + REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false); + REQUIRE(data->get_opt("_cat1.key3")->is_missing()); + REQUIRE(data->get_opt("_cat1.key4")->is_missing()); + REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false); + + REQUIRE(data->get_opt("_cat1.key4")->is_missing_all()); + REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false); + + // looped data + + REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false); + REQUIRE(data->get_opt("_cat2.key3")->is_missing_all()); + + // template getters + + std::vector vec1{10, 11, 
12, 0}; + std::vector vec2{0.1f, 0.2f, 99.f, 0.4f}; + + REQUIRE(data->get_opt("_cat2.key1")->to_vector() == vec1); + REQUIRE(data->get_opt("_cat2.key2")->to_vector(99.f) == vec2); + + REQUIRE(data->get_opt("_cat2.key4")->as(0) == "foo"); + REQUIRE(data->get_opt("_cat2.key4")->as(1) == "TWO WORDS"); + REQUIRE(data->get_opt("_cat2.key4")->as(2) == "multi\nline\nvalue"); + REQUIRE(data->get_opt("_cat2.key4")->as(3) == ""); + + REQUIRE(data->get_opt("_cat2.key4")->as(0) == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->as(3) == nullptr); + + REQUIRE(data->get_opt("_cat2.key4")->to_vector()[0] == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->to_vector()[3] == nullptr); + REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[0] == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[3] == std::string("ABC")); + + // type deducted from default value + + REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int + REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int + REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. / 3.)); // double + REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1); + REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f); + REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo"); + REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted"); + + // as_X getters + + REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing + + REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10); + REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11); + REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing + + REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.); + REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.); + REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) 
== 99.); // missing + + REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) == 0.1); + REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing + REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) == 0.4); + + // out of bounds is default + + REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99); + + // alternate names + + REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10); + REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10); + REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99); + + // wildcard lookup + + REQUIRE(data->get_arr("_cat2_key1") == nullptr); + REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10); + REQUIRE(blocks.find("baz")->second.get_arr("_undotted.key") == nullptr); + REQUIRE(blocks.find("baz")->second.get_opt("_undotted?key")->as_s() == std::string("why not")); + + // float parsing + + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as() == Approx(1230.f)); + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as() == Approx(1230.00000)); + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float2")->as() == Approx(12.3400000)); + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float3")->as() == Approx(1.23456789)); +} + +// vi:sw=2:expandtab From 23bef142aefe1086ab591ebc32028fba677c9366 Mon Sep 17 00:00:00 2001 From: Jarrett J Date: Sun, 31 Mar 2024 15:47:26 -0400 Subject: [PATCH 03/15] Slight cleanup --- layer2/CifFile.cpp | 4 ---- layer2/CifFile.h | 5 +---- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp index 6bfca7a2c..c14aa6e3d 100644 --- a/layer2/CifFile.cpp +++ b/layer2/CifFile.cpp @@ -219,8 +219,6 @@ const cif_array * cif_data::get_arr(const char * key) const { if (columnIt == category.end()) { return nullptr; } - auto arr = &columnIt->second; - auto& arrPtr = std::get(arr->m_array); return &columnIt->second; } @@ -623,7 +621,6 @@ static std::vector run_length_decode( std::vector& data, 
DataTypes srcType, int srcSize) { std::vector result; - std::int32_t value = 0; for (std::size_t i = 0; i < data.size(); i += 2) { auto item = std::get(data[i]); auto count = std::get(data[i + 1]); @@ -778,7 +775,6 @@ bool cif_file::parse_bcif(const char* bytes, std::size_t size) auto categoryName = categoryMap["name"].as(); std::transform(categoryName.begin(), categoryName.end(), categoryName.begin(), ::tolower); - auto rowCount = categoryMap["rowCount"].as(); auto columnsRaw = categoryMap["columns"].as>(); auto& columns = categoriesData.m_dict[categoryName]; for (const auto& column : columnsRaw) { diff --git a/layer2/CifFile.h b/layer2/CifFile.h index 760da68de..e27d11e13 100644 --- a/layer2/CifFile.h +++ b/layer2/CifFile.h @@ -87,7 +87,6 @@ using CIFData = std::variant; @endverbatim */ class cif_file { -public: // std::vector m_tokens; std::map m_datablocks; std::unique_ptr m_contents; @@ -177,7 +176,6 @@ class cif_array { friend class cif_file; private: -public: // mutable std::string m_internal_str_cache; std::variant m_array; @@ -248,7 +246,7 @@ class cif_array { * @param d default value for unknown/inapplicable elements */ const char* as_s(unsigned pos = 0, const char* d = "") const { - if (auto arr = std::get_if(&m_array)) { + if (std::get_if(&m_array)) { return as(pos, d); } else if (auto arr = std::get_if(&m_array)) { if (pos >= arr->m_arr.size()) @@ -319,7 +317,6 @@ namespace cif_detail { class cif_data { friend class cif_file; -public: // CIFData m_data; // generic default value From 2607fffd2cc8ca3aade9f0d45d06de33dfd5e3c3 Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 14:55:18 -0400 Subject: [PATCH 04/15] bcif loadable --- modules/pymol/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/pymol/constants.py b/modules/pymol/constants.py index e17cb49ad..4d7ca8e24 100644 --- a/modules/pymol/constants.py +++ b/modules/pymol/constants.py @@ -59,6 +59,8 @@ class _loadable: dxstr = 75 # DX file (APBS) mapstr = 76 # 
unspecified CCP4 or MRC map mrcstr = 77 + bcif = 78 + bcifstr = 79 class loadable(_loadable): @classmethod From 5ba5767bd2925a041ef4f10a5b76fbb281f69ad3 Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 15:12:46 -0400 Subject: [PATCH 05/15] Load from bcif.gz; add unit test --- layer3/Executive.cpp | 2 ++ modules/pymol/constants.py | 3 ++- modules/pymol/importing.py | 3 +++ testing/data/115d.bcif.gz | Bin 0 -> 15343 bytes testing/tests/api/test_importing.py | 8 ++++++++ 5 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 testing/data/115d.bcif.gz create mode 100644 testing/tests/api/test_importing.py diff --git a/layer3/Executive.cpp b/layer3/Executive.cpp index 8f61a049c..dcbacc726 100644 --- a/layer3/Executive.cpp +++ b/layer3/Executive.cpp @@ -3693,6 +3693,7 @@ ExecutiveLoadPrepareArgs(PyMOLGlobals * G, case cLoadTypeSDF2Str: case cLoadTypeXYZStr: case cLoadTypeDXStr: + case cLoadTypeBCIFStr: if (!content) { return pymol::Error("content is nullptr"); } @@ -3715,6 +3716,7 @@ ExecutiveLoadPrepareArgs(PyMOLGlobals * G, case cLoadTypeSDF2: case cLoadTypeXYZ: case cLoadTypeDXMap: + case cLoadTypeBCIF: if (content) { fname_null_ok = true; break; diff --git a/modules/pymol/constants.py b/modules/pymol/constants.py index 4d7ca8e24..dc19cdebc 100644 --- a/modules/pymol/constants.py +++ b/modules/pymol/constants.py @@ -84,7 +84,8 @@ def _reverse_lookup(cls, number): loadable.map : loadable.mapstr, loadable.dx : loadable.dxstr, loadable.xyz : loadable.xyzstr, - loadable.sdf2 : loadable.sdf2str} + loadable.sdf2 : loadable.sdf2str, + loadable.bcif : loadable.bcifstr} sanitize_alpha_list_re = re.compile(r"[^a-zA-Z0-9_\'\"\.\-\[\]\,]+") nt_hidden_path_re = re.compile(r"\$[\/\\]") diff --git a/modules/pymol/importing.py b/modules/pymol/importing.py index b6abd61cf..da323aa02 100644 --- a/modules/pymol/importing.py +++ b/modules/pymol/importing.py @@ -91,6 +91,9 @@ def filename_to_format(filename): format = 'cif' elif ext in ('bcif',): format = 
'bcif' + elif ext in ('bcifgz',): + format = 'bcif' + zipped = 'gz' elif re.match(r'pdb\d+$', ext): format = 'pdb' elif re.match(r'xyz_\d+$', ext): diff --git a/testing/data/115d.bcif.gz b/testing/data/115d.bcif.gz new file mode 100644 index 0000000000000000000000000000000000000000..08a37fcdeb4eb06b8d37e591e8e49853f0516e9b GIT binary patch literal 15343 zcmV^sPkGA2ddpV2=Ku$T8C=AG194cS1Ym%{*T4SyJNx_k)lJAzd=UyC zusrk38Hz4RJkN*(dJrw3e>PX3AksW{EgYH(g|~{NL{2k2wJg5&%0>l}=!5H#$hpK8 zMM|*D3p5me<@xrj>m13#4FEYwsBcm{Q)4;tHRaW-ba5Tu<(2F57t>t%wZC0orn%zH ztR$dkF+m{f+xQ(x?fNV7^&8W53CdU3rFs?mw?z5<3G_~xgdM(A5}~Ah>Wx?BPhCqu zMj|&5Wo4T4Y>EPv!yo>hd~s)E0R?G)|oT%e1_0` zhIpZT27f8~Iub9yGo(n-^clxZ`1y{j<0ioGD3BrqQEh3wsa|Sa=~;dZeyo1;vFfI2 zJDr%$#Iw^^uAm3DV;|^%eZb=nj49~*Jzj4!yK1;%EieMeet>0tEHO77i6lTI91ce= zgu`dU4!}4te$*V}5Ah=0Wo?q8plBl8oQU=|A{~S=%8_$L+cCDFr-8BGbJWv-Sdk$* z_}a;6zsKiOCgZw18Hv>V2%!I7BpixHBjGcMW;_A-e+Nv$*Y+?84Uz9xrsES?L#bAg z09R{d@dM` zMv)$pUzmG`?gIciXc-?Cb|QS8}yDwAZTwze3|k$)G{l5@3}VX{;(8 zF#f|9<6pOj0p>YcqDh7jAOn$E?EnC*7~p0Rij+WCZ3zhcvjYb1F%G;-gV3wm#~2>@*i0Lzhl&E(!7>POn6sw5dwh9A4e>SXz z=_Mzxh3%@qV-aZ6;FpH5Ut|x0AUmt<>T^SN`cR!3pnD(hmQX?kvWSGn|8XT2586{Vm3kZie6X!-#&J$J7{Bu-b0>w4sskR6eo#PC@$1Q zI7wUyO^0%!C6cPJBsWQ14y6T>E29g!P_{xYFGKX^?}RdBf#&!XnyO3^iIDAcf2hOU zAMzY|^vK~O$9CP`v1a=f`?}6c5E?TOHvmOTa@s^=xqs}pf@D7eE?ZTBW=WwQiB35n z3~~s=Y#zkCCec#QM(Tbb`rqEODWM1jSR_`FjuQy65hJ8Ig5-!;FtHRPcuGQ}5(rWv zBB2XJj1ZuR_y;pmD9*z*T9j~pj^In^10+`@rh_xcWGC=v@qLIa`aT8{aiNZs1w$;5 zl0;K53tz6`&jk~(3b`WEv_xDf;*oh2->yMIg0CUFpO}LTy%r*JmED`|hBeXqC3GRB zQd>Md8=MX1f-(6o9*jS&euSYpsN7b$(+j~2Stnv>?bbzF)NV5)kY%j>P443W?tAu6 zUY@hyzMb##H31CoglHSmGLm!s`RiSd94}WH$awPr+7|EJIMD z1ySlkH+|;gy9LXqBkmXNiSsGMO?W)1uYU93bQ3~1WcnDgOY;8SR`M==^@!&Wn#fzP zL(=GTn^Jb@7yWQq2g({M#{b1vda7<~Y(*DAESp;#fmMF*@^m(lIKLFjc|@ zbLq^&bnL7H7VtRwu~@*!RI2q6p4$(E*Ynx@a}KCx@%b#Wr+TiP&t{iK0Mq;53}2YK z6pTc|(Qr6=F&Mr$bsjy42BY$Wi{aqe^JgzPfQ27HKb>?Cf<2y1bR}m0&Q>w|*Iz#C zIoeEPf#;V&6*?@8xL;A<3=@SCS0$J6qoV7fU5nI(~`Db$4eB#{lpLeJCUD#_B4Ku!{i=y?(i3Vkk# 
z--lkBwLc1Ztiv21^Bg&XqmYLmef0Q=M<$fXKB)x!(9dtb40wM#;q`vu_51(t=$Fk# zta)(KENtxUUYhr{t~%}qe6ndjAdTZfbcu!nNM+*lM&o6lgu{r*?~?}k{kjz@f%Xek zRn!w$DW61t4Jm(8Wiz~!ftDJZ+267XFAqK{-oCQf)9ljszi-q2`D1Srs z`1ka5CjWH$nV9}j9Q{5WoI3TiQ#d+g&e1zK2~lH6unOsN#Ud`*bIYvHJ*T|?))-J* z0gFAKw-Was=!v=~^95I93rH98CBX6wq-qQ_ZsZtz!Cwd2nr*T)WGW#w0!-^D72T7Q z+i-Dj$!N1Rsh`$pF6Mi{yriZFUCf7pd7tI%F4{e!-7x4?+_zPO9$-sUv+94O=Izu= ztpS^!I-A(j%i=>!a1rurb%8GMT#aHNFCkyj0h80n6dQNJbD~rP49zW*<-r4TmE#>S zc3<1EvpmPMHZ*m4yAJKI6YswQKr!q@_%h%*2pA|K+t{9(sQXWyPRu?n|6!Z_bRwpE zkci8F4gl&Y3!px+0t!&%;M>4lpPkLe^09c_0fM`x|>w?mnY(x25y;wc&&hI?u_N1z&E; z?F8&tk8`Ox2aLOQJsNrJj^1#@BHNJfv*fSN0s9FX8c#N*%HHN`#`^O;CQAAuxnrW_ zke=ubb`QDJ0cQBj)28|i-;Km9vN;m5g#C)ObJo=7_wDXV@p>P4n_JIsR3MTx{J@h= zgXCd-8KeE3O1ds%#70ZAXqZsb1)$M!#n5-t31tYK+sSOJ+?$vkhA{5j83u8%PZh|m z{hIcW>KalHCw(J&oJ7D5FnW&6lAxOYazLKo$PrEK1`J(*vDlTJSKOUr1i1GqXMbbU zE?V|_Jq?casJWLe?b$JTmzV=Kwe-?~>h*KM=}c-d6^xz=CX%TaUd(6a6Z1>S;JH)5 z#ne(NJ(tR+=VG~}c0HCzE~T>V-VuK>*ZvmBd$qX574(;JfaE&;)YqRfY2B<3vx{{k z23AfJUO7uF zog-4`(bG%Hvne8Vq4^?uI)^^ufR*sr@ngpWRx81-Z1mW#$Ggf#KY=CgfBfNt2dAz+ zc<{ncJf6c}eDT1%{hZqNNpt#uh)q3cX#2EDignhGeV!-mF-xw-?2mzUHbHi)O(&Y& z)jhbyeZ#?H{p*g^;;NvcXisiI%WPC8HlgV{&7C$1bDqZO-;9j z_m(U>`pnw7c96LpBp<9tbA7cF9j>Y5RDbbA&t(GUOHOi#mTuF`I4hQZ z!r*V6XiDX(Ib4Ig^_I=n6ArbD_Y_`vn zAC8043!2?KD3kx`^`7d|K`}%5@jcmS`F7;x5qSYWGL6tL{_}f%{zLfDSipBku3hxW zm(c${pLaVTKM#xryngNVfM5BfPkDOCe+ZeeUHUT*-C+0Z{NC{p{*sFgSZ^a0Vr4<@vym~oP6i*PS!avTA%+t^nZEC z0rKsNf2$tD-4(3`nM0>6&8>x24_xy$$sP zm876@$~nLr=D`(W8+|n#>pbpvUI=DlFA|CQnVC#1o|{iC zIRJ#mEI_zTDJNQ3@!A1jlEyW(qpTsIqGm7%Osda%YGkrqglKS59k7setuiy=ct=BM zk(8tgTs5N9pUd~}|2r)6{XLETDikPuFr3I+bX4utYkmByp}9}}Hx_)@nbasQ-fI>Y z*g#x0RIF}k*Y+qo?9yhppO2+e8Cjdo+-Uoog*%U!r@86dCUJvi{-DBdsgRw+}z#e^0u1ko{45MYp7tY zo;vRc=<7_-4z*$9`WYWaSMcu7%GB!B_~C)ozUajpXx{bZi~}6Z0oB)Hd@5I$FQyZV zvqUVO%%Nx5GlM*&e&*|Iy2Ae@HCuhnvy#J#IsW$G@ z6`ZeAJlAK-XczN7G2dHHT^IJAVK*Mfya|eQsU&cQ)@V*UEn}Nx3p9>)^x>en4Ve3C zcYRk6d7o~=en5XGlS(ZTx#S{J__16*lXSp_+7q5;uI_d&Jiab%=;19(V`@kXtEzGY 
zGYZ5$t|72}L%qEJ189;f*EMoSfU)m45<;oPBrD6I@HSBP>3A??*1T~j4dP+L@qy_%)Yo3pUb3jnIWX;yKi5&L7JS-FNQOg7rAjvW`Hwu$5!pc)K zWpB&)yC%ziI1svE2}6tSDYruovTXtZmKaXdjIMt==;b~i^92zKE3gPCfySP#tPaa8 z3Fh6sf;wK(cEnoaH8T*0h}I5Ou%e5Z6=vH=mdG(DlaO40(xf;nX>zMU`sl^U-bb`+AslN4|sDhbeysF>k&uN5Z%1)=9% z;|Bw+`-Bx-Zm*&aOlDBw?vk`b>%*2fhzpi=c5fG~Dk-2BBq+Fxume)a5s(BD*?%rl zW=P%QDXLb*`31nzZpX+@5Rq#xL?#ezDQP_xk}eCGvJlB(e5U^*@)G)WA!-Lv^d50(5~g0qW;U zLmzpW0g0Cz^9d}?L3}EDjq0_W?qGoSi3}RDt0aN1(iD&?2s4s#*~~fv^OU+Ep#XWR z0$Ezb8!*xOEJI1zkM9aHQ{cX708WP)5UY>^6e+qn1~ce>rWAaD8Jz7CVWaP%t2V9E z@5(0Z6)2Dl1CrT1$hbI}<@#E2lG)4!%QWer;)GyYTyP;WfoNL?QOlvrE<%m!7^R%U zj=GJJI!v=esZf{IGJ3Z=w~)o@Wx7~HW<@NK1|^1ckIgj=Q&7AhKt^e&NAa*!0y8}J zoMw7G735&x^>b^y-0d;#9ZW5JxKdojtw{?|ZsbV2h;>8^^O}oTFNld1G`3~3>>}1C zp5J8kS1ZWzb~oKx^Ow%1&49R2qp4-FLUPL>k#XSG&M}E3dhrnSy3)EwHWmq7_<{wEhA=c74?rU-ddwrx;scSYL=`a}F4}RjUQ1 zwpnZQCGo5%A20x0_a?V>*p0XSwDq=J4M9v(fTVg12KjjXHR#`8}9gS(;X#V>ak_W4-Mnq8r3S>8zWcQY!sw-<#P4{wx!dFX~2?beXnT( z^&bv;yOx{y>(@;8TJJ}D{vJ4Um-3d)tNl^Fw<+k0@|F#gqd|FeVBE3~ln;nohRx?@ z+iOPy^Qn>LmZ_1IEoM~@4(IdRGUZ@JJ_mK=IHsBUQRA9s>PJV;wD#juHWeC9{XMwm z-8G4pn8ByO?$4$&iDV|3Ag;u6$qbRrW%6-+$Tcy)M8wcT)!nVlm^SdhyOMb-wqhPRfKX~f@^~>9Jv4g>|G0R8&#I=*WLQ~ zlf;Q_en~bVff$&CjlTKJ?1C+aC_6ziCRt$FS(Ple6B$`Dl9Ekksy3;eg&JmOhXn$m zsG=wmNXEhW5jF#4m=Cp3HC4&ZGEfv*K2y{Vvn+(oKmsI|CGC6NEvdWJQtQ(#z0S1l zcDJN<`@MVbyYJll?z?vpJ8dmp*ze!iw|>n+@0#`N?p(U?R)7DxTh^`Vhwo!Slto2E zDPB6@hfOfCeM|y{ipy{|zJL)e-$&u_>+SPsKm1Gzv^Eg{e&xuGE(J1pf*pZF5wv8A z{EDH?!EiJ@T+1BD_iSQtOLRR~IyKj;JjFCBRgDAw%9pC9ixnK4F~cKarbdx92r6{s z!?`k?l$k8mFH%}!eCPF+*!HY(n_<&J7o(2SXtykqF1sPn3i^ z2@dB-5KhEKV(~Gb8jFPWdac%hdwR7;TFO<>0gxRu2hkLgSBzMVofG*fF5kXDIGWJQFv4g?sI>;TKscD9Fh#lk zSeqss4AKUMa5PXj>b6vjxlJn6lZcwQ=who(>8&9{6*p34|Fl4k^ml=fmhAA_SNHpaxmAlwk0}RMzDIqFH1Y`)I=T|id z$&#T1FUgMiz$PXt3B7qP|5=?1;Bai5i3gZ%<8gTaUDd-TQN#msGeyD)^#erq_b%;Q z>RE~db}p6+DJPGMILQ(@BG^g#9?_wW0%t!|oz%~Vhq=YWg{b9hZWj!wKBrVXk|xWO 
z&)NbmmkWb#n@UBJ}aDTNE^1$5hwb&czAc@0-gAT7jPcc4po;I=Tr+*WQZ4E{k8+WpECkJebIzZR;S72vs<0G8R-wnC`>T#;%)^2Y7Esx?@m@nST|2?fUl z3}PqD<{Wjcpi4;I<6K!CCb$*;8#ZB%HhUm6qVqFDvE*h9=t~n)`WA@ya4F)@dE%5) z$sNdMwZa4W{kq`6yYK{YS$J?uXG>M{)Mb+uv1eYMJ;%nA(YP%T2@Wxl0PoktgH0v0 zF}C4eW`YJZAyQf*;)Y^l<4p{sd$qiD00CVm`q%aiZtU;v_h7I&j-*JEBk(*(>TDJr zNYfAierMU=#e3=nWJhbD6@{vjT2Qsna*&3{G-@{6X6~DiBybatMl{LkLST;h9X&pq z&+d2o9Db+I?Q{C=>f*Upjqn@=NNbKq{1;?X2JOh4Hf7L3foVz<$H}d)ix?G2Vxu*U z$xSf130cJ=kh-E}3|48#suKcMC6dADa4IjPSK)in~22dO#@d6ajvfmu|&ayx}OHlGq++Hmdry2mo zNz`n%<=i*=DLoc-1I3RHKnQO4IUyW(`<)Pw>ja2{dVsjy0OEMV1tF^pAdWSwc;^Wl zKaLdyxdi-yLfYyGgXr8HcvL`jY`lRcc$`=82J+wuzoDpmYd}a@9SuaC8v{oLRL8~^ zFjZg+9O4>ocso?}cD$Geg$^~$ECUrb1i(AW8S@6Kn$N1@VP&|{^}{QyaGt@dvMSJU zH|c;?hPw%CR%w=(SF3>8J9;FljH+B2~j{NpENX_NCvhB zBU_|{s^o5jQ`E<~|9^Ge+#}_vHAl8Lyzy~OpBWH#c$_yuI~AU3cr1X&UGVs>8L)k9 zNJK1`sgR{zAQm5H;&{lup_T+{X@aYL9ILvjk-;1-nUmk^GmrNu3HSQ2doLsjBycE1 zq0Bp6cVS2}SpFmw4=0&;IH&`XVl^CU%QiI;XGWMf{2L>?m&Wv#=A`;rAU8#a;Vn!c z6p1Amq+^o7Le;;g)X8jekS!O@*6fOIDaycJ)uRq^D@B|WHDFUcOU7Q0s|N!%m#SoN zNVrg@LHD1=p)M0B+j-v8r$OiUG+@z5x%Xs=-X9dKDHV6Vx#i0A8reTpTF6w<*UF6@~(oZ=o@luL{4(loJwb1cjkUjgj|=*`>@8^|KL| za3FBKV56=owQrQYWg28629$b6Q^UM7!dw#m0h{)dzp|Hz0(p+BXood}qI>$Z#}k!D zE6UTeR@1H-^nAA8(+G^BOj44PCuv!GQLdr0v{aZ~DjJ}T)u?#l1cXhiKnYad<656x zYmmMU(jQKQd1%9?`qd5X@r$u7gKny|4w^<)oSYl-6s^&<(G+xp)n1J9=tegZikA~c zBBNzpFmlbjy_ROaTAulNew$uZ&!=O|v8urvgY?aVH+q;h4!Z+xm&*}wyPd9p%k9*h z3nhXt^IV__lgT9SWKz+=_p+9jp4BXI@lpIf0Evq)K9(2`48@|u1^-^B;osA9d*vdnv(8EHiT$z9?;GVm;7ve}6AZ3ZngFtSqG@ zMV7D-s`W-`IvayF!yARilwf$Hnnn5p$hA=K?oCmTPnoxC=R!6SJ5xJ96=f1)JCF>< zH!*eZLY4Gmm6opXnfHs758I1jW@}iks_7$@bV2<#-hA+3QIbuzjrok+r5al+z3Z!< zrl9IcldvbxMZ(d0n0Os!gBt1u+T*(mE2Nb30ZCn#FUh$sgV9Y9rr^80vXt*qE*WjX zcd4l2J5)q6xe!wTW~u1jtf_?x?#&LI-J2bXg8hQW)q^4!;csV;-_hf7_IRWL6m=#k zwrZ55m@3X@kTbTic8#T7U&ZH)$1;KGDCnQRZ0%iBt3T zSqGObHA@m{f*`X5S(odyzNIF4BGUOmMv!Qrq0EvbAjwAhQPtGkxS9_gRi|l?=a(=| zLr0-Th{o<#gF|(52oJAp6)JTSqJ;sEt;Yj#gIjY(DMJDZPzALL;Q&;pAOqh`6It~= 
zwFV?83kfixYP%v;_aR(>htD>*g4&KGnWWg?^*L`f#{(q9E)Wl}tG^BOs$hCCw^gmIvbh(EOo-X?ZQ4Y39i5R0`c=1qEbLy*J@cyZY|qG`aUhN z57)yvM9*>t5}P(q$Rft{vMt2x=-t(ZmH{na-FqsWx@jr`1-0 zt;#SEt3~Qo!URZofZT4#{91K~Xqye6Gk7dE9EdZC@bH%WQuPy!+hn7-3@t`mDpUv2 z9zbGjAV?E{Xj>VF)@ajl-{3|JKu3PlF)bwNWJ8NQ%S&LM-DE~&15DOmL(v#LQgRxD zN4>%GTmy~6_Uw6wbgT0;&>-;_ph&x+CTxltjqd5}CLZL0= z!Dwi^q(YLRbv4ntx>6T=N5b1MkY&nH6wOlSC1sfZq+JEu zd%Z6711|^c@NoE6c~<&X_}t!=UboHbwtMY9r`KuoI_+MU-|e$|ZC*S4u}iar*12uS zSqtZvC~$rW0mv503IIiufFfz`05nCKSdkz}c$xFGJ;?2LDD!nb4L~}ZCgFGX3IIL? z3*Lk8&d+AxyW}*;%!IGb2=4hBn5My!cfs8J9i0Gtl94{;x;MeHw}g*rh0jS}6uGES zzfcOhp;$x@BD&w_@%If5Xg~HX3KC#JOck;?3)l@f8Fn8|7LXYfv{Lv=qc^hXwk-TK z3%F~VAWGf?GU&E)cVyB3gIChWP@<{!d3LG|#$#i_WH_YFN@Q*NUcR(1zDtpFh-JV8 zU(v+3;9&*;&c8*3P9&K)6GNpuM#8uddbu)K^fdr?D@Uk-9I2NREodtyiFS!y#a$8b z1@UKs>v_U!k#n16L@W+_5tM4RIyWcRMt{X+;U7!j+sOxu@tZJnZ@~i6>1$LCxC%IYG^#{fpKD@)ur%6h!qY%U5X&I7eDO>$qs8&ddNEpLP9yKKT6< zy#U^En!5J=Zn?XSP3P|+*=tk7 z@q10|S)?=kzKB=_&?|~1JPFt|1jK6oH9FcFj^}G4d!Y*fjvW|0jh@R$|XFXvDsYx7%(|cCrO#t{Ub~; zxg}n0aFKGyj|G>F#NuNMk=Bne3pX+Xi|8ON`T3$yT%y6iP%yy+#)IK_QBJMN zd~T|tqPbXEr__`vNg+LzRkRZ!98e*0xj{V;Dn*P6Op}B>N$axcRkRi4x5G*jA@SBc z4vCPc+}=hjGWoJVQjaP}UfE#GI%l1lq*Za!5p9k+5YCsG(aT`NuZEs@z>VruhiM-} zUb7Ax)u9gaaLL@(;-JIw$=u|1*l0N&Cf8uA7SzkNFw1?3wA84frPS3@&n7lAK`8>P zH7)NOna8vfh#ADUcZ}`m+WE?NckGBfATk6XwV7mSvs%5#-l3&f51pNvKC|QZfup+rqY;pYQy`Cp)&f|NXH&!ba{J+7h}aqE^Q2H$G%4C9>?cVEHZ-QdR!> z7r&i-_uSp{JIVjeejy4-m>7@8#+gW@h}<=I<&*!`v@4IE-3@+e`Qqzeoux8vyGU+e zN22f_sMV2+Ki9M)Ep4xV{?x2s>ymS?S?@V3@(d#*Ro)uKA39HPjc4l#Ry8LzBVDob zCc@L9-a!9a+vA$M>yS1F}sziAIA=?P~S`TZkl2RD9}H$cL%xV423kyEujPJtAJEX+Wil5>fuSCURj}qTq$X zLX3Wf6n;)l@vs`zt**ifJ<1bRU%Cp*Ru9DGJqt)vw zOY?A*ZiW=*y6V})=J1F#t4^&eUMfjEG5tMy!?d^$k#Lb4 zyyn&Z3vX-MiH+oJ;yz+|c2{?Qn!KNw3r96NQs>>)$?Hf4Vw0$;}8FL&4Q19wcxgo9_kbg zhUDLe6KXNy)b|%@I~X#Lo^K&;p^moPeNME(wJk6lPK-x_Ax0+YSIuH|c2Lva9I$+R zahyIjwU_F@AnMI_O?s0#t7&hRm|ni{1F*_;?}ax-+af2?#-C6y5>QiNwY{imUuJ^E z>6^)gnNPr9iInbh;b&(wOZvE(6Ew81wnZ-+%ykM!S!S26F)6Vpz?o4%04Ytmp{^x6vWTC~&va6KM 
zTuqtaTG#N{yqYj}1+jTg^s>QuxtKgIMz4>3)1D8`w6m$KKrQ%*X5T z5q$jJu6*bo7t6=p?asyU@j-p;yPg$%G#?k-r@cX~TtBh0GC>#0Y?VSD=FI#y9cmTIDiETSNyBcpm9cKG@T506$o?COi%tJ;Ni zWYidT%^h}<*-kfpME%Tm7_M?jXFJ_+{gN4Wqk0{7x|!`N8FsUpJKO1o>-ryd`6z1( z?~h>9&<7IXB$LR+R6qHro+|03k$5na3?$>hXksKV7K9DocDL<2@JC7t*?)00zdPVB zPyXHUCyzgNQzehKSNG_3%nw&tSO2i0M?0jCuB~EkearymgcB%J-!4Oao%W+9RzIQ4 zYWpR^YR4tQYKIJ~b=r?2xhgNR@9x|7w;TTXagoR#7d?6_gF3A{KFfAOZ%mf$gnl%K zG>hK>4#A*1a{%TcJ_Sc}%V;QtU?^&p&`c0(KJQQ~sNXg1MY0x=BQI1RD!G z@w)j%q)qYImT`peRfqMb!0uYQ-KEHY1E}Pgcg=?04Yy*DAD1?B9Oxph2VF!rJcjx6 zH^D~XP~M|V8$E-S`s?(=(m1AW?;%~T$rXh?Fa%&&pnY+)Nz zbcQWIL!u%!CE{UTR< zfGuH$DqN^s-(r3(qpgDa7L=821Arn)6F)1y2ApsVc7F=MoN2D$6u1bFR(N?93RZ@s z;E@HL@O1_r@V@pL;7k6_s{ew9uPhdLZ2>9x+X_Bsui3kb-an6NyJi7mgD6mp9})?Ncj@cL_()-Z=Xkdr>^pLmVyCeC7uRPlgp^< zM5`kld6{T3oRo2HYevBG5E*1W{0rPJCe zeNAjsTPJlOG}XtIk0Le_Io))WEm)yxsv~0-sc{w|MYG6$PcWX02DYgcv>RGgDQRSb zc1ty$H>ju^3F4Y;iVlGN)Mdog^egFM@X_=>vd46o{zu}s)Vb^r=mlT@TWb5%huL>C z_ga2T?lR3MoYV}`L!O_0Kb@I+Av@1RlCP&9vdp9wo502Ar(SQn7QCDK-Bf$Flm3_1 zxo4kB&F*@H>Pnx^ET!F3d(!8oelfKT{KM3??jKQezwWi(Z;FFM(=*8B^eHeu3 zr~e`~L40re?J0-FWBOO>O!_$a)6_Rpv$9XnPtx<#OTN6#>TL^xQ{)5G!R+Dby`~RR zZxd;H_^T_aHFVd72(g}gIW;5w$IKyc;G&24X!;(odiqrA)v5N(WacyQYr32IQG1Gf zXL`-lBbk*`Z)WzT)0tV*H;^5KKV_c&9kq_$p8XpWN&J6%S00sBwZ+eLzd=Tsd4eDg zAd!+0nFE#~&S=(JnD#_O#2ivka;m4x<&aidF%&8e=&P4fA`V%mJ&Y6;0VNa!#rG-k z0g{Tq_kH(F?*i4zR-n-X6>+bXIbNAlAbM~3UPRSCyvTZNb(-g2n zyeks<0r)*~;C<;h{bR~lQ~=iVYICaknHY!Omant9Xcfv=bYwee6yxy;l4m`nR&ujg zg@(3%N?$X^&_y&I4##D37CWx2fm4;a&`q(zV5J{FkP&V(+TE>-F21lta1VtLoP=tQR0fcOsB$W$_?B@ z7Ag_&1j97c=!V1j=c*UaL|q)#-z~IU>KcL4X}RcwKVVXw$XaY$#8blHejP^IE*nQE zb8w&-54Yh>B1!9Irfy(*Tp&DYjiH|Gg-vv$ejc{79M$($G9_n`;3}R)qePUNsP$HW zbu2GbKhrcs?MEtI+iKBrOqr26!#TLZGEq*XDfF&9MFYSV>Szol4vt^enM63yIrh1p zOjdZTYuN2jUj0VjZC+Eq%Ju!`-WWeGzLWsqs2Ymn$4x}zyTaO+#@>xNZID>B2Gs9rY0h3FeLg}X>^ zSp{a)J5zt17qAEl-%#wVJ`@=x>4#>3H_&oei~LZgfZ__!{;l$2YcjqD)``O`OU%_> zmnGIMa1WhEJtj?4$Ac^K@Ehof@& 
zEKUI1_)`9%WjuORX0vVl7&vO!XHB%_8&|WpsSkEh4;vTQg83Sr!tRlJ;+T;9S#zO>_wT1jsZoiClaR07x|NSGsH=*;yJhmTt(4V6F)$7q=8QYH{fh` zkk1eU;dMB#)gM?`F{*%6v(-||UgkpjDK2ulw8^LBMj0%3!42Sb>JGoqtjlN|O}@14 zRwKXwxdXVvNcae722r3yYd2SfIiJ+@`3!mo1cEH;Yn)7$+qSBk6 zFa*9Lio{1w7z`Ab#UT&^{g8nMf(XT=tbZpc$W|0ro4&GMX&x!72WQ`o4ZV@wUvM!$c1l$6MGyRw_JX5158Mk%#AWvV@&s zF~Xar(O8`gDCioqA)fUcEnPeX+Uq~yv$->~vJc1v^ zUN{geMnmN|(}>a%|Fk>L`JC)xD5!H^=r_NyR2O#hdywVZ?cVV|@n>5s*ggmb^`6;O za4Xq8?%MOd>#lT-_}0#;9Nk^vUvh4*_eHyRn-7G(FE)I8QS1YWEh?FB^wnoOdR6vw zo9rFnvc~Od?K;!JzQy^b{@r{=m$l*fz0!}z^jvF2eOhnSmOmYKuJF0082`8%7Y0u+ zD}3@FHP3j?_W0bgBscQ~^YvqGpAHJVq>mcIP|wOsA`_y=w232F_?6fMXsoCN@9 zNgK|1sX`n9vUQ5S0M526dre}C0vKe1#DYL~m9wCGIi)@PiWLsDM z5`RQ(lt;uYuLP8!8Vv%^#NIrQ)R7RDPG{44`X(ENV#FGHO22^E-~slM<0s@++frRL z$&q7eh~}j?Tm4w7I*fNml~y}pPY>&B$T-)d$|y?|G-%SKItY&;NtTV=moC<{K(!X9 z(lN#*WbB0}V6M`L?7@ekNw-s$gO8aJ^;bekv8*CV;0Q^utfCw4W$`T}G00$lgvw@?` zV^`H4)CacMK5#k$=UMA@j>=e2BEI88&>FE>ZdI-9BN_{`*d#hZmqixhSdq`>G7lCg zD>2ga_PC{9x80a0AR49=&?NK(K`a|Mkx`anmV}w@Ie7wR(Ec)(2FgR)XpzICVKCpx zLRhtUO@zQ@_z>C!m%^*un_d>#bQN5wjkew(ova1B7~#us3?2X)(H?oN`J5PqCaP!I z2=*`3WPO*8#5XjbYL)P0MY<8>3_2}ZSU`_6&MDPiUF+PfrB^53IA2pgv<;gxhE_P9 zKb7b?_ul3{VSh7yz18_lnJYOla^S^1eY<{rbmGmOBR!(c%R{@@?yuYK_pvPxzI881 zR#1odhP~z&JP)21K_b~}hD&+f6C$0QHhtf8p|8g!YtT|Lr^VRKlN2-;`IsBa+^?0^ z`xHBjK0JJ~>D+p~O&s*OeQSsFkowd@^-vg`u{s(cP zqMYBF=bO{9xvsl*v{SdAQJ0?`|MNM55D|#m!3Bic5f_c(4zK}t01NH#gaV0ntz9Xo z6NXUQS#q?yozIc>tDOgS%IV-}m)R*zV^Q=S($IYsk#joq{4mk|rzg5|9%y~~2bSl~ ze&)eX{r~-2D0k$z!J{zfS2X(&bAE_9Kg67W=J&Ki20xXF|Fb}q-}YGHj~?q!Gwifx N{tIC}_B?Bm0s!#D$>;z8 literal 0 HcmV?d00001 diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py new file mode 100644 index 000000000..6a34c2c52 --- /dev/null +++ b/testing/tests/api/test_importing.py @@ -0,0 +1,8 @@ +from pymol import cmd +from pymol import test_utils + + +@test_utils.requires_version("3.0") +def test_bcif(): + cmd.load(test_utils.datafile("115d.bcif.gz")) + assert cmd.count_atoms() == 407 From 
ebd016a04551d9ea16892ca834588656bcf2177f Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 16:33:48 -0400 Subject: [PATCH 06/15] fixed mix var --- layer2/CifFile.h | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/layer2/CifFile.h b/layer2/CifFile.h index 123e34811..db22ed68a 100644 --- a/layer2/CifFile.h +++ b/layer2/CifFile.h @@ -18,6 +18,11 @@ // for pymol::default_free #include "MemoryDebug.h" +template +struct overloaded : Ts... { using Ts::operator()...; }; +template +overloaded(Ts...) -> overloaded; + namespace pymol { namespace _cif_detail { @@ -167,6 +172,26 @@ namespace cif_detail { struct bcif_array { std::vector m_arr{}; }; + + template T var_to_typed(const CifArrayElement& var, const T& d) + { + if constexpr (std::is_same_v) { + auto& str = std::get(var); + return !str.empty() ? str.c_str() : d; + } else { + if (auto ptr = std::get_if(&var); ptr && ptr->empty()) { + return d; + } + if constexpr (!std::is_same_v) { + return std::visit(overloaded{[](const std::string& s) -> T { + return _cif_detail::raw_to_typed( + s.c_str()); + }, + [](const auto& v) -> T { return v; }}, + var); + } + } + } } /** @@ -223,17 +248,8 @@ class cif_array { } else if (auto arr = std::get_if(&m_array)) { if (pos >= arr->m_arr.size()) return d; - if constexpr(std::is_same_v) { - auto& str = std::get(arr->m_arr[pos]); - return !str.empty() ? 
str.c_str() : d; - } else { - if (auto ptr = std::get_if(&arr->m_arr[pos])) { - if (ptr->empty()) { - return d; - } - } - return std::get(arr->m_arr[pos]); - } + auto& var = arr->m_arr[pos]; + return cif_detail::var_to_typed(var, d); } return d; } From 2be819ca740d6b8a7ff7e758b0a57b33ba177b55 Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 16:42:15 -0400 Subject: [PATCH 07/15] slight simplification --- layer2/CifFile.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/layer2/CifFile.h b/layer2/CifFile.h index db22ed68a..793a642b7 100644 --- a/layer2/CifFile.h +++ b/layer2/CifFile.h @@ -270,15 +270,14 @@ class cif_array { if (auto str_ptr = std::get_if(&arr->m_arr[pos])) { return str_ptr->c_str(); } - auto to_string_visitor = [](auto&& arg) -> std::string { + m_internal_str_cache = std::visit([](auto&& arg) -> std::string { if constexpr (std::is_same_v, std::string>) { return arg; } else { return std::to_string(arg); } - }; - m_internal_str_cache = std::visit(to_string_visitor, arr->m_arr[pos]); + }, arr->m_arr[pos]); return m_internal_str_cache.c_str(); } return d; From bfab47edaa36fd00cc8ac96b1dcc9a0c15965865 Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 16:49:00 -0400 Subject: [PATCH 08/15] return val --- layer2/CifFile.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/layer2/CifFile.h b/layer2/CifFile.h index 793a642b7..c6414acd8 100644 --- a/layer2/CifFile.h +++ b/layer2/CifFile.h @@ -173,6 +173,13 @@ namespace cif_detail { std::vector m_arr{}; }; + /** + * Returns a typed value from a CIF data element. + * If the element is missing or inapplicable, return `d`. 
+ * @param var CIF data element + * @param d default value + * @return typed value + */ template T var_to_typed(const CifArrayElement& var, const T& d) { if constexpr (std::is_same_v) { @@ -191,6 +198,7 @@ namespace cif_detail { var); } } + return d; } } From b7705c1cbba5255032d9f0c9fe4d2caa9b7f7dd4 Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 17:21:16 -0400 Subject: [PATCH 09/15] No msgpack-c --- layer2/CifFile.cpp | 10 ++++++++++ layer2/CifMoleculeReader.cpp | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp index cda02e296..97fe9c144 100644 --- a/layer2/CifFile.cpp +++ b/layer2/CifFile.cpp @@ -22,7 +22,9 @@ #include "MemoryDebug.h" #include "strcasecmp.h" +#if !defined(_PYMOL_NO_MSGPACKC) #include +#endif namespace pymol { namespace _cif_detail { @@ -479,6 +481,8 @@ bool cif_file::parse(char*&& p) { return true; } + +#if !defined(_PYMOL_NO_MSGPACKC) enum class DataTypes { Int8 = 1, @@ -793,6 +797,12 @@ bool cif_file::parse_bcif(const char* bytes, std::size_t size) } return true; } +#else +bool cif_file::parse_bcif(const char* bytes, std::size_t size) +{ + return false; +} +#endif // !defined(_PYMOL_NO_MSGPACKC) } // namespace pymol diff --git a/layer2/CifMoleculeReader.cpp b/layer2/CifMoleculeReader.cpp index ddbcaea14..360867eaa 100644 --- a/layer2/CifMoleculeReader.cpp +++ b/layer2/CifMoleculeReader.cpp @@ -2359,6 +2359,14 @@ pymol::Result ObjectMoleculeReadBCif(PyMOLGlobals* G, ObjectMolecule* I, const char* bytes, std::size_t size, int frame, int discrete, int quiet, int multiplex, int zoom) { +#ifdef _PYMOL_NO_MSGPACKC + PRINTFB(G, FB_ObjectMolecule, FB_Errors) + " Error: This build has no BinaryCIF support.\n" + " Please install/enable msgpack-c.\n" + ENDFB(G); + return nullptr; +#endif + if (I) { return pymol::Error("loading BCIF into existing object not supported, " "please use 'create' to append to an existing object."); From 62ae4c87586e3fbbc46e5d1b752d5043deb2fe8c Mon Sep 17 
00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 17:30:11 -0400 Subject: [PATCH 10/15] revert le --- layerCTest/Test_CifFile.cpp | 302 ++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 151 deletions(-) diff --git a/layerCTest/Test_CifFile.cpp b/layerCTest/Test_CifFile.cpp index e173dcafa..77108d287 100644 --- a/layerCTest/Test_CifFile.cpp +++ b/layerCTest/Test_CifFile.cpp @@ -1,151 +1,151 @@ -#include "Test.h" - -#include "CifFile.h" - -using namespace pymol::test; - -const char* SAMPLE_CIF_STR = R"""( -data_FOO -_cat1.key1 noquotes -_cat1.key2 "two words" -_cat1.key3 ? # unknown -_cat1.key4 . # inapplicable -_cat1.KEY5 "UPPER CASE key" -loop_ -_cat2.key1 -_cat2.key2 -_cat2.key3 -_cat2.key4 -10 0.1 . foo -11 0.2 ? "TWO WORDS" -12 ? ? -;multi -line -value -; . 0.4 . . -data_bar -data_baz -_undotted_key "why not" -_typed_float1 1.23(45)e3 -_typed_float2 1.234(5)e1 -_typed_float3 1.23456789 -)"""; - -TEST_CASE("misc", "[CifFile]") -{ - // syntax 1 - pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR); - // syntax 2 (requires move constructor) - auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); - // move assign - pymol::cif_file cf3; - cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); - - // check all three instances have same data - REQUIRE(cf1.datablocks().size() == 3); - REQUIRE(cf2.datablocks().size() == 3); - REQUIRE(cf3.datablocks().size() == 3); - REQUIRE(cf1.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); - REQUIRE(cf2.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); - REQUIRE(cf3.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); - - auto& blocks = cf1.datablocks(); - - REQUIRE(blocks.find("FOO")->second.code() == std::string("FOO")); - REQUIRE(blocks.find("bar")->second.code() == std::string("bar")); - REQUIRE(blocks.find("baz")->second.code() == std::string("baz")); - - auto* data = 
&blocks.find("FOO")->second; - - REQUIRE(data->get_arr("_cat1.key3") != nullptr); - REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3")); - REQUIRE(data->get_arr("_cat1.key6") == nullptr); - - REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false); - REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false); - REQUIRE(data->get_opt("_cat1.key3")->is_missing()); - REQUIRE(data->get_opt("_cat1.key4")->is_missing()); - REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false); - - REQUIRE(data->get_opt("_cat1.key4")->is_missing_all()); - REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false); - - // looped data - - REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false); - REQUIRE(data->get_opt("_cat2.key3")->is_missing_all()); - - // template getters - - std::vector vec1{10, 11, 12, 0}; - std::vector vec2{0.1f, 0.2f, 99.f, 0.4f}; - - REQUIRE(data->get_opt("_cat2.key1")->to_vector() == vec1); - REQUIRE(data->get_opt("_cat2.key2")->to_vector(99.f) == vec2); - - REQUIRE(data->get_opt("_cat2.key4")->as(0) == "foo"); - REQUIRE(data->get_opt("_cat2.key4")->as(1) == "TWO WORDS"); - REQUIRE(data->get_opt("_cat2.key4")->as(2) == "multi\nline\nvalue"); - REQUIRE(data->get_opt("_cat2.key4")->as(3) == ""); - - REQUIRE(data->get_opt("_cat2.key4")->as(0) == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->as(3) == nullptr); - - REQUIRE(data->get_opt("_cat2.key4")->to_vector()[0] == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->to_vector()[3] == nullptr); - REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[0] == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[3] == std::string("ABC")); - - // type deducted from default value - - REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int - REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int - REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. 
/ 3.)); // double - REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1); - REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f); - REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo"); - REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted"); - - // as_X getters - - REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo")); - REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing - - REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10); - REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11); - REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing - - REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.); - REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.); - REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) == 99.); // missing - - REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) == 0.1); - REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing - REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) 
== 0.4); - - // out of bounds is default - - REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99); - - // alternate names - - REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10); - REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10); - REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99); - - // wildcard lookup - - REQUIRE(data->get_arr("_cat2_key1") == nullptr); - REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10); - REQUIRE(blocks.find("baz")->second.get_arr("_undotted.key") == nullptr); - REQUIRE(blocks.find("baz")->second.get_opt("_undotted?key")->as_s() == std::string("why not")); - - // float parsing - - REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as() == Approx(1230.f)); - REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as() == Approx(1230.00000)); - REQUIRE(blocks.find("baz")->second.get_opt("_typed_float2")->as() == Approx(12.3400000)); - REQUIRE(blocks.find("baz")->second.get_opt("_typed_float3")->as() == Approx(1.23456789)); -} - -// vi:sw=2:expandtab +#include "Test.h" + +#include "CifFile.h" + +using namespace pymol::test; + +const char* SAMPLE_CIF_STR = R"""( +data_FOO +_cat1.key1 noquotes +_cat1.key2 "two words" +_cat1.key3 ? # unknown +_cat1.key4 . # inapplicable +_cat1.KEY5 "UPPER CASE key" +loop_ +_cat2.key1 +_cat2.key2 +_cat2.key3 +_cat2.key4 +10 0.1 . foo +11 0.2 ? "TWO WORDS" +12 ? ? +;multi +line +value +; . 0.4 . . 
+data_bar +data_baz +_undotted_key "why not" +_typed_float1 1.23(45)e3 +_typed_float2 1.234(5)e1 +_typed_float3 1.23456789 +)"""; + +TEST_CASE("misc", "[CifFile]") +{ + // syntax 1 + pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR); + // syntax 2 (requires move constructor) + auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); + // move assign + pymol::cif_file cf3; + cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR); + + // check all three instances have same data + REQUIRE(cf1.datablocks().size() == 3); + REQUIRE(cf2.datablocks().size() == 3); + REQUIRE(cf3.datablocks().size() == 3); + REQUIRE(cf1.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); + REQUIRE(cf2.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); + REQUIRE(cf3.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not")); + + auto& blocks = cf1.datablocks(); + + REQUIRE(blocks.find("FOO")->second.code() == std::string("FOO")); + REQUIRE(blocks.find("bar")->second.code() == std::string("bar")); + REQUIRE(blocks.find("baz")->second.code() == std::string("baz")); + + auto* data = &blocks.find("FOO")->second; + + REQUIRE(data->get_arr("_cat1.key3") != nullptr); + REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3")); + REQUIRE(data->get_arr("_cat1.key6") == nullptr); + + REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false); + REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false); + REQUIRE(data->get_opt("_cat1.key3")->is_missing()); + REQUIRE(data->get_opt("_cat1.key4")->is_missing()); + REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false); + + REQUIRE(data->get_opt("_cat1.key4")->is_missing_all()); + REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false); + + // looped data + + REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false); + REQUIRE(data->get_opt("_cat2.key3")->is_missing_all()); + + // template getters + + std::vector vec1{10, 11, 
12, 0}; + std::vector vec2{0.1f, 0.2f, 99.f, 0.4f}; + + REQUIRE(data->get_opt("_cat2.key1")->to_vector() == vec1); + REQUIRE(data->get_opt("_cat2.key2")->to_vector(99.f) == vec2); + + REQUIRE(data->get_opt("_cat2.key4")->as(0) == "foo"); + REQUIRE(data->get_opt("_cat2.key4")->as(1) == "TWO WORDS"); + REQUIRE(data->get_opt("_cat2.key4")->as(2) == "multi\nline\nvalue"); + REQUIRE(data->get_opt("_cat2.key4")->as(3) == ""); + + REQUIRE(data->get_opt("_cat2.key4")->as(0) == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->as(3) == nullptr); + + REQUIRE(data->get_opt("_cat2.key4")->to_vector()[0] == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->to_vector()[3] == nullptr); + REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[0] == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->to_vector("ABC")[3] == std::string("ABC")); + + // type deducted from default value + + REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int + REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int + REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. / 3.)); // double + REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1); + REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f); + REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo"); + REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted"); + + // as_X getters + + REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo")); + REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing + + REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10); + REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11); + REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing + + REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.); + REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.); + REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) 
== 99.); // missing + + REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) == 0.1); + REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing + REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) == 0.4); + + // out of bounds is default + + REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99); + + // alternate names + + REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10); + REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10); + REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99); + + // wildcard lookup + + REQUIRE(data->get_arr("_cat2_key1") == nullptr); + REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10); + REQUIRE(blocks.find("baz")->second.get_arr("_undotted.key") == nullptr); + REQUIRE(blocks.find("baz")->second.get_opt("_undotted?key")->as_s() == std::string("why not")); + + // float parsing + + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as() == Approx(1230.f)); + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as() == Approx(1230.00000)); + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float2")->as() == Approx(12.3400000)); + REQUIRE(blocks.find("baz")->second.get_opt("_typed_float3")->as() == Approx(1.23456789)); +} + +// vi:sw=2:expandtab From 8c02b4c9056658cb599aa2874077f9d298f4e59e Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 17:48:18 -0400 Subject: [PATCH 11/15] Some more simplification --- layer2/CifFile.cpp | 48 ++++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp index 97fe9c144..a0c96fe73 100644 --- a/layer2/CifFile.cpp +++ b/layer2/CifFile.cpp @@ -568,43 +568,28 @@ static std::vector integer_packing_decode( auto as_int = [isUnsigned, byteCount](auto&& elem) -> std::int32_t { if (isUnsigned) { - if (byteCount == 1) { - return static_cast(std::get(elem)); - } else { - return static_cast(std::get(elem)); - } + return 
byteCount == 1 ? static_cast(std::get(elem)) + : static_cast(std::get(elem)); } else { - if (byteCount == 1) { - return static_cast(std::get(elem)); - } else { - return static_cast(std::get(elem)); - } + return byteCount == 1 ? static_cast(std::get(elem)) + : static_cast(std::get(elem)); } }; auto at_limit = [isUnsigned, upperLimit, lowerLimit](std::int32_t t) -> bool { - if (isUnsigned) { - return t == upperLimit; - } else { - return t == upperLimit || t == lowerLimit; - } + return isUnsigned ? (t == upperLimit) + : (t == upperLimit || t == lowerLimit); }; - int i = 0; - int j = 0; - int n = packedInts.size(); - while (i < n) { + for (int i = 0, j = 0; i < packedInts.size(); ++i, ++j) { std::int32_t value = 0; std::int32_t t = as_int(packedInts[i]); while (at_limit(t)) { value += t; - i++; - t = as_int(packedInts[i]); + t = as_int(packedInts[++i]); } value += t; result[j] = value; - i++; - j++; } return result; } @@ -639,17 +624,12 @@ static std::vector fixed_array_decode( std::vector& data, int factor, DataTypes srcType) { std::vector result = data; - if (srcType == DataTypes::Float32) { - auto div_int32_t = [factor](auto&& a) -> float { - return std::get(a) / static_cast(factor); - }; - std::transform(data.begin(), data.end(), result.begin(), div_int32_t); - } else { - auto div_int32_t = [factor](auto&& a) -> double { - return std::get(a) / static_cast(factor); - }; - std::transform(data.begin(), data.end(), result.begin(), div_int32_t); - } + auto div_int32_t = [factor, srcType](auto&& a) -> auto { + return srcType == DataTypes::Float32 + ? 
std::get(a) / static_cast(factor) + : std::get(a) / static_cast(factor); + }; + std::transform(data.begin(), data.end(), result.begin(), div_int32_t); return result; } From 377ac12bc78a85be016c099b5f00a3c756f24ce0 Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Mon, 20 May 2024 17:50:53 -0400 Subject: [PATCH 12/15] more simplification --- layer2/CifFile.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp index a0c96fe73..b53785b95 100644 --- a/layer2/CifFile.cpp +++ b/layer2/CifFile.cpp @@ -722,9 +722,8 @@ static std::vector parse_bcif_decode(const std::vector>& dataEncoding) { std::vector result; - for (auto begin = std::rbegin(dataEncoding), end = std::rend(dataEncoding); - begin != end; ++begin) { - auto& dataEncode = *begin; + for (auto it = std::rbegin(dataEncoding); it != std::rend(dataEncoding); ++it) { + auto& dataEncode = *it; parse_bcif_decode_kind( dataEncode["kind"].as(), rawData, result, dataEncode); } From bfefe074372b07b7ea558506904e4b6913c76ae9 Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Tue, 4 Jun 2024 23:39:52 -0400 Subject: [PATCH 13/15] test for cif_array data --- layer2/CifMoleculeReader.cpp | 13 +++++++++++-- testing/tests/api/test_importing.py | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/layer2/CifMoleculeReader.cpp b/layer2/CifMoleculeReader.cpp index 360867eaa..5622ea2da 100644 --- a/layer2/CifMoleculeReader.cpp +++ b/layer2/CifMoleculeReader.cpp @@ -2377,16 +2377,25 @@ pymol::Result ObjectMoleculeReadBCif(PyMOLGlobals* G, "use 'split_states' after loading the object."); } - auto cif = std::make_unique(); + auto cif = std::make_shared(); cif->parse_bcif(bytes, size); for (const auto& [code, datablock] : cif->datablocks()) { auto obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet); if (!obj) { PRINTFB(G, FB_ObjectMolecule, FB_Warnings) - " mmCIF-Warning: no coordinates found in data_%s\n", datablock.code()
ENDFB(G); + " BCIF-Warning: no coordinates found in data_%s\n", datablock.code() ENDFB(G); continue; } + +#ifndef _PYMOL_NOPY + // we only provide access from the Python API so far + if (SettingGet(G, cSetting_cif_keepinmemory)) { + obj->m_cifdata = &datablock; + obj->m_ciffile = cif; + } +#endif + if (cif->datablocks().size() == 1 || multiplex == 0) return obj; } diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py index 6a34c2c52..d9919464e 100644 --- a/testing/tests/api/test_importing.py +++ b/testing/tests/api/test_importing.py @@ -1,8 +1,27 @@ from pymol import cmd from pymol import test_utils +from pymol.querying import cif_get_array @test_utils.requires_version("3.0") def test_bcif(): cmd.load(test_utils.datafile("115d.bcif.gz")) assert cmd.count_atoms() == 407 + +@test_utils.requires_version("3.0") +def test_bcif_array(): + obj_name = "foo" + cmd.set('cif_keepinmemory', 1) + cmd.load(test_utils.datafile("115d.bcif.gz"), object=obj_name) + arr = cif_get_array(obj_name, "_pdbx_database_status.entry_id", "s") + assert arr == ["115D"] + + arr = cif_get_array(obj_name, "_entity_poly.pdbx_strand_id", "s") + assert arr == ["A,B"] + + + arr = cif_get_array(obj_name, "_pdbx_struct_oper_list.name", "s") + assert arr == ["1_555"] + + arr = cif_get_array(obj_name, "_pdbx_struct_assembly.oligomeric_count", "i") + assert arr == [2] \ No newline at end of file From 325e6d180554a47ee814172e8fb78074a291804b Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Tue, 4 Jun 2024 23:45:52 -0400 Subject: [PATCH 14/15] EOF --- testing/tests/api/test_importing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py index d9919464e..7788aa8b3 100644 --- a/testing/tests/api/test_importing.py +++ b/testing/tests/api/test_importing.py @@ -24,4 +24,4 @@ def test_bcif_array(): assert arr == ["1_555"] arr = cif_get_array(obj_name, 
"_pdbx_struct_assembly.oligomeric_count", "i") - assert arr == [2] \ No newline at end of file + assert arr == [2] From 40f071a06084633bd000101dc0e65d4d9b67379f Mon Sep 17 00:00:00 2001 From: Jarrett Johnson Date: Tue, 4 Jun 2024 23:47:13 -0400 Subject: [PATCH 15/15] update --- testing/tests/api/test_importing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py index 7788aa8b3..432eef8dd 100644 --- a/testing/tests/api/test_importing.py +++ b/testing/tests/api/test_importing.py @@ -19,7 +19,6 @@ def test_bcif_array(): arr = cif_get_array(obj_name, "_entity_poly.pdbx_strand_id", "s") assert arr == ["A,B"] - arr = cif_get_array(obj_name, "_pdbx_struct_oper_list.name", "s") assert arr == ["1_555"]