From 37d23cd8715bff066f8b2c8389ef15f18f20b0de Mon Sep 17 00:00:00 2001
From: Jarrett J <jarrett.johnson@schrodinger.com>
Date: Sun, 31 Mar 2024 15:09:01 -0400
Subject: [PATCH 01/15] bcif wip impl

---
 layer2/CifFile.cpp           | 445 ++++++++++++++++++++++++++++++++---
 layer2/CifFile.h             | 186 +++++++++++----
 layer2/CifMoleculeReader.cpp |  39 ++-
 layer2/ObjectMolecule.h      |   3 +
 layer3/Executive.cpp         |   7 +
 layer3/Executive.h           |   2 +
 modules/pymol/importing.py   |  13 +-
 7 files changed, 613 insertions(+), 82 deletions(-)
diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp
index 4ef48e5f9..6bfca7a2c 100644
--- a/layer2/CifFile.cpp
+++ b/layer2/CifFile.cpp
@@ -12,14 +12,18 @@
 
 #include <cassert>
 #include <iostream>
+#include <numeric>
 #include <string>
 #include <vector>
+#include <variant>
 
 #include "CifFile.h"
 #include "File.h"
 #include "MemoryDebug.h"
 #include "strcasecmp.h"
 
+#include <msgpack.hpp>
+
 namespace pymol {
 namespace _cif_detail {
 
@@ -125,11 +129,18 @@ const char * cif_loop::get_value_raw(int row, int col) const {
 
 // get the number of elements in this array
 unsigned cif_array::size() const {
-  return (col == NOT_IN_LOOP) ? 1 : pointer.loop->nrows;
+  if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
+    return (arr->col == cif_detail::cif_str_array::NOT_IN_LOOP)
+               ? 1
+               : arr->pointer.loop->nrows;
+  } else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
+    return arr->m_arr.size();
+  }
+  return 0;
 }
 
 /// Get array value, return NULL if `pos >= size()` or value in ['.', '?']
-const char* cif_array::get_value_raw(unsigned pos) const
+const char* cif_detail::cif_str_array::get_value_raw(unsigned pos) const
 {
   if (col == NOT_IN_LOOP)
     return (pos > 0) ? nullptr : pointer.value;
@@ -157,41 +168,84 @@ bool cif_array::is_missing_all() const {
  * @param key data name, must be lower case
  */
 const cif_array * cif_data::get_arr(const char * key) const {
-  const char* p = strchr(key, '?');
-  decltype(m_dict)::const_iterator it;
+  if (auto data = std::get_if<pymol::cif_detail::cif_str_data>(&m_data)) {
+    const auto& dict = data->m_dict;
+    const char* p = strchr(key, '?');
+    std::remove_reference_t<decltype(dict)>::const_iterator it;
 
 #ifndef NDEBUG
-  for (const char* q = key; *q; ++q) {
-    assert("key must be lower case" && !('Z' >= *q && *q >= 'A'));
-  }
+    for (const char* q = key; *q; ++q) {
+      assert("key must be lower case" && !('Z' >= *q && *q >= 'A'));
+    }
 #endif
 
-  // support alias shortcut: '?' matches '.' and '_'
-  if (p != nullptr) {
-    std::string tmp(key);
-    // replace '?' by '.' or '_'
-    tmp[p - key] = '.';
-    if ((it = m_dict.find(tmp.c_str())) != m_dict.end())
-      return &it->second;
-    tmp[p - key] = '_';
-    if ((it = m_dict.find(tmp.c_str())) != m_dict.end())
-      return &it->second;
-  } else {
-    if ((it = m_dict.find(key)) != m_dict.end())
-      return &it->second;
+    // support alias shortcut: '?' matches '.' and '_'
+    if (p != nullptr) {
+      std::string tmp(key);
+      // replace '?' by '.' or '_'
+      tmp[p - key] = '.';
+      if ((it = dict.find(tmp.c_str())) != dict.end())
+        return &it->second;
+      tmp[p - key] = '_';
+      if ((it = dict.find(tmp.c_str())) != dict.end())
+        return &it->second;
+    } else {
+      if ((it = dict.find(key)) != dict.end())
+        return &it->second;
+    }
+  } else if (auto data = std::get_if<pymol::cif_detail::bcif_data>(&m_data)) {
+
+    const auto& dict = data->m_dict;
+
+    std::string_view keyView(key);
+    auto split_key = [](const char c) {
+      return c == '.' /*|| c == '_'*/ || c == '?';
+    };
+    auto splitTokenIt = std::find_if(keyView.begin(), keyView.end(), split_key);
+    if (splitTokenIt == keyView.end()) {
+      return nullptr;
+    }
+    auto dist = std::distance(keyView.begin(), splitTokenIt);
+    auto categoryView = keyView.substr(0, dist);
+    auto categoryStr = std::string(categoryView);
+    auto categoryIt = dict.find(categoryStr.c_str());
+    if (categoryIt == dict.end()) {
+      return nullptr;
+    }
+    auto& category = categoryIt->second;
+    auto columnView = keyView.substr(dist + 1);
+    auto columnStr = std::string(columnView);
+    auto columnIt = category.find(columnStr.c_str());
+    if (columnIt == category.end()) {
+      return nullptr;
+    }
+    auto arr = &columnIt->second;
+    auto& arrPtr = std::get<cif_detail::bcif_array>(arr->m_array);
+    return &columnIt->second;
   }
 
   return nullptr;
 }
 
+/// Block code (never NULL), read from whichever representation is active.
+const char* cif_data::code() const {
+  if (auto data = std::get_if<pymol::cif_detail::cif_str_data>(&m_data))
+    return data->m_code ? data->m_code : "";
+  auto bcif = std::get_if<pymol::cif_detail::bcif_data>(&m_data);
+  return bcif ? bcif->m_code.c_str() : "";
+}
+
 const cif_array* cif_data::empty_array() {
   return &EMPTY_ARRAY;
 }
 
-const cif_data* cif_data::get_saveframe(const char* code) const {
-  auto it = m_saveframes.find(code);
-  if (it != m_saveframes.end())
-    return &it->second;
+const cif_detail::cif_str_data* cif_data::get_saveframe(const char* code) const {
+  if (auto data = std::get_if<pymol::cif_detail::cif_str_data>(&m_data)) {
+    const auto& saveframes = data->m_saveframes;
+    auto it = saveframes.find(code);
+    if (it != saveframes.end())
+      return &it->second;
+  }
   return nullptr;
 }
 
@@ -302,8 +356,8 @@ bool cif_file::parse(char*&& p) {
     }
   }
 
-  cif_data* current_frame = nullptr;
-  std::vector<cif_data*> frame_stack;
+  cif_detail::cif_str_data* current_frame = nullptr;
+  std::vector<cif_detail::cif_str_data*> frame_stack;
   std::unique_ptr<cif_data> global_block;
   decltype(m_datablocks) datablocksnew;
 
@@ -324,7 +378,10 @@ bool cif_file::parse(char*&& p) {
       }
 
       tolowerinplace(tokens[i]);
-      current_frame->m_dict[tokens[i]].set_value(tokens[i + 1]);
+      current_frame->m_dict[tokens[i]].m_array = cif_detail::cif_str_array{};
+      auto& cif_arr = std::get<cif_detail::cif_str_array>(
+          current_frame->m_dict[tokens[i]].m_array);
+      cif_arr.set_value(tokens[i + 1]);
 
       i++;
     } else if (strcasecmp("loop_", tokens[i]) == 0) {
@@ -344,8 +401,10 @@ bool cif_file::parse(char*&& p) {
       // columns
       while (++i < n && keypossible[i] && tokens[i][0] == '_') {
         tolowerinplace(tokens[i]);
-
-        current_frame->m_dict[tokens[i]].set_loop(loop, ncols);
+        current_frame->m_dict[tokens[i]].m_array = cif_detail::cif_str_array{};
+        auto& cif_arr = std::get<cif_detail::cif_str_array>(
+            current_frame->m_dict[tokens[i]].m_array);
+        cif_arr.set_loop(loop, ncols);
 
         ncols++;
       }
@@ -376,15 +435,18 @@ bool cif_file::parse(char*&& p) {
       i--;
 
     } else if (strncasecmp("data_", tokens[i], 5) == 0) {
-      datablocksnew.emplace_back();
-      current_frame = &datablocksnew.back();
+      auto& new_data = datablocksnew[tokens[i] + 5];
+      new_data.m_data = cif_detail::cif_str_data();
+      current_frame = &std::get<cif_detail::cif_str_data>(new_data.m_data);
       current_frame->m_code = tokens[i] + 5;
       frame_stack = {current_frame};
 
     } else if (strncasecmp("global_", tokens[i], 5) == 0) {
       // STAR feature, not supported in CIF
-      current_frame = new cif_data;
-      global_block.reset(current_frame);
+      auto new_data = new cif_data;
+      new_data->m_data = cif_detail::cif_str_data{};
+      current_frame = &std::get<cif_detail::cif_str_data>(new_data->m_data);
+      global_block.reset(new_data);
       frame_stack = {current_frame};
 
     } else if (strncasecmp("save_", tokens[i], 5) == 0) {
@@ -419,6 +481,323 @@ bool cif_file::parse(char*&& p) {
   return true;
 }
 
+enum class DataTypes // numeric element-type codes; the integer "type"/"srcType" encoding fields are cast to this
+{
+  Int8 = 1,
+  Int16 = 2,
+  Int32 = 3,
+  UInt8 = 4,
+  UInt16 = 5,
+  UInt32 = 6,
+  Float32 = 32,
+  Float64 = 33,
+};
+
+template <typename T> // T: element type to read out of the byte buffer
+void decodeAndPushBack(const std::vector<unsigned char>& bytes, std::size_t& i, // `i` is only read here
+    std::size_t size, std::vector<CifArrayElement>& result)
+{
+  T value;
+  std::memcpy(&value, &bytes[i], size); // copies `size` bytes from offset i; no bounds check, and `size` is not compared with sizeof(T)
+  result.push_back(value); // appended to the caller's output vector
+}
+
+/// Decode a "ByteArray" payload: memcpy consecutive `dataType`-sized elements
+/// out of `bytes`. An unknown `dataType` or a trailing partial element decodes
+/// to nothing, instead of looping forever or reading past the end of `bytes`.
+static std::vector<CifArrayElement> byte_array_decode(const std::vector<unsigned char>& bytes, DataTypes dataType)
+{
+  std::vector<CifArrayElement> result;
+  std::unordered_map<DataTypes, std::size_t> dataTypeSize = {
+    {DataTypes::Int8, sizeof(std::int8_t)}, {DataTypes::Int16, sizeof(std::int16_t)},
+    {DataTypes::Int32, sizeof(std::int32_t)}, {DataTypes::UInt8, sizeof(std::uint8_t)},
+    {DataTypes::UInt16, sizeof(std::uint16_t)}, {DataTypes::UInt32, sizeof(std::uint32_t)},
+    {DataTypes::Float32, sizeof(float)}, {DataTypes::Float64, sizeof(double)},
+  };
+
+  // operator[] yields 0 for a type code missing from the table; `size != 0`
+  // below keeps that from becoming a loop that never advances.
+  const auto size = dataTypeSize[dataType];
+  for (std::size_t i = 0; size != 0 && i + size <= bytes.size(); i += size) {
+    switch (dataType) {
+    case DataTypes::Int8:
+      decodeAndPushBack<std::int8_t>(bytes, i, size, result);
+      break;
+    case DataTypes::Int16:
+      decodeAndPushBack<std::int16_t>(bytes, i, size, result);
+      break;
+    case DataTypes::Int32:
+      decodeAndPushBack<std::int32_t>(bytes, i, size, result);
+      break;
+    case DataTypes::UInt8:
+      decodeAndPushBack<std::uint8_t>(bytes, i, size, result);
+      break;
+    case DataTypes::UInt16:
+      decodeAndPushBack<std::uint16_t>(bytes, i, size, result);
+      break;
+    case DataTypes::UInt32:
+      decodeAndPushBack<std::uint32_t>(bytes, i, size, result);
+      break;
+    case DataTypes::Float32:
+      decodeAndPushBack<float>(bytes, i, size, result);
+      break;
+    case DataTypes::Float64:
+      decodeAndPushBack<double>(bytes, i, size, result);
+      break;
+    }
+  }
+  return result;
+}
+
+/// Undo "IntegerPacking": a run of packed values sitting at the 8/16-bit limit
+/// is summed with the first following value that is not at the limit, which
+/// rebuilds one 32-bit integer. The result always has `srcSize` elements.
+static std::vector<CifArrayElement> integer_packing_decode(
+    const std::vector<CifArrayElement>& packedInts, int byteCount, int srcSize,
+    bool isUnsigned)
+{
+  std::vector<CifArrayElement> result(srcSize);
+  std::int32_t upperLimit;
+  if (isUnsigned) {
+    upperLimit = byteCount == 1 ? std::numeric_limits<std::uint8_t>::max()
+                                : std::numeric_limits<std::uint16_t>::max();
+  } else {
+    upperLimit = byteCount == 1 ? std::numeric_limits<std::int8_t>::max()
+                                : std::numeric_limits<std::int16_t>::max();
+  }
+  std::int32_t lowerLimit = -upperLimit - 1;
+  auto as_int = [isUnsigned, byteCount](auto&& elem) -> std::int32_t {
+    if (isUnsigned) {
+      if (byteCount == 1) {
+        return static_cast<std::int32_t>(std::get<std::uint8_t>(elem));
+      } else {
+        return static_cast<std::int32_t>(std::get<std::uint16_t>(elem));
+      }
+    } else {
+      if (byteCount == 1) {
+        return static_cast<std::int32_t>(std::get<std::int8_t>(elem));
+      } else {
+        return static_cast<std::int32_t>(std::get<std::int16_t>(elem));
+      }
+    }
+  };
+  auto at_limit = [isUnsigned, upperLimit, lowerLimit](std::int32_t t) -> bool {
+    if (isUnsigned) {
+      return t == upperLimit;
+    } else {
+      return t == upperLimit || t == lowerLimit;
+    }
+  };
+  int i = 0;
+  int j = 0;
+  int n = packedInts.size();
+  while (i < n && j < srcSize) { // never write past the srcSize slots of result
+    std::int32_t value = 0;
+    std::int32_t t = as_int(packedInts[i]);
+    while (at_limit(t) && i + 1 < n) { // a limit value at the very end has no successor
+      value += t;
+      i++;
+      t = as_int(packedInts[i]);
+    }
+    value += t;
+    result[j] = value;
+    i++;
+    j++;
+  }
+  return result;
+}
+
+/// Undo "Delta": element k of the result is `origin` plus the sum of
+/// data[0..k]. An empty input yields an empty result.
+static std::vector<CifArrayElement> delta_decode(
+    std::vector<CifArrayElement>& data, std::int32_t origin, DataTypes srcType)
+{
+  std::vector<CifArrayElement> result = data;
+  std::int32_t sum = origin; // running total, seeded so data[0] is offset by origin
+  for (auto& elem : result)
+    elem = (sum += std::get<std::int32_t>(elem));
+  return result;
+}
+
+/// Undo "RunLength": expand consecutive (value, count) pairs of `data`.
+static std::vector<CifArrayElement> run_length_decode(
+    std::vector<CifArrayElement>& data, DataTypes srcType, int srcSize)
+{
+  std::vector<CifArrayElement> result;
+  // A dangling value without a count (odd-sized input) is ignored.
+  for (std::size_t i = 0; i + 1 < data.size(); i += 2) {
+    const auto item = std::get<std::int32_t>(data[i]);
+    const auto count = std::get<std::int32_t>(data[i + 1]);
+    for (std::int32_t j = 0; j < count; j++)
+      result.push_back(item);
+  }
+  return result;
+}
+
+static std::vector<CifArrayElement> fixed_array_decode( // divides every int32 element of `data` by `factor`
+    std::vector<CifArrayElement>& data, int factor, DataTypes srcType)
+{
+  std::vector<CifArrayElement> result = data; // copied for its size; every slot is overwritten below
+  if (srcType == DataTypes::Float32) {
+    auto div_int32_t = [factor](auto&& a) -> float { // Float32 source: elements become float
+      return std::get<std::int32_t>(a) / static_cast<float>(factor); // std::get throws if the element is not int32
+    };
+    std::transform(data.begin(), data.end(), result.begin(), div_int32_t);
+  } else {
+    auto div_int32_t = [factor](auto&& a) -> double { // any other srcType: elements become double
+      return std::get<std::int32_t>(a) / static_cast<double>(factor);
+    };
+    std::transform(data.begin(), data.end(), result.begin(), div_int32_t);
+  }
+  return result;
+}
+
+static std::vector<CifArrayElement> interval_quant_decode( // maps int32 step indices back onto [min, max]
+    std::vector<CifArrayElement>& data, double min, double max, int numSteps,
+    DataTypes srcType) // srcType is not used by this function
+{
+  std::vector<CifArrayElement> result = data; // copied for its size; every slot is overwritten below
+  auto delta = (max - min) / (numSteps - 1); // step width; numSteps == 1 is a floating-point division by zero
+  std::transform(data.begin(), data.end(), result.begin(),
+      [min, delta](auto&& a) -> double { // elements become double
+        return min + std::get<std::int32_t>(a) * delta;
+      });
+  return result;
+}
+
+static std::vector<CifArrayElement> parse_bcif_decode(
+    const std::vector<unsigned char>& rawData,
+    std::vector<std::map<std::string, msgpack::object>>& dataEncoding);
+
+/// Undo "StringArray": slice `stringData` at the decoded offsets and look each
+/// decoded index up in the resulting table (index -1 maps to "").
+static std::vector<CifArrayElement> string_array_decode(
+    const std::vector<unsigned char>& data,
+    std::vector<std::map<std::string, msgpack::object>>& indicesEncoding,
+    const std::string& stringData, const std::vector<unsigned char>& offsets,
+    std::vector<std::map<std::string, msgpack::object>>& offsetEncoding)
+{
+  auto decodedOffsets = parse_bcif_decode(offsets, offsetEncoding);
+  auto indices = parse_bcif_decode(data, indicesEncoding);
+
+  std::vector<CifArrayElement> result;
+  result.reserve(indices.size());
+  std::vector<std::string> strings = {""};
+  strings.reserve(decodedOffsets.size());
+  for (std::size_t i = 1; i < decodedOffsets.size(); i++) {
+    auto start = std::get<std::int32_t>(decodedOffsets[i - 1]);
+    auto end = std::get<std::int32_t>(decodedOffsets[i]);
+    strings.push_back(stringData.substr(start, end - start));
+  }
+
+  for (const auto& elem : indices) {
+    // at() rejects an index outside the table instead of reading out of bounds
+    result.push_back(strings.at(std::get<std::int32_t>(elem) + 1));
+  }
+  return result;
+}
+
+/// Apply decoding step `kind` to `result` in place; unknown kinds are ignored.
+static void parse_bcif_decode_kind(const std::string& kind,
+    const std::vector<unsigned char>& rawData, std::vector<CifArrayElement>& result,
+    std::map<std::string, msgpack::object>& dataEncoding)
+{
+  if (kind == "ByteArray") {
+    auto type = dataEncoding["type"].as<int>();
+    result = byte_array_decode(rawData, static_cast<DataTypes>(type));
+  } else if (kind == "FixedPoint") {
+    auto factor = dataEncoding["factor"].as<int>();
+    auto srcType = dataEncoding["srcType"].as<int>();
+    result = fixed_array_decode(result, factor, static_cast<DataTypes>(srcType));
+  } else if (kind == "IntervalQuantization") {
+    auto min = dataEncoding["min"].as<double>(); // double, matching the decoder's parameters
+    auto max = dataEncoding["max"].as<double>(); // (float would round the interval bounds)
+    auto numSteps = dataEncoding["numSteps"].as<float>();
+    auto srcType = dataEncoding["srcType"].as<int>();
+    result = interval_quant_decode(result, min, max, numSteps, static_cast<DataTypes>(srcType));
+  } else if (kind == "RunLength") {
+    auto srcType = dataEncoding["srcType"].as<int>();
+    auto srcSize = dataEncoding["srcSize"].as<int>();
+    result = run_length_decode(result, static_cast<DataTypes>(srcType), srcSize);
+  } else if (kind == "Delta") {
+    auto origin = dataEncoding["origin"].as<int>();
+    auto srcType = dataEncoding["srcType"].as<int>();
+    result = delta_decode(result, origin, static_cast<DataTypes>(srcType));
+  } else if (kind == "IntegerPacking") {
+    auto byteCount = dataEncoding["byteCount"].as<int>();
+    auto srcSize = dataEncoding["srcSize"].as<int>();
+    auto isUnsigned = dataEncoding["isUnsigned"].as<bool>();
+    result = integer_packing_decode(result, byteCount, srcSize, isUnsigned);
+  } else if (kind == "StringArray") {
+    auto indicesEncoding = dataEncoding["dataEncoding"].as<std::vector<std::map<std::string, msgpack::object>>>();
+    auto stringData = dataEncoding["stringData"].as<std::string>();
+    auto offsets = dataEncoding["offsets"].as<std::vector<unsigned char>>();
+    auto offsetEncoding = dataEncoding["offsetEncoding"].as<std::vector<std::map<std::string, msgpack::object>>>();
+    result = string_array_decode(rawData, indicesEncoding, stringData, offsets, offsetEncoding);
+  }
+}
+
+static std::vector<CifArrayElement> parse_bcif_decode(const std::vector<unsigned char>& rawData, // raw column bytes
+    std::vector<std::map<std::string, msgpack::object>>& dataEncoding) // encoding steps, walked back to front
+{
+  std::vector<CifArrayElement> result; // threaded through every decoding step
+  for (auto begin = std::rbegin(dataEncoding), end = std::rend(dataEncoding);
+       begin != end; ++begin) {
+    auto& dataEncode = *begin;
+    parse_bcif_decode_kind(
+        dataEncode["kind"].as<std::string>(), rawData, result, dataEncode); // operator[] is why the maps are non-const
+  }
+  return result;
+}
+
+
+/// Parse a BinaryCIF (MessagePack) blob into m_datablocks, replacing whatever
+/// was parsed before. Category and column names are stored lower-cased.
+/// NOTE(review): msgpack `as<>()` errors are not caught here -- confirm callers cope.
+bool cif_file::parse_bcif(const char* bytes, std::size_t size)
+{
+  m_datablocks.clear();
+  m_tokens.clear();
+
+  auto oh = msgpack::unpack(bytes, size);
+  auto msgobj = oh.get();
+  auto dict = msgobj.as<std::map<std::string, msgpack::object>>();
+
+  auto dataBlocksRaw = dict["dataBlocks"].as<std::vector<msgpack::object>>();
+  for (const auto& block : dataBlocksRaw) {
+    auto blockMap = block.as<std::map<std::string, msgpack::object>>();
+    auto header = blockMap["header"].as<std::string>();
+    auto categoriesRaw = blockMap["categories"].as<std::vector<msgpack::object>>();
+    // Build the block in place in the map. It must not be move-assigned onto
+    // itself afterwards: that may leave the freshly filled dictionary empty.
+    auto& new_block = m_datablocks[header];
+    new_block.m_data = pymol::cif_detail::bcif_data{};
+    auto& categoriesData = std::get<pymol::cif_detail::bcif_data>(new_block.m_data);
+    categoriesData.m_code = header;
+    for (const auto& category : categoriesRaw) {
+      auto categoryMap = category.as<std::map<std::string, msgpack::object>>();
+      auto categoryName = categoryMap["name"].as<std::string>();
+      std::transform(categoryName.begin(), categoryName.end(),
+          categoryName.begin(), ::tolower);
+      auto rowCount = categoryMap["rowCount"].as<int>();
+      auto columnsRaw = categoryMap["columns"].as<std::vector<msgpack::object>>();
+      auto& columns = categoriesData.m_dict[categoryName];
+      for (const auto& column : columnsRaw) {
+        auto columnMap = column.as<std::map<std::string, msgpack::object>>();
+        auto columnName = columnMap["name"].as<std::string>();
+        std::transform(columnName.begin(), columnName.end(),
+          columnName.begin(), ::tolower);
+        auto dataRaw = columnMap["data"].as<std::map<std::string, msgpack::object>>();
+        auto dataData = dataRaw["data"].as<std::vector<unsigned char>>();
+        auto dataEncoding = dataRaw["encoding"].as<std::vector<std::map<std::string, msgpack::object>>>();
+        auto vec = parse_bcif_decode(dataData, dataEncoding);
+        columns[columnName] = std::move(vec);
+      }
+    }
+  }
+  return true;
+}
+
 } // namespace pymol
 
 // vi:sw=2:ts=2
diff --git a/layer2/CifFile.h b/layer2/CifFile.h
index cbd79976e..760da68de 100644
--- a/layer2/CifFile.h
+++ b/layer2/CifFile.h
@@ -12,6 +12,8 @@
 #include <map>
 #include <memory>
 #include <vector>
+#include <string>
+#include <variant>
 
 // for pymol::default_free
 #include "MemoryDebug.h"
@@ -44,6 +46,11 @@ template <typename T> T raw_to_typed(const char*);
 class cif_data;
 class cif_loop;
 class cif_array;
+namespace cif_detail {
+  struct cif_str_data;
+  struct bcif_data;
+};
+using CIFData = std::variant<cif_detail::cif_str_data, cif_detail::bcif_data>;
 
 /**
  * Class for reading CIF files.
@@ -57,7 +64,7 @@ class cif_array;
  *
  * Iterate over data blocks:
  * @verbatim
-   for (auto& block : cf.datablocks()) {
+   for (auto& [code, block] : cf.datablocks()) {
      // data_<code>
      const char* code = block->code();
 
@@ -80,8 +87,9 @@ class cif_array;
    @endverbatim
  */
 class cif_file {
+public: //
   std::vector<char*> m_tokens;
-  std::vector<cif_data> m_datablocks;
+  std::map<std::string, cif_data> m_datablocks;
   std::unique_ptr<char, pymol::default_free> m_contents;
 
   /**
@@ -98,6 +106,14 @@ class cif_file {
   /// Parse CIF string
   bool parse_string(const char*);
 
+  /**
+   * Parse BinaryCIF blob
+   * @param bytes BinaryCIF blob
+   * @param size Blob size
+   * @post datablocks() is valid
+  */
+  bool parse_bcif(const char* bytes, std::size_t size);
+
 protected:
   /// Report a parsing error
   virtual void error(const char*);
@@ -114,54 +130,85 @@ class cif_file {
   cif_file(const char* filename, const char* contents = nullptr);
 
   /// Data blocks
-  const std::vector<cif_data>& datablocks() const { return m_datablocks; }
+  const std::map<std::string, cif_data>& datablocks() const { return m_datablocks; }
 };
 
-/**
- * View on a CIF data array. The viewed data is owned by the cif_file
- */
-class cif_array {
-  friend class cif_file;
 
-private:
-  enum { NOT_IN_LOOP = -1 };
+using CifArrayElement = std::variant<std::int8_t, std::int16_t, std::int32_t,
+    std::uint8_t, std::uint16_t, std::uint32_t, float, double, std::string>;
 
-  // column index, -1 if not in loop
-  short col;
+namespace cif_detail {
+  struct cif_str_array {
+    enum { NOT_IN_LOOP = -1 };
 
-  // pointer to either loop or single value
-  union {
-    const cif_loop * loop;
-    const char * value;
-  } pointer;
+    // column index, -1 if not in loop
+    short col;
 
-  // Raw data value or NULL for unknown/inapplicable and `pos >= size()`
-  const char* get_value_raw(unsigned pos = 0) const;
+    // pointer to either loop or single value
+    union {
+      const cif_loop * loop;
+      const char * value;
+    } pointer;
 
-  // point this array to a loop (only for parsing)
-  void set_loop(const cif_loop * loop, short col_) {
-    col = col_;
-    pointer.loop = loop;
-  };
+    // Raw data value or NULL for unknown/inapplicable and `pos >= size()`
+    const char* get_value_raw(unsigned pos = 0) const;
 
-  // point this array to a single value (only for parsing)
-  void set_value(const char * value) {
-    col = NOT_IN_LOOP;
-    pointer.value = value;
+    // point this array to a loop (only for parsing)
+    void set_loop(const cif_loop * loop, short col_) {
+      col = col_;
+      pointer.loop = loop;
+    };
+
+    // point this array to a single value (only for parsing)
+    void set_value(const char * value) {
+      col = NOT_IN_LOOP;
+      pointer.value = value;
+    };
+  };
+  struct bcif_array {
+    std::vector<CifArrayElement> m_arr{};
   };
+}
+
+/**
+ * View on a CIF data array. The viewed data is owned by the cif_file
+ */
+class cif_array {
+  friend class cif_file;
+
+private:
+public: //
+  mutable std::string m_internal_str_cache;
+  std::variant<cif_detail::cif_str_array, cif_detail::bcif_array> m_array;
 
 public:
   // constructor
   cif_array() = default;
 
   // constructor (only needed for EMPTY_ARRAY)
-  cif_array(std::nullptr_t) { set_value(nullptr); }
+  cif_array(std::nullptr_t) { 
+    if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
+      arr->set_value(nullptr);
+    } else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
+      arr->m_arr.clear();
+    }
+  }
+
+  cif_array(std::vector<CifArrayElement>&& arr) {
+    m_array = cif_detail::bcif_array{std::move(arr)};
+  }
 
   /// Number of elements in this array (= number of rows in loop)
   unsigned size() const;
 
   /// True if value in ['.', '?']
-  bool is_missing(unsigned pos = 0) const { return !get_value_raw(pos); }
+  bool is_missing(unsigned pos = 0) const {
+    if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
+      return !arr->get_value_raw(pos);
+    } else {
+      return false;
+    }
+  }
 
   /// True if all values in ['.', '?']
   bool is_missing_all() const;
@@ -172,8 +219,25 @@ class cif_array {
    * @param d default value for unknown/inapplicable elements
    */
   template <typename T> T as(unsigned pos = 0, T d = T()) const {
-    const char* s = get_value_raw(pos);
-    return s ? _cif_detail::raw_to_typed<T>(s) : d;
+    if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
+      const char* s = arr->get_value_raw(pos);
+      return s ? _cif_detail::raw_to_typed<T>(s) : d;
+    } else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
+      if (pos >= arr->m_arr.size())
+        return d;
+      if constexpr(std::is_same_v<T, const char*>) {
+        auto& str = std::get<std::string>(arr->m_arr[pos]);
+        return !str.empty() ? str.c_str() : d;
+      } else {
+        if (auto ptr = std::get_if<std::string>(&arr->m_arr[pos])) {
+          if (ptr->empty()) {
+            return d;
+          }
+        }
+        return std::get<T>(arr->m_arr[pos]);
+      }
+    }
+    return d;
   }
 
   /**
@@ -184,7 +248,26 @@ class cif_array {
    * @param d default value for unknown/inapplicable elements
    */
   const char* as_s(unsigned pos = 0, const char* d = "") const {
-    return as(pos, d);
+    if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
+      return as(pos, d);
+    } else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
+      if (pos >= arr->m_arr.size())
+        return d;
+      if (auto str_ptr = std::get_if<std::string>(&arr->m_arr[pos])) {
+        return str_ptr->c_str();
+      }
+      auto to_string_visitor = [](auto&& arg) -> std::string {
+        if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,
+                          std::string>) {
+          return arg;
+        } else {
+          return std::to_string(arg);
+        }
+      };
+      m_internal_str_cache = std::visit(to_string_visitor, arr->m_arr[pos]);
+      return m_internal_str_cache.c_str();
+    }
+    return d;
   }
 
   /// Alias for as<int>()
@@ -210,17 +293,34 @@ class cif_array {
 /**
  * CIF data block. The viewed data is owned by the cif_file.
  */
-class cif_data {
-  friend class cif_file;
 
-  // data_<code>
-  const char* m_code = nullptr;
+namespace cif_detail {
+  struct cif_str_data {
+    // data_<code>
+    const char* m_code = nullptr;
+
+    std::map<_cif_detail::zstring_view, cif_array> m_dict;
+    std::map<std::string, cif_array> m_dict_str;
+    std::map<_cif_detail::zstring_view, cif_detail::cif_str_data> m_saveframes;
+
+    // only needed for freeing
+    std::vector<std::unique_ptr<cif_loop>> m_loops;
+  };
+
+  using ColumnMap = std::map<std::string, std::vector<CifArrayElement>>;
+  using CategoryMap = std::map<std::string, ColumnMap>;
+  using DataBlockMap = std::map<std::string, CategoryMap>;
+  struct bcif_data {
+    std::string m_code;
+    std::map<std::string, std::map<std::string, cif_array>> m_dict;
+  };
+}
 
-  std::map<_cif_detail::zstring_view, cif_array> m_dict;
-  std::map<_cif_detail::zstring_view, cif_data> m_saveframes;
+class cif_data {
+  friend class cif_file;
 
-  // only needed for freeing
-  std::vector<std::unique_ptr<cif_loop>> m_loops;
+public: //
+  CIFData m_data;
 
   // generic default value
   static const cif_array* empty_array();
@@ -234,7 +334,7 @@ class cif_data {
   cif_data& operator=(cif_data&&) = default;
 
   /// Block code (never NULL)
-  const char* code() const { return m_code ? m_code : ""; }
+  const char* code() const;
 
   // Get a pointer to array or NULL if not found
   const cif_array* get_arr(const char* key) const;
@@ -253,7 +353,7 @@ class cif_data {
   }
 
   /// Get a pointer to a save frame or NULL if not found
-  const cif_data* get_saveframe(const char* code) const;
+  const cif_detail::cif_str_data* get_saveframe(const char* code) const;
 };
 
 } // namespace pymol
diff --git a/layer2/CifMoleculeReader.cpp b/layer2/CifMoleculeReader.cpp
index 84194d5a4..d1230339b 100644
--- a/layer2/CifMoleculeReader.cpp
+++ b/layer2/CifMoleculeReader.cpp
@@ -435,7 +435,7 @@ static bond_dict_t * get_global_components_bond_dict(PyMOLGlobals * G) {
       return nullptr;
     }
 
-    for (const auto& datablock : cif.datablocks()) {
+    for (const auto& [code, datablock] : cif.datablocks()) {
       read_chem_comp_bond_dict(&datablock, bond_dict);
     }
   }
@@ -2264,7 +2264,7 @@ pymol::Result<ObjectMolecule*> ObjectMoleculeReadCifStr(PyMOLGlobals * G, Object
     return pymol::make_error("Parsing CIF file failed: ", cif->m_error_msg);
   }
 
-  for (const auto& datablock : cif->datablocks()) {
+  for (const auto& [code, datablock] : cif->datablocks()) {
     ObjectMolecule * obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet);
 
     if (!obj) {
@@ -2330,7 +2330,7 @@ const bond_dict_t::mapped_type * bond_dict_t::get(PyMOLGlobals * G, const char *
           return nullptr;
         }
 
-        for (auto& item : cif.datablocks())
+        for (auto& [code, item] : cif.datablocks())
           read_chem_comp_bond_dict(&item, *this);
       }
     }
@@ -2352,4 +2352,37 @@ const bond_dict_t::mapped_type * bond_dict_t::get(PyMOLGlobals * G, const char *
   return nullptr;
 }
 
+
+///////////////////////////////////////
+
+/// Load the first data block with coordinates from a BinaryCIF blob. Loading
+/// into an existing object (`I`) and multiplex > 0 are rejected with an error.
+pymol::Result<ObjectMolecule*> ObjectMoleculeReadBCif(PyMOLGlobals* G,
+    ObjectMolecule* I, const char* bytes, std::size_t size, int frame,
+    int discrete, int quiet, int multiplex, int zoom)
+{
+  if (I) {
+    return pymol::Error("loading BCIF into existing object not supported, "
+                        "please use 'create' to append to an existing object.");
+  }
+  if (multiplex > 0) {
+    return pymol::Error("loading BCIF with multiplex=1 not supported, please "
+                        "use 'split_states' after loading the object.");
+  }
+
+  auto cif = std::make_unique<pymol::cif_file>();
+  cif->parse_bcif(bytes, size);
+  for (const auto& [code, datablock] : cif->datablocks()) {
+    auto obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet);
+    if (!obj) {
+      PRINTFB(G, FB_ObjectMolecule, FB_Warnings)
+        " mmCIF-Warning: no coordinates found in data_%s\n", datablock.code() ENDFB(G);
+      continue;
+    }
+    // No multiplexing here: return the first object instead of leaking it.
+    return obj;
+  }
+  return nullptr;
+}
+
 // vi:sw=2:ts=2:expandtab
diff --git a/layer2/ObjectMolecule.h b/layer2/ObjectMolecule.h
index 08230c5c8..0a7ecd732 100644
--- a/layer2/ObjectMolecule.h
+++ b/layer2/ObjectMolecule.h
@@ -518,6 +518,9 @@ ObjectMolecule *ObjectMoleculeReadMmtfStr(PyMOLGlobals * G, ObjectMolecule * I,
     const char *st, int st_len, int frame, int discrete, int quiet, int multiplex, int zoom);
 pymol::Result<ObjectMolecule*> ObjectMoleculeReadCifStr(PyMOLGlobals * G, ObjectMolecule * I,
     const char *st, int frame, int discrete, int quiet, int multiplex, int zoom);
+pymol::Result<ObjectMolecule*> ObjectMoleculeReadBCif(PyMOLGlobals* G,
+    ObjectMolecule* I, const char* bytes, std::size_t size, int frame,
+    int discrete, int quiet, int multiplex, int zoom);
 
 std::unique_ptr<int[]> LoadTrajSeleHelper(
     const ObjectMolecule* obj, CoordSet* cs, const char* selection);
diff --git a/layer3/Executive.cpp b/layer3/Executive.cpp
index 4a8371166..b7565abe2 100644
--- a/layer3/Executive.cpp
+++ b/layer3/Executive.cpp
@@ -3883,6 +3883,13 @@ pymol::Result<> ExecutiveLoad(PyMOLGlobals* G, ExecutiveLoadArgs const& args)
     p_return_if_error(res);
     obj = res.result();
   } break;
+  case cLoadTypeBCIF:
+  case cLoadTypeBCIFStr: {
+    auto res = ObjectMoleculeReadBCif(G, static_cast<ObjectMolecule*>(origObj),
+        content, size, state, discrete, quiet, multiplex, zoom);
+    p_return_if_error(res);
+    obj = res.result();
+  } break;
   case cLoadTypeMMTF:
   case cLoadTypeMMTFStr:
     obj = ObjectMoleculeReadMmtfStr(G, (ObjectMolecule *) origObj,
diff --git a/layer3/Executive.h b/layer3/Executive.h
index 4491a47d7..59cf034d0 100644
--- a/layer3/Executive.h
+++ b/layer3/Executive.h
@@ -127,6 +127,8 @@ enum cLoadType_t : int {
 
   cLoadTypeCCP4UnspecifiedStr = 76,
   cLoadTypeMRCStr = 77,
+  cLoadTypeBCIF = 78,
+  cLoadTypeBCIFStr = 79,
 };
 
 /* NOTE: if you add new content/object type above, then be sure to add
diff --git a/modules/pymol/importing.py b/modules/pymol/importing.py
index 318ad6696..b6abd61cf 100644
--- a/modules/pymol/importing.py
+++ b/modules/pymol/importing.py
@@ -89,6 +89,8 @@ def filename_to_format(filename):
             format = 'pdbml'
         elif ext in ('mmcif',):
             format = 'cif'
+        elif ext in ('bcif',):
+            format = 'bcif'
         elif re.match(r'pdb\d+$', ext):
             format = 'pdb'
         elif re.match(r'xyz_\d+$', ext):
@@ -1134,6 +1136,9 @@ def finish_object(name, *, _self=cmd):
             "/data/structures/divided/mmCIF/{mid}/{code}.cif.gz",
             "https://files-versioned.wwpdb.org/pdb_versioned/views/latest/coordinates/mmcif/{mid}/pdb_{code:0>8}/pdb_{code:0>8}_xyz.cif.gz",
         ],
+        "bcif"  : [
+            "https://models.rcsb.org/{code}.{type}.gz",
+        ],
         "2fofc" : "https://www.ebi.ac.uk/pdbe/coordinates/files/{code}.ccp4",
         "fofc" : "https://www.ebi.ac.uk/pdbe/coordinates/files/{code}_diff.ccp4",
         "pubchem": [
@@ -1182,6 +1187,8 @@ def _fetch(code, name, state, finish, discrete, multiplex, zoom, type, path,
             nameFmt = '{type}_{code}.sdf'
         elif type == 'cif':
             pass
+        elif type == 'bcif':
+            pass
         elif type == 'mmtf':
             pass
         elif type == 'cc':
@@ -1300,7 +1307,7 @@ def _multifetch(code,name,state,finish,discrete,multiplex,zoom,type,path,file,qu
                     obj_name = 'emd_' + obj_code
 
             chain = None
-            if (len(obj_code) > 4 and type in ('pdb', 'cif', 'mmtf') and
+            if (len(obj_code) > 4 and type in ('pdb', 'cif', 'mmtf', 'bcif') and
                     # "Extended PDB accession codes" have 8 characters,
                     # try to distinguish by leading non-zero digit
                     '1' <= obj_code[0] <= '9'):
@@ -1344,8 +1351,8 @@ def fetch(code, name='', state=0, finish=1, discrete=-1,
 
     state = the state number into which the file should loaded.
 
-    type = str: cif, pdb, pdb1, 2fofc, fofc, emd, cid, sid {default: cif
-    (default was "pdb" up to 1.7.6)}
+    type = str: cif, bcif, pdb, pdb1, 2fofc, fofc, emd, cid, sid
+    {default: cif (default was "pdb" up to 1.7.6)}
 
     async_ = 0/1: download in the background and do not block the PyMOL
     command line {default: 0 -- changed in PyMOL 2.3}

From 542fbd0ec3cfdf0536acc6c7838ddb983a48d186 Mon Sep 17 00:00:00 2001
From: Jarrett J <jarrett.johnson@schrodinger.com>
Date: Sun, 31 Mar 2024 15:15:03 -0400
Subject: [PATCH 02/15] cif tests

---
 layerCTest/Test_CifFile.cpp | 302 ++++++++++++++++++------------------
 1 file changed, 151 insertions(+), 151 deletions(-)

diff --git a/layerCTest/Test_CifFile.cpp b/layerCTest/Test_CifFile.cpp
index 67a05c8b0..e173dcafa 100644
--- a/layerCTest/Test_CifFile.cpp
+++ b/layerCTest/Test_CifFile.cpp
@@ -1,151 +1,151 @@
-#include "Test.h"
-
-#include "CifFile.h"
-
-using namespace pymol::test;
-
-const char* SAMPLE_CIF_STR = R"""(
-data_FOO
-_cat1.key1 noquotes
-_cat1.key2 "two words"
-_cat1.key3 ? # unknown
-_cat1.key4 . # inapplicable
-_cat1.KEY5 "UPPER CASE key"
-loop_
-_cat2.key1
-_cat2.key2
-_cat2.key3
-_cat2.key4
-10 0.1 . foo
-11 0.2 ? "TWO WORDS"
-12  ?  ?
-;multi
-line
-value
-; . 0.4 . .
-data_bar
-data_baz
-_undotted_key "why not"
-_typed_float1 1.23(45)e3
-_typed_float2 1.234(5)e1
-_typed_float3 1.23456789
-)""";
-
-TEST_CASE("misc", "[CifFile]")
-{
-  // syntax 1
-  pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR);
-  // syntax 2 (requires move constructor)
-  auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
-  // move assign
-  pymol::cif_file cf3;
-  cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
-
-  // check all three instances have same data
-  REQUIRE(cf1.datablocks().size() == 3);
-  REQUIRE(cf2.datablocks().size() == 3);
-  REQUIRE(cf3.datablocks().size() == 3);
-  REQUIRE(cf1.datablocks()[2].get_opt("_undotted_key")->as_s() == std::string("why not"));
-  REQUIRE(cf2.datablocks()[2].get_opt("_undotted_key")->as_s() == std::string("why not"));
-  REQUIRE(cf3.datablocks()[2].get_opt("_undotted_key")->as_s() == std::string("why not"));
-
-  auto& blocks = cf1.datablocks();
-
-  REQUIRE(blocks[0].code() == std::string("FOO"));
-  REQUIRE(blocks[1].code() == std::string("bar"));
-  REQUIRE(blocks[2].code() == std::string("baz"));
-
-  auto* data = &blocks.front();
-
-  REQUIRE(data->get_arr("_cat1.key3") != nullptr);
-  REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3"));
-  REQUIRE(data->get_arr("_cat1.key6") == nullptr);
-
-  REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false);
-  REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false);
-  REQUIRE(data->get_opt("_cat1.key3")->is_missing());
-  REQUIRE(data->get_opt("_cat1.key4")->is_missing());
-  REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false);
-
-  REQUIRE(data->get_opt("_cat1.key4")->is_missing_all());
-  REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false);
-
-  // looped data
-
-  REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false);
-  REQUIRE(data->get_opt("_cat2.key3")->is_missing_all());
-
-  // template getters
-
-  std::vector<int> vec1{10, 11, 12, 0};
-  std::vector<float> vec2{0.1f, 0.2f, 99.f, 0.4f};
-
-  REQUIRE(data->get_opt("_cat2.key1")->to_vector<int>() == vec1);
-  REQUIRE(data->get_opt("_cat2.key2")->to_vector<float>(99.f) == vec2);
-
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(0) == "foo");
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(1) == "TWO WORDS");
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(2) == "multi\nline\nvalue");
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(3) == "");
-
-  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(0) == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(3) == nullptr);
-
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[0] == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[3] == nullptr);
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[0] == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[3] == std::string("ABC"));
-
-  // type deducted from default value
-
-  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int
-  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int
-  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. / 3.)); // double
-  REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1);
-  REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f);
-  REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo");
-  REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted");
-
-  // as_X getters
-
-  REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing
-
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10);
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11);
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing
-
-  REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.);
-  REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.);
-  REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) == 99.);  // missing
-
-  REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) == 0.1);
-  REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing
-  REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) == 0.4);
-
-  // out of bounds is default
-
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99);
-
-  // alternate names
-
-  REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10);
-  REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10);
-  REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99);
-
-  // wildcard lookup
-
-  REQUIRE(data->get_arr("_cat2_key1") == nullptr);
-  REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10);
-  REQUIRE(blocks[2].get_arr("_undotted.key") == nullptr);
-  REQUIRE(blocks[2].get_opt("_undotted?key")->as_s() == std::string("why not"));
-
-  // float parsing
-
-  REQUIRE(blocks[2].get_opt("_typed_float1")->as<float>() == Approx(1230.f));
-  REQUIRE(blocks[2].get_opt("_typed_float1")->as<double>() == Approx(1230.00000));
-  REQUIRE(blocks[2].get_opt("_typed_float2")->as<double>() == Approx(12.3400000));
-  REQUIRE(blocks[2].get_opt("_typed_float3")->as<double>() == Approx(1.23456789));
-}
-
-// vi:sw=2:expandtab
+#include "Test.h"
+
+#include "CifFile.h"
+
+using namespace pymol::test;
+
+const char* SAMPLE_CIF_STR = R"""(
+data_FOO
+_cat1.key1 noquotes
+_cat1.key2 "two words"
+_cat1.key3 ? # unknown
+_cat1.key4 . # inapplicable
+_cat1.KEY5 "UPPER CASE key"
+loop_
+_cat2.key1
+_cat2.key2
+_cat2.key3
+_cat2.key4
+10 0.1 . foo
+11 0.2 ? "TWO WORDS"
+12  ?  ?
+;multi
+line
+value
+; . 0.4 . .
+data_bar
+data_baz
+_undotted_key "why not"
+_typed_float1 1.23(45)e3
+_typed_float2 1.234(5)e1
+_typed_float3 1.23456789
+)""";
+
+TEST_CASE("misc", "[CifFile]")
+{
+  // Parse SAMPLE_CIF_STR three ways. Syntax 1: direct initialization
+  pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR);
+  // syntax 2 (requires move constructor)
+  auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
+  // syntax 3: default-construct, then move-assign
+  pymol::cif_file cf3;
+  cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
+
+  // check all three instances have same data (blocks are looked up by code)
+  REQUIRE(cf1.datablocks().size() == 3);
+  REQUIRE(cf2.datablocks().size() == 3);
+  REQUIRE(cf3.datablocks().size() == 3);
+  REQUIRE(cf1.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
+  REQUIRE(cf2.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
+  REQUIRE(cf3.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
+
+  auto& blocks = cf1.datablocks();
+
+  REQUIRE(blocks.find("FOO")->second.code() == std::string("FOO"));
+  REQUIRE(blocks.find("bar")->second.code() == std::string("bar"));
+  REQUIRE(blocks.find("baz")->second.code() == std::string("baz"));
+
+  auto* data = &blocks.find("FOO")->second;
+
+  REQUIRE(data->get_arr("_cat1.key3") != nullptr);
+  REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3"));
+  REQUIRE(data->get_arr("_cat1.key6") == nullptr);
+
+  REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false);
+  REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false);
+  REQUIRE(data->get_opt("_cat1.key3")->is_missing());
+  REQUIRE(data->get_opt("_cat1.key4")->is_missing());
+  REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false);
+
+  REQUIRE(data->get_opt("_cat1.key4")->is_missing_all());
+  REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false);
+
+  // looped data (_cat2.key3 holds only '.' and '?' values in the fixture)
+
+  REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false);
+  REQUIRE(data->get_opt("_cat2.key3")->is_missing_all());
+
+  // template getters (missing values become the default, 0 if none is given)
+
+  std::vector<int> vec1{10, 11, 12, 0};
+  std::vector<float> vec2{0.1f, 0.2f, 99.f, 0.4f};
+
+  REQUIRE(data->get_opt("_cat2.key1")->to_vector<int>() == vec1);
+  REQUIRE(data->get_opt("_cat2.key2")->to_vector<float>(99.f) == vec2);
+
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(0) == "foo");
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(1) == "TWO WORDS");
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(2) == "multi\nline\nvalue");
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(3) == "");
+
+  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(0) == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(3) == nullptr);
+
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[0] == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[3] == nullptr);
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[0] == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[3] == std::string("ABC"));
+
+  // type deduced from default value
+
+  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int
+  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int
+  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. / 3.)); // double
+  REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1);
+  REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f);
+  REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo");
+  REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted");
+
+  // as_X getters (second argument is the default for missing values)
+
+  REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing
+
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10);
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11);
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing
+
+  REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.);
+  REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.);
+  REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) == 99.);  // missing
+
+  REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) == 0.1);
+  REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing
+  REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) == 0.4);
+
+  // an out-of-bounds position yields the default
+
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99);
+
+  // alternate names (either may match; the default is used if neither exists)
+
+  REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10);
+  REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10);
+  REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99);
+
+  // wildcard lookup ('?' in the key matches a '.' or '_' in the stored name)
+
+  REQUIRE(data->get_arr("_cat2_key1") == nullptr);
+  REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10);
+  REQUIRE(blocks.find("baz")->second.get_arr("_undotted.key") == nullptr);
+  REQUIRE(blocks.find("baz")->second.get_opt("_undotted?key")->as_s() == std::string("why not"));
+
+  // float parsing (parenthesized uncertainty digits do not affect the value)
+
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as<float>() == Approx(1230.f));
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as<double>() == Approx(1230.00000));
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float2")->as<double>() == Approx(12.3400000));
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float3")->as<double>() == Approx(1.23456789));
+}
+
+// vi:sw=2:expandtab

From 23bef142aefe1086ab591ebc32028fba677c9366 Mon Sep 17 00:00:00 2001
From: Jarrett J <jarrett.johnson@schrodinger.com>
Date: Sun, 31 Mar 2024 15:47:26 -0400
Subject: [PATCH 03/15] Slight cleanup

---
 layer2/CifFile.cpp | 4 ----
 layer2/CifFile.h   | 5 +----
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp
index 6bfca7a2c..c14aa6e3d 100644
--- a/layer2/CifFile.cpp
+++ b/layer2/CifFile.cpp
@@ -219,8 +219,6 @@ const cif_array * cif_data::get_arr(const char * key) const {
     if (columnIt == category.end()) {
       return nullptr;
     }
-    auto arr = &columnIt->second;
-    auto& arrPtr = std::get<cif_detail::bcif_array>(arr->m_array);
     return &columnIt->second;
   }
 
@@ -623,7 +621,6 @@ static std::vector<CifArrayElement> run_length_decode(
     std::vector<CifArrayElement>& data, DataTypes srcType, int srcSize)
 {
   std::vector<CifArrayElement> result;
-  std::int32_t value = 0;
   for (std::size_t i = 0; i < data.size(); i += 2) {
     auto item = std::get<std::int32_t>(data[i]);
     auto count = std::get<std::int32_t>(data[i + 1]);
@@ -778,7 +775,6 @@ bool cif_file::parse_bcif(const char* bytes, std::size_t size)
       auto categoryName = categoryMap["name"].as<std::string>();
       std::transform(categoryName.begin(), categoryName.end(),
           categoryName.begin(), ::tolower);
-      auto rowCount = categoryMap["rowCount"].as<int>();
       auto columnsRaw = categoryMap["columns"].as<std::vector<msgpack::object>>();
       auto& columns = categoriesData.m_dict[categoryName];
       for (const auto& column : columnsRaw) {
diff --git a/layer2/CifFile.h b/layer2/CifFile.h
index 760da68de..e27d11e13 100644
--- a/layer2/CifFile.h
+++ b/layer2/CifFile.h
@@ -87,7 +87,6 @@ using CIFData = std::variant<cif_detail::cif_str_data, cif_detail::bcif_data>;
    @endverbatim
  */
 class cif_file {
-public: //
   std::vector<char*> m_tokens;
   std::map<std::string, cif_data> m_datablocks;
   std::unique_ptr<char, pymol::default_free> m_contents;
@@ -177,7 +176,6 @@ class cif_array {
   friend class cif_file;
 
 private:
-public: //
   mutable std::string m_internal_str_cache;
   std::variant<cif_detail::cif_str_array, cif_detail::bcif_array> m_array;
 
@@ -248,7 +246,7 @@ class cif_array {
    * @param d default value for unknown/inapplicable elements
    */
   const char* as_s(unsigned pos = 0, const char* d = "") const {
-    if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
+    if (std::get_if<cif_detail::cif_str_array>(&m_array)) {
       return as(pos, d);
     } else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
       if (pos >= arr->m_arr.size())
@@ -319,7 +317,6 @@ namespace cif_detail {
 class cif_data {
   friend class cif_file;
 
-public: //
   CIFData m_data;
 
   // generic default value

From 2607fffd2cc8ca3aade9f0d45d06de33dfd5e3c3 Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 14:55:18 -0400
Subject: [PATCH 04/15] bcif loadable

---
 modules/pymol/constants.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/pymol/constants.py b/modules/pymol/constants.py
index e17cb49ad..4d7ca8e24 100644
--- a/modules/pymol/constants.py
+++ b/modules/pymol/constants.py
@@ -59,6 +59,8 @@ class _loadable:
     dxstr = 75    # DX file (APBS)
     mapstr = 76   # unspecified CCP4 or MRC map
     mrcstr = 77
+    bcif = 78
+    bcifstr = 79
 
 class loadable(_loadable):
     @classmethod

From 5ba5767bd2925a041ef4f10a5b76fbb281f69ad3 Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 15:12:46 -0400
Subject: [PATCH 05/15] Load from bcif.gz; add unit test

---
 layer3/Executive.cpp                |   2 ++
 modules/pymol/constants.py          |   3 ++-
 modules/pymol/importing.py          |   3 +++
 testing/data/115d.bcif.gz           | Bin 0 -> 15343 bytes
 testing/tests/api/test_importing.py |   8 ++++++++
 5 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 testing/data/115d.bcif.gz
 create mode 100644 testing/tests/api/test_importing.py

diff --git a/layer3/Executive.cpp b/layer3/Executive.cpp
index 8f61a049c..dcbacc726 100644
--- a/layer3/Executive.cpp
+++ b/layer3/Executive.cpp
@@ -3693,6 +3693,7 @@ ExecutiveLoadPrepareArgs(PyMOLGlobals * G,
   case cLoadTypeSDF2Str:
   case cLoadTypeXYZStr:
   case cLoadTypeDXStr:
+  case cLoadTypeBCIFStr:
     if (!content) {
       return pymol::Error("content is nullptr");
     }
@@ -3715,6 +3716,7 @@ ExecutiveLoadPrepareArgs(PyMOLGlobals * G,
   case cLoadTypeSDF2:
   case cLoadTypeXYZ:
   case cLoadTypeDXMap:
+  case cLoadTypeBCIF:
     if (content) {
       fname_null_ok = true;
       break;
diff --git a/modules/pymol/constants.py b/modules/pymol/constants.py
index 4d7ca8e24..dc19cdebc 100644
--- a/modules/pymol/constants.py
+++ b/modules/pymol/constants.py
@@ -84,7 +84,8 @@ def _reverse_lookup(cls, number):
               loadable.map : loadable.mapstr,
               loadable.dx : loadable.dxstr,
               loadable.xyz  : loadable.xyzstr,
-              loadable.sdf2 : loadable.sdf2str}
+              loadable.sdf2 : loadable.sdf2str,
+              loadable.bcif : loadable.bcifstr}
 
 sanitize_alpha_list_re = re.compile(r"[^a-zA-Z0-9_\'\"\.\-\[\]\,]+")
 nt_hidden_path_re = re.compile(r"\$[\/\\]")
diff --git a/modules/pymol/importing.py b/modules/pymol/importing.py
index b6abd61cf..da323aa02 100644
--- a/modules/pymol/importing.py
+++ b/modules/pymol/importing.py
@@ -91,6 +91,9 @@ def filename_to_format(filename):
             format = 'cif'
         elif ext in ('bcif',):
             format = 'bcif'
+        elif ext in ('bcifgz',):
+            format = 'bcif'
+            zipped = 'gz'
         elif re.match(r'pdb\d+$', ext):
             format = 'pdb'
         elif re.match(r'xyz_\d+$', ext):
diff --git a/testing/data/115d.bcif.gz b/testing/data/115d.bcif.gz
new file mode 100644
index 0000000000000000000000000000000000000000..08a37fcdeb4eb06b8d37e591e8e49853f0516e9b
GIT binary patch
literal 15343
zcmV<LI}pSliwFoCWxZtp12Hi*WG-T3X=VWIUD<ORM|#Iaik4(a_Bx4Gl}gQeH<9cR
z5CH1jWK#wK@Q_IY!C~2(a&-*`4PcgYz%v7q$V(D?lRW#_%e6UHrbvmRDA|^6+2!@7
zHk-HDm51a(sr>^sPkGA2ddpV2=Ku$T8C=AG194cS1Ym%{*T4SyJNx_k)lJAzd=UyC
zusrk38Hz4RJkN*(dJrw3e>PX3AksW{EgYH(g|~{NL{2k2wJg5&%0>l}=!5H#$hpK8
zMM|*D3p5me<@xrj>m13#4FEYwsBcm{Q)4;tHRaW-ba5Tu<(2F57t>t%wZC0orn%zH
ztR$dkF+m{f+xQ(x?fNV7^&8W53CdU3rFs?mw?z5<3G_~xgdM(A5}~Ah>Wx?BPhCqu
zMj|&5Wo4T4Y>EPv!yo>hd~s)<LsUZ{O;YG-`9=YKL!7U1l6Q+1^PEVRIavHx{GlxU
zm(XWQM_^O=Uh$VcPe2~oF9RO*v(A#@^1oac_*LbO%IJ3hSu4^Kpm?st3#^9C8_Lxi
zMVgWTel&21c32k8Ca4w*Ya#S44gl*|3s?#W+cF3$+Zk|pQ=xNfhF*ikG*5HVMv0^(
zUii%aYmMBmn~deN-+5$Dqw$)LMj3&(iTqnK`DlrdQY-y?pU3;>E0R?G)|oT%e1_0`
zhIpZT27f8~Iub9yGo(n-^clxZ`1y{j<0ioGD3BrqQEh3wsa|Sa=~;dZeyo1;vFfI2
zJDr%$#Iw^^uAm3DV;|^%eZb=nj49~*Jzj4!yK1;%EieMeet>0tEHO77i6lTI91ce=
zgu`dU4!}4te$*V}5Ah=0Wo?q8plBl8oQU=|A{~S=%8_$L+cCDFr-8BGbJWv-Sdk$*
z_}a;6zsKiOCgZw18Hv>V2%!I7BpixHBjGcMW;_A-e+Nv$*Y+?84Uz9xrsES?L#bAg
z09R<dFhGSCky3A%j^9__yU7daJ4nVbb7l~D_XNRMyHSjx)!n?l-@&hAlTJbgFTii@
zkGF1QYTN?k%2K64eS@L_s$?C5-9{AED5)kuf<@w=ae$R@TtV6A8Ph|&!;;_Sg$9F|
zbHI@BQwj7M2&B1=1P&0$9$EBOV`-r`?7y0~pk8n1MkALlO$Eajf{}C4aQI>{d@dM`
zMv)$pUz<XY&}(Odk&DrA^lUI3MK2q;_szCscVwAyd#nWS;jz|9YDmSgc2cnjC6Z=D
zO)CDdtyHvdB~%;SYjC>mG`?gIciXc-?Cb|QS8}yDwAZTwze3|k$)G{l5@3}VX{;(8
zF#f|9<6pOj0p>YcqDh7jAOn$E?EnC*7~p0Rij+WCZ3zhcvjYb1F%G;-gV3wm<eREw
zbHQl>#~2>@*i<W&1-@1_L`#NdDUY`s!MLp9yuIidGYWk+Q#fGV)!7*B%d!}4V}4u@
z@im!c_uUA!;0(PHntEOdLlZbyPf7&EvsH#BISS&-=qbZMIq;fLgF8qYm7sve1jPgt
z=oMHbD3YOSj2sOXZJ!bg!@4mgW1f@8zxIU2{K$WAu|UU_Ir^EYtWmQ(_5BhzJ$gM|
zN}ECZjTTOTss{qzv49&r7;5z3y2iKrE_1qYe4j1SrIH|fdl09#s0w5O7CeBIcy{+$
zf#-_5=@L<%3d}~F?bBc(I2+S2?m2$!*x@6c-Q15%g$yl0cE1En`=#p{fHf1amA;|d
zGJ%69TAHNyI_B1W7b_NQp%*J6FvyjV_CS|(4Qa`dme*RdS3oCnl=mP%d<A`-ET=$;
zA<IrMcb@phH(FM2Lk<O$NqiqPk5QCJsn&?I{Y3TRl~?=8lXiAg-_9f#9Khp#qpzwd
zP<ba$@2U4u-nygO!2+aKihx#vO=$7R0k=K_>0Lzhl&E(!7>POn6sw5dwh9A4e>SXz
z=_Mzxh3%@qV-aZ6;FpH5Ut|x0AUmt<>T^SN`cR!3pnD(hmQX?kvW<?M{sJ?29{K5u
z4#3K5!67?3H3`_k=R8K5PkbgAi6s}KP8d;cYQPMmPgp2b_#!gx;R<B<DvRB`K4TpR
z1F^e$uHsn5_i>SGn|8XT2586{Vm3kZie6X!-#&J$J7{Bu-b0>w4sskR6eo#PC@$1Q
zI7wUyO^0%!C6cPJBsWQ14y6T>E29g!P_{xYFGKX^?}RdBf#&!XnyO3^iIDAcf2hOU
zAMzY|^vK~O$9CP`v1a=f`?}6c5E?TOHvmOTa@s^=xqs}pf@D7eE?ZTBW=WwQiB35n
z3~~s=Y#zkCCec#QM(Tbb`rqEODWM1jSR_`FjuQy65hJ8Ig5-!;FtHRPcuGQ}5(rWv
zBB2XJj1ZuR_y;pmD9*z*T9j~pj^In^10+`@rh_xcWGC=v@qLIa`aT8{aiNZs1w$;5
zl0;K53tz6`&jk~(3b`WEv_xDf;*oh2->yMIg0CUFpO}LTy%r*JmED`|hBeXqC3GRB
zQd>Md8=MX1f-(6o9*jS&euSYpsN7b$(+j~2Stnv>?bbzF)NV5)kY%j>P443W?tAu6
zUY@hyzMb##H31C<kcEN(S4Myh@BiuL&@|1nJQGr8GnA-vNJrpVy&PIZFQcmr(iA)u
z%7n7$-vyp4^4w$=3L*P_`_8lR#Q2fl>oglHSmGLm!s`RiSd94}WH$awPr+7|EJIMD
z1ySlkH+|;gy9LXqBkmXNiSsGMO?W)1uYU93bQ3~1WcnDgOY;8SR`M==^@!&Wn#fzP
zL(=GTn^Jb@7yWQq2g({M#{b1vda7<~Y(*DAESp;#fmMF*@^m(lIKLFj<zv~)i>c|@
zbLq^&bnL7H7VtRwu~@*!RI2q6p4$(E*Ynx@a}KCx@%b#Wr+TiP&t{iK0Mq;53}2YK
z6pTc|(Qr6=F&Mr$bsjy42BY$Wi{aqe^JgzPfQ27HKb>?Cf<2y1bR}m0&Q>w|*Iz#C
zIoeEPf#;V&6<K+-Sm}9V_r7J-9Tb<)(&$}AB?3O5R={VdrbJwlt`!!c(I+$;+vlFh
z|NKq&fd+HFP*WpM;=l?neE6&1{rmy`AOHAz47LP3jbYtw8CKsliY{aI$XLex0CyUp
z2Sd<HD@Jcz?l%<CW{OVs1JA_B#v>*?<K8jfxPPL_jO`<;IIv+f^ukGVBUi6)e`Fpr
z>@8xL;A<3=@SCS0$J6qoV7fU5nI(~`Db$4eB#{lpLeJCUD#_B4Ku!{i=y?(i3Vkk#
z--lkBwLc1Ztiv21^Bg&XqmYLmef0Q=M<$fXKB)x!(9dtb40wM#;q`vu_51(t=$Fk#
zta)(KENtxUUYhr{t~%}qe6ndjAdTZfbcu!nNM+*lM&o6lgu{r*?~?}k{kjz@f%Xek
zRn!w$DW61t4Jm(8Wiz~!ftDJZ+267XFAqK{-oCQf)9ljszi-q2`D<zw!83Ih3UX}L
z0e;<~F;h6V&<%i+Hyr^`H^e&9FOmb`^!rWWyrY<{eHS?H2dMX_aFG7=xl_T!>1Srs
z`1ka5CjWH$nV9}j9Q{5WoI3TiQ#d+g&e1zK2~lH6unOsN#Ud`*bIYvHJ*T|?))-J*
z0gFAKw-Was=!v=~^95I93rH98CBX6wq-qQ_ZsZtz!Cwd2nr*T)WGW#w0!-^D72T7Q
z+i-Dj$!N1Rsh`$pF6Mi{yriZFUCf7pd7tI%F4{e!-7x4?+_zPO9$-sUv+94O=Izu=
ztpS^!I-A(j%i=>!a1rurb%8GMT#aHNFCkyj0h80n6dQNJbD~rP49zW*<-r4TmE#>S
zc3<1EvpmPMHZ*m4yAJKI6YswQKr!q@_%h%*2pA|K+t{9(sQXWyPRu?n|6!Z_bRwpE
zkci8F4gl&Y3!px+0t!&%;M>4lpPkLe^09c_0f<akl?Et?NCL?f?WiStJ2vU$+L;gJ
z;Y*}Yh7#C~6uP^Qby~f3_wik&Hd9sK9M9>M`x|>w?mnY(x25y;wc&&hI?u_N1z&E;
z?F8&tk8`Ox2aLOQJsNrJj^1#@BHNJfv*fSN0s9FX8c#N*%HHN`#`^O;CQAAuxnrW_
zke=ubb`QDJ0cQBj)28|i-;Km9vN;m5g#C)ObJo=7_wDXV@p>P4n_JIsR3MTx{J@h=
zgXCd-8KeE3O1ds%#70ZAXqZsb1)$M!#n5-t31tYK+sSOJ+?$vkhA{5j83u8%PZh|m
z{hIcW>KalHCw(J&oJ7D5FnW&6lAxOYazLKo$PrEK1`J(*vDlTJSKOUr1i1GqXMbbU
zE?V|_Jq?casJWLe?b$JTmzV=Kwe-?~>h*KM=}c-d6^xz=CX%TaUd(6a6Z1>S;JH)5
z#ne(NJ(tR+=VG~}c0HCzE~T>V-VuK>*ZvmBd$qX574(;JfaE&;)YqRfY2B<3vx{{k
z2<mK~f#2abaC4BSsC8`Xc3t^)H-v7g`Oi^UTCw;gVlEOTrUhatN~A6l>3AfJUO7uF
zog-4`(bG%Hvne8Vq4^?uI)^^ufR*sr@ngpWRx81-Z1mW#$Ggf#KY=CgfBfNt2dAz+
zc<{ncJf6c}eDT1%{hZqNNpt#uh)q3cX#2EDignhGeV!-mF-xw-?2mzUHbHi)O(&Y&
z)jhb<eF4_Nl|IqrKVc?lA#9J{bT3qQ@=G<}4|&tSai~XqVpKiq6Pic;rHN;KlPeZv
z5i#&!%aGXPX4MG;=ssF>yeZ#?H{p*g^;;NvcXisiI%WPC8HlgV{&7C$1bDqZO-;9j
z_m(U>`pnw7c96LpBp<9tbA7cF9j>Y5RDbbA&t(GUOH<p$inscpb>Oi#mTuF`I4hQZ
z!r*V6XiDX(Ib4Ig^_I=<R^BEt6Su#6Z7*Ub=si6kcdIYJ+Pho6{J>n6ArbD_Y_`vn
zAC8043!2?KD3kx`^`7d|K`}%5@jcmS`F7;x5qSYWGL6tL{_}f%{zLfDSipBku3hxW
zm(c${pLaVTKM#xryngNVfM5BfPkDOCe+ZeeUHUT*-C+0Z{<teWKUinS1vEcI_r@-}
z!(r}?!S1lvy|IZ+9p&ESdv<Tk1-;(A$?L12^IDrtkuOQBqyP=4ZQHXOa$_kUU&#Ks
z1CVaC&@%2)rWjHb=@L!#Q>NC{p{*sFgSZ^a0Vr4<@vym~oP6i*PS!avTA%+t^nZEC
z0rKsNf2$tD-4<LKH|bsj6l54pNd5SrA53$dk+lFYZhLsM&kXx#cYc3QBk{K73scey
zqf5H6V%(DSVq3ybEDVeAn<Q>(3`nM0>6&8>x24_xy<yS5rO;l0Ev@<;Pr3?wnPk}k
z(H^)nEZVPq2c#3zJxTAQ1|g+%2dQ!N%QeJ%YqWceNBd$YWQ^BY7D~G(?0)~C>$$sP
zm876@$~nL<fVp6=4TQTuAx)f*I2eUbl{OAFh5<vtW3-o*YJ_5Y5ulq%=o2nNCJ4QQ
z=m9yhz(7%L)zg!-zhP;sZvePfl^8>r=D`(W8+|n#>pbpvUI=DlFA|CQnVC#1o|{iC
zIRJ#mEI_zTDJNQ3@!A1jlEyW(qpTsIqGm7%Osda%YGkrqglKS59k7setuiy=ct=BM
zk(8tgTs5N9pUd~}|2r)6{XLETDikPuFr3I+bX4utYkmByp}9}}Hx_)@nbasQ-fI>Y
z*g#x0RIF}k*Y+qo?9yhppO2+e8Cjd<v(C_E$1QZ(x&+y(%aLYQH%12Z*8%NdAau1l
zH;_=!%&?^Jw%X&>o+-Uoog*%U!r@86dCUJvi{-DBdsgRw+}z#e^0u1ko{45MYp7tY
zo;vRc=<7_-4z*$9`WYWaSMcu7%GB!B_~C)ozUajpXx{bZi~}6Z0oB)Hd@5I$FQyZV
zvqUVO%%Nx5G<sovDGBD{3l1pK81o-Ks^fs>lM*&e&*|Iy2Ae@HCuhnvy#J#IsW$G@
z6`ZeAJlAK-XczN7G2dHHT^IJAVK*Mfya|eQsU&cQ)@V*UEn}Nx3p9>)^x>en4Ve3C
zcYRk6d7o~=en5XGlS(ZTx#S{J__16*lXSp_+7q5;uI_d&Jiab%=;19(V`@kXtEzGY
zGYZ5$t|72}L%qEJ189;f*EMoSfU)m45<;oPBr<PP+4%gzLJX&eh<I!fak~RJ{n+EC
zILq%F0gOLNE@fgf!KKvFd_Id%O`J{7A&}?OSqEVI*n}yz$`O-jxiN^7@$OahrcuE9
zA<&WTkr4ZHB*Z(Kwh_o8EpOS@)XfhZt!{3uk}FW%Yw9r4l&whk^aH9Q@GJtA0=xhi
zvIYSQT6TLoki&nv=l~Y?8tkOq4xJjVBVlWdw6xHxMO6+|1zzH*mQ1+|y^YX|6p0VV
zK%W95V*OoP3!;}Dfd5zrZ|jp5{Reh6i(z}7(?U60yOfRqON{Vp)XLhft{U5Q_LrDG
zY$uCYF#3+h=ptVeC}>D6I@HSBP>3A??*1T~j4dP+L@qy_%)Yo3pUb3jnIWX;yKi<r
zqH%=v1!n{vJq8Z#T9d2&vfkGkU}NJ|5|Ew|>5&L7JS-FNQOg7rAjvW`Hwu$5!pc)K
zWpB&)yC%ziI1svE2}6tSDYruovTXtZmKaXdjIMt==;b~i^92zKE3gPCfySP#tPaa8
z3Fh6sf;wK(cEnoaH8T*0<cVSJl)Y0#OF=}dK=!m5e$}0Blj(k^38e~MHY&^hX8EyU
z&GME~zXlN3_Ebml?Z-W)_aT{H!;J@nPwr}!OI2PJ=>h}I5O<NnD@rzlGAtCz5X`$U
zS-_;&=o#E$c!gwY?#6vHkQ7gzgA(GS6(pTxMeN!wrcg}-TI9hMnh}vAgQ7&Uz3-?v
z7*HuoPk_Z5S0p*A?t)|j(vCh8X~%)gk;$k7V6r+O-HxttvTsUrjfB9jJl{I#-s#;&
z2L{z|@7a)Dc&yVMU6Zz*7({Tn;ef6|8nGC_$BGa~7XVe05Gt~V3&?_K9EdyHyNL@u
zUJSqL!es^5r{?S>u%e5Z6=vH=mdG(DlaO40(xf;nX>zMU`sl^U-bb`+AslN<!{tz{
z@(@%RpiG_#Nc}bHIX0|`Zlc~b{miHMT|Gz{u*wU|E>4|sDhbeysF>k&uN5Z%1)=9%
z;|Bw+`-Bx-Zm*&aOlDBw?vk`b>%*2fhzpi=c5fG~Dk-2BBq+Fxume)a5s(BD*?%rl
zW=P%QDXLb*`31nzZpX+@5Rq#xL?#ezDQP_xk}eCGvJlB(e5U^*@)G)WA!-Lv^d<D`
zLevhTsh7~N3sHxK=5kR-h+Hn}D2rT(%s29gJhOs*T$V3F#*GNHiwHDBCA)55xwW^v
zi<Tu?1s5%gIsh&4PYJvz;ys#l5!lph{$aqY&#poT1DyAif=G!J@cB>50(5~g0qW;U
zLmzpW0g0Cz^9d}?L3}EDjq0_W?qGoSi3}RDt0aN1(iD&?2s4s#*~~fv^OU+Ep#XWR
z0$Ezb8!*xOEJI1zkM9aHQ{cX708WP)5UY>^6e+qn1~ce>rWAaD8Jz7CVWaP%t2V9E
z@5(0Z6)2Dl1CrT1$hbI}<@#E2lG)4!%QWer;)GyYTyP;WfoNL?QOlvrE<%m!7^R%U
zj=GJJI!v=esZf{IGJ3Z=w~)o@Wx7~HW<@NK1|^1ckIgj=Q&7AhKt^e&NAa*!0y8}J
zoMw7G735&x^>b^y-0d;#9ZW5JxKdojtw{?|ZsbV2h;>8^^O}oTFNld1G`3~3>>}1C
zp5J8kS1ZWzb~oKx^Ow%1&49R2qp4-FLUPL>k#<WTO=CTO)Wl#iZt_-T7rZX)vR3S-
zaV?>XSG&M}E3dhrnSy3)EwHWmq7_<{wEhA=c74?rU-ddwrx;scSYL=`a}F4}RjUQ1
zwpnZQCGo5%A20x0_a?V>*p0XSwDq=J4M9v(fTVg12K<?~`QG<h@6FI<lI!iR0b|dJ
zrW>jjXHR#`8}9gS(;X#V>ak_W4-Mnq8r3S>8zWcQY!sw-<#P4{wx!dFX~2?beXnT(
z^&bv;yOx{y>(@;8TJJ}D{vJ4Um-3d)tNl^Fw<+k0@|F#gqd|FeVBE3~ln;nohRx?@
z+iOPy^Qn>LmZ_1IEoM~@4(IdRGUZ@JJ_mK=IHsBUQRA9s>PJV;wD#juHWeC9{XMwm
z-8G4pn8ByO?$4$&iDV|3Ag;u6$qbRrW%6-+$Tcy)M8wc<FtHRPQt=!<515GMh)C!H
zdVLnXFSVE@GRbUeA@7j5!mI^!@2V$Jx+NRkY@CS4vTjbl!*(u(m+)nmp}LBJ-ciLl
zScdggUMPx&GX)Q>T)!nVlm^SdhyOMb-wqhPRfKX~f@^~>9Jv4g>|G0R8&#I=*WLQ~
zlf;Q_en~bVff$&CjlTKJ?1C+aC_6ziCRt$FS(Ple6B$`Dl9Ekksy3;eg&JmOhXn$m
zsG=wmNXEhW5jF#4m=Cp3HC4&ZGEfv*K2y{Vvn+(oKmsI|CGC6NEvdWJQtQ(#z0S1l
zcDJN<`@MVbyYJll?z?vpJ8dmp*ze!iw|>n+@0#`N?p(U?R)7DxTh^`Vhwo!Slto2E
zDPB6@hfOfCeM|y{ipy{|zJL)e-$&u_>+SPsKm1Gzv^Eg{e&xuGE(J1pf*pZF5wv8A
z{EDH?!EiJ@T+1BD_iSQtOLRR~IyKj;JjFCBRgDAw%9pC9ixnK4F~cKarbdx92r6{s
z!?`k?l$k8mFH%}!eCPF+*!HY(n_<&<j1k1jvA9BjMU%qvYBcXK%1st=W+XRPJYI6H
zs4DWeq{vnGnEk^;0e03`9pym1jR7-Dp&Zn2*edn=jXSvI>J7o(2SXtykqF1sPn3i^
z2@dB-5KhEKV(~G<P?WK%xnP0>b8jFPWdac<nPlSHjF?M-T1QxF1takP28NhTMgltp
z=fh#a#E67UT7Q(IByKg|QDd9mCUMzHR`8IG1kIxwhG$dicH=caNGN)U-?AMKMu)5S
zel>%hdwR7;TFO<>0gxRu2hkLgSBzMVofG*fF5kXDIGWJQFv4g?sI>;TKscD9Fh#lk
zSeqss4AKUMa5PXj>b6vjxlJn6lZcwQ=who(>8&9{6*p<L6+{CAO<Ybi3`N{%(cl)?
zgIkiD16YAMk(9_b5+6~{8^F1=rGcE8!L21Gn6aVAcIEZNDxJF;MMC@((V4Bkf_6y_
z=i85gtY2Qj>34|Fl4k^ml=fmhAA_SNHpaxmA<gKte>lwk0}RMzDIqFH1Y`)I=T|id
z$&#T1FUgMiz$PXt3B7qP|5=?1;Bai5i3gZ%<8gTaUDd-TQN#msGeyD)^#erq_b%;Q
z>RE~db}p6+DJPGMILQ(@BG^g#9?_wW0%t!|oz%~Vhq=YWg{b9hZWj!wKBrVXk|xWO
z&)NbmmkWb#n@UB<!odcotoZgoo=NHxVcXK}OTUXT5hl3}g?ijUCuq9fHY^C5uAkis
zd{l%^cevz8uEWiLf~M<j_n)BYdKGlN>J}aDTNE^1$5hwb&czAc@0-g<j1Prlk(`O}
zP`w61$^R0HMWY2nWO1#L>AT7jPcc4po;I=Tr+*WQZ4E{k8+WpECkJ<Oawivea&xB@
z+-c?J?ZffdCL4FMb0-IPa&jjZcXD&572JvcyH)(3FXzwvb8P&BZ2XgK{G)9Avuyms
zZ2Z$~xn}UsvoDu`?>ebIzZR;S72vs<0G8R-wnC`>T#;%)^2Y7Esx?@m@nST|2?fUl
z3}PqD<{Wjcpi4;I<6K!CCb$*;8#ZB%HhUm6qVqFDvE*h9=t~n)`WA@ya4F)@dE%5)
z$sNdMwZa4W{kq`6yYK{YS$J?uXG>M{)Mb+uv1eYMJ;%nA(YP%T2@Wxl0PoktgH0v0
zF}C4eW`YJZAyQf*;)Y^l<4p{sd$qiD00CVm`q%aiZtU;v_h7I&j-*JEBk(*(>TDJr
zNYfAierMU=#e3=nWJhbD6@{vjT2Qsna*&3{G-@{6X6~DiBybatMl{LkLST;h9X&pq
z&+d2o9Db+I?Q{C=>f*Upjqn@=NNbKq{1;?X2JOh4Hf7L3foVz<$H}d)ix?G2Vxu*U
z$xSf130cJ=kh-E}3|48#suKcMC6dADa4<ex-;`;vOG9=Y;yJ}H&FO_hxGi?(*d(0(
z?KpyQWeXQCFit}-enm;r>IjPSK)in~22dO#@d6ajvfmu|&ayx}OHlGq++Hmdry2mo
zNz`n%<=i*=DLoc-1I3RHKnQO4IUyW(`<)Pw>ja2{dVsjy0OEMV1tF^pAdWSwc;^Wl
zKaLdyxdi-yLfYyGgXr8HcvL`jY`lRcc$`=82J+wuzoDpmYd}a@9SuaC8v{oLRL8~^
zFjZg+9O4>ocso?}cD$Geg$^~$ECUrb1i(AW8S@6Kn$N1@VP&|{^}{QyaGt@dvMSJU
zH|c;?hPw%CR%w=(SF3>8J9;<V?)NS846gHh8v_%UXxA>FljH+B2~j{NpENX_NCvhB
zBU_|{s^o5jQ`E<~|9^Ge+#}_vHAl8Lyzy~OpBWH#c$_yuI~AU3cr1X&UGVs>8L)k9
zNJK1`sgR{zAQm5H;&{lup_T+{X@aYL9ILvjk-;1-nUmk^GmrNu3HSQ2doLsjBycE1
zq0Bp6cVS2}SpFmw4=0&;IH&`XVl^CU%QiI;XGWMf{2L>?m&Wv#=A`;rAU8#a;Vn!c
z6p1Amq+^o7Le;;g)X8jekS!O@*6fOIDaycJ)uRq^D@B|WHDFUcOU7Q0s|N!%m#SoN
zNVrg@LHD1=p)M0B+j-v8r$OiUG+@z5x%Xs=-X9dKDHV6Vx<Mp#7-&F9j1*<<#TbSD
zFM~$%G!iLeyZYM5j{%X&YonYh)}ZtHQN`+aey`W>#i0A8reTpTF6w<*UF<Kbi`A#(
zC81|Ty4awR9F3kCE>6@~(oZ=o@luL<dh*q}x>{4(loJwb1cjkUjgj|=*`>@8^|KL|
za3FBKV56=owQrQYWg28629$b6Q^UM7!dw#m0h{)dzp|Hz0(p+BXood}qI>$Z#}k!D
zE6UTeR@1H-^nAA8(+G^BOj44PCuv!GQLdr0v{aZ~DjJ}T)u?#l1cXhiKnYad<656x
zYmmMU(jQKQd1%9?`qd5X@r$u7gKny|4w^<)oSYl-6s^&<(G+xp)n1J9=tegZikA~c
zBBNzpFmlbjy_ROaTAulNew$uZ&!=O|v8urvgY?aVH+q;h4!Z+xm&*}wyPd9p%k9*h
z3nhXt^IV__lgT9SWKz+=_p+9jp4BXI@lpIf0Evq)K9(2`48@|u1^-^B;osA<e@|($
z6#DlD{0{BoEGj^31?Ndt=l(s+5U@C`PJjNrXBv_WpaS<EDNoU=-Fvi_`jTij+<RDn
zFvGpqJkqP-hia&f+wXVv4EP76>9d*vdnv(8EHiT$z9?;GVm;7ve}6AZ3ZngFtSqG@
zMV7D-s`W-`IvayF!yARilwf$Hnnn5p$hA=K?oCmTPnoxC=R!6SJ5xJ96=f1)JCF><
zH!*eZLY4Gmm6opXnfHs758I1jW@}iks_7$@bV2<#-hA+3QIbuzjrok+r5al+z3Z!<
zrl9IcldvbxMZ(d0n0Os!gBt1u+T*(mE2Nb30ZCn#FUh$sgV9Y9rr^80vXt*qE*WjX
zcd4l2J5)q6xe!wTW~u1jtf_?x?#&LI-J2bXg8hQW)q^4!;csV;-_hf7_IRWL6m=#k
zwrZ55m@<?6c<4Z><JdHJX%om~Kt^>3X@kTbTic8#T7U&ZH)$1;KGDCnQRZ0%iBt3T
zSqGObHA@m{f*`X5S(odyzNIF4BGUOmMv!Qrq0EvbAjwAhQPtGkxS9_gRi|l?=a(=|
zLr0-Th{o<#gF|(52oJAp6)JTSqJ;sEt;Yj#gIjY(DMJDZPzALL;Q&;pAOqh`6It~=
zwFV?83kfixYP%v;_aR(>htD>*g4&KGnWWg?^*L`f#{(q9E)Wl}tG^BO<QlV$x_H3g
z5QZEQ;Q@yZ@W3$$1pEVb|A5W6%4hRCdpvgSL#M0?{IU`hL<J;86?_F53ZjB(6UdYY
z1~ro000w1&0VY(vgn@yS!uSFMOk}~Bc1i*RgG2an=s9*lr@EEavSIe8Y}Zvv8M1O@
z!0W7q1aTL5>s$hCCw^gmIvbh(EOo-X?ZQ4Y39i5R0`c=1qEbLy*J@cyZY|qG`aUhN
z57)yvM9<DI&o91`_~gG_FMQ{#|2)4#RNm+?*Qcr%Ys<5Y`btxly_xO1_$ip1ea-ZA
z)|(EeC+P7MO^tw-)Kc({Xm)>*>t5}P(q$Rft{vMt2x=-t(ZmH{na-FqsWx@jr`1-0
zt;#SEt3~Qo!URZofZT4#{91K~Xqye6Gk7dE9EdZC@bH%WQuPy!+hn7-3@t`mDpUv2
z9zbGjAV?E{Xj>VF)@ajl-{3|JKu3PlF)bwNWJ8NQ%S&LM-DE~&15DOmL(v#LQgRxD
zN4>%GTmy<CRyy^NX7Sk3w1SRC%*M(#qpOM1rO85s^ms7NM3Z_OXnXo`7-yvHIMb}$
zkRiu%a-0X?G!MHV;2sG_nE=!%iC7MG@8UjuHWUdaYUE5qYaJZxUB7-|?*{*^x1)<Y
zdN-^Yyd8t-x}`P8D4?FmH<Yk@HVk4Q?|IE3!aNhnI1@~cL4B+PLQcTZm5j&6!yydl
z*&<8NLtKR=P90zX%9V+F^v-32y>~6_Uw6wbgT0;&>-;_ph&x+CTxltjqd5}CLZL0=
z!Dwi^q(YLRbv4ntx>6T=N5b1MkY&nH6wOl<hXbL_%ot~Dpa_thns|&y5n34`Cm+<t
zH9d&}?|b__{c9VMsjq@aQ3!{roS*t?a1FVLx|SAYD%#Hn-ZVYz=H!ElaW|*$^VvLh
zpTpzyxIAvZ-S6xhL}9shHZqsp*Mov_Ts>SC1sfZq<M8?%UbolQ<F%uo9<QUv>+JEu
zd%Z6711|^c@NoE6c~<&X_}t!=UboHbwtMY9r`KuoI_+MU-|e$|ZC*S4u}iar*12uS
zSqtZvC~$rW0mv503IIiufFfz`05nCKSdkz}c$xFGJ;?2LDD!nb4L~}ZCgFGX3IIL?
z3*Lk8&d+AxyW}*;%!IGb2=4hBn5My!cfs8J9i0Gtl94{;x;MeHw}g*rh0jS}6uGES
zzfcOhp;$x@BD&w_@%If5Xg~HX3KC#JOck;?3)l@f8Fn8|7LXYfv{Lv=qc^hXwk-TK
z3%F~VAWGf?GU&E)cVyB3gIChWP@<{!d3LG|#$#i_WH_YFN@Q*NUcR(1zDtpFh-JV8
zU(v+3;9&*;&c8*3P9&K)6GNpuM#8uddbu)K^fdr?D@Uk-9I2NREodtyiFS!y#a$8b
z1@UKs>v_U!k#n16L@W+_5tM4RIyWcRMt{X+;U7!j+sOxu@tZJn<Zi9;+pa%B{I&=z
zAs16Uv}mDZ0-s&jCwD0$xRpW#C&6xc;D%5e1adh*=5zYg23=Ry!mgPL?3xKjyP2{8
zv^&e3Qxv<neXra%I|b){1P7cnK_<(dEf)xaOruG(4gJOE@YgKj47{2lf7cZE+&RQq
z0<<bTC<Tvh;!HOoD?rWPiC2jL{gcPe3Xad=w|krr+k0Hd4Dh<4Bte<+vb*$nsLzR&
zmz69Y^ZQ*n*e@^;L<WF&C6qXuoC)ByK_h@Q0n`;nM=ip5gMu*L0ELmH07=1vrc4n1
zn>Z@~i6>1$LCxC%IYG^#{fpKD@)ur%6h!qY%U5X&I7eDO>$qs8&ddNEpLP9yKKT6<
zy#U^En!5J=Zn?X<KRySjEP2fbi^1F9_@JZggRZ;q?v3EBW#Fyrr$nXz8zzaSO(u@X
zgwm%tq5LEPFNR}VhA@DMG(VyD5j2Wc10(^hG8QIY#=Zt@dI@x<`S$>SP3P|+*=tk7
z@q10|S)?=kzKB=_&?|~1JPFt|1jK6oH9FcFj^}G4d<C^uaPUs%Ypv{R6gsbonu_TB
z5h*(7*S3!ZqQgVEz*8h3foL#?&>!Y*fjvW|0jh@R$|XFXvDsYx7%(|cCrO#t{Ub~;
zxg}n0aFKGyj|G>F#NuNMk=Bne3pX<nEY>+Xi|8ON`T3$yT%y6iP%yy+#)IK_QBJMN
zd~T|tqPbXEr__`vNg+LzRkRZ!98e*0xj{V;Dn*P6Op}B>N$axcRkRi4x5G*jA@SBc
z4vCPc+}=hjGWoJVQjaP}UfE#GI%l1lq*Za!5p9k+5YCsG(aT`NuZEs@z>VruhiM-}
zUb7Ax)u9gaaLL@(;-JIw$=u|1*l0N&Cf8uA7SzkNFw1?3wA84frPS3@&n7lAK`8>P
zH7)NOna8vfh#ADUcZ}`m+WE?NckGBfATk6XwV7mSvs%5#-l3&f51pNvKC|Q<Ne6<P
zHkAV6>Zfup+rqY;pYQy`Cp)&f|NXH&!ba{J+7h}aqE^Q2H$G%4C9>?cVEHZ-QdR!>
z7r&i-_uSp{JIVjeejy4-m>7@8#+gW@h}<=I<&*!`v@4IE-3@+e`Qqzeoux8vyGU+e
zN22f_sMV2+Ki9M)Ep4xV{?x2s>ymS?S?@V3@(d#*Ro)uKA39HPjc4l#Ry8LzBVDob
zCc@L9-a!9a+v<Lg%^m1N*LHO6Le~(;I9B)f+MMhMz0TF}4V#1i#DzYwbDy}`Pi*W9
z&x+OXdCR%$mGF91KzoSUqA(w}5bf<V;8%NF39dqeuKwMhh6h5pd77uFPAz085Sngj
z)JPz(+6rc^W-FML<iGOPi7p2uzUK77(J}*4u9+PTHuoUQjUm^>A$M>yS1<?O5U{Uo
z%rZfVkSeZy=1#JU?i6R97qrn<BGYVhmNN3IGghvpuasJyK`X%Hg(aGAsIf$2(V$<V
z>F}sziAIA=?P~S`TZkl2RD9}H$cL%xV423kyEujPJtAJEX+Wil5>fuSCURj}qTq$X
zLX3Wf6n;)l@vs`zt**ifJ<1bRU%Cp*<rrCAHCnx{s&74}1o?7JEXL=PqlT_hT~|qQ
zmHHX)4P1pmsJxDGRq@KP6jxPJ@q=t0uF}nr!d+KMag}Zet0!Eg4_$>Ru9DGJqt)vw
zOY?A*ZiW=*y6V})=J1F#t4^&e<tHDlYoT0TAb6RbNf2|>UMfjEG5tMy!?d^$k#Lb4
zyyn&Z3vX-MiH+oJ;yz+|c2{<usLbs%@o+-5y6ZC@&~9Y55<B1D`O*&S!kc<U?N5dy
zYRS<X*{z!L;5hNeuli>?Qn!KNw3r96NQs>>)$?Hf4Vw0$;}8FL&4Q19wcxgo9_kbg
zhUDLe6KXNy)b|%@I~X#Lo^K&;p^moPeNME(wJk6lPK-x_Ax0+YSIuH|c2Lva9I$+R
zahyIjwU_F@AnMI_O?s0#t7&hRm|ni{1F*_;?}ax-+af2?#-C6y5>QiNwY{imUuJ^E
z>6^)gnNPr9iInbh;b<rlj#gFfLa8gZ6PkAA%$tw*U-6s2Uh(OF99kmm%HCvnT&-#R
zQ%!sD>&(wOZvE(6Ew81wnZ-+%ykM!S!S26F)6Vpz?o4%04Ytmp{^x6vWTC~&va6KM
zTuqtaTG#N{yqYj}1+jTg^s>QuxtKgIMz4>3)1D8`<D&J@yXdWam>w6m$KKrQ%*X5T
z5q$jJu6*bo7t6=p?asyU@j-p;yPg$%G#?k-r@cX~TtBh<vdb^O{Iae>0GC<fq!Lw4
z)Q!g)W5?=YQA{6>>#0Y?VSD=FI#y9cmTIDiETSNyBcpm9cKG@T506$o?COi%tJ;Ni
zWYidT%^h}<*-kfpME%Tm7_M?jXFJ_+{gN4Wqk0{7x|!`N8FsUpJKO1o>-ryd`6z1(
z?~h>9&<7IXB$LR+R6qHro+|03k$5na3?$>hXksKV7K9DocDL<2@JC7t*?)00zdPVB
zPyXHUCyzgNQzehKSNG_3%nw&tSO2i0M?0jCuB~EkearymgcB%J-!4Oao%W+9RzIQ4
zYWpR^YR4tQYKIJ~b=r?2xhgNR@9x|7w;TTXagoR#7d?6_gF3A{KFfAOZ%mf$gnl%K
zG>hK>4#A*1a{%TcJ_Sc}%V;QtU<m-xzP>?^&p&`c0(KJQQ~sNXg1MY0x=BQI1RD!G
z@w)j%q)qYImT`peRfqMb!0uYQ-KEHY1E}Pgcg=?04Yy*DAD1?B9Oxph2VF!rJcjx6
zH^D~XP~M|<Vt^V6TsAcTlB4WyY>V8$E-S`s?(=(m1AW?;%~T$rXh?Fa%&&pnY+)Nz
zbcQWIL!u%!C<A`+yX+;AC2065Tdsym6O`mm{<aEl6#XCcPqKu}TUqQINNDgS3SQtN
zzH2HvR_CgEb%R+jAe@Q=;`Gga7pgLVDh@bN1p-vg!rp`G6tG1F*m`Adw)O>E{UTR<
zfGuH$DqN^s-(r3(qpgDa7L=821Arn)6F)1y2ApsVc7F=MoN2D$6u1bFR(N?93RZ@s
z;E@HL@O1_r@V@pL;7k6_s{ew9uPhdLZ2>9x+X_Bsui3kb-an6NyJi7mgD6mp9}<zI
z7I{f(oWXSE`=f~L;}YlJ!=(sAE6sC!xg9Hjnrk=wCDL13)ibJ*OIIm!X)?bH43C0F
zXd@U;;j0juBfHT-EfHSR8|7vUjq>)?Ncj@cL_()-Z=Xkdr>^pLmVyCeC7uRPlgp^<
zM5`kld6{T3oRo2HYevBG5E*1W<VG1KSuWR*`fV&xKZI-JBG5@*Ms?BMrY>{0rPJCe
zeNAjsTPJlOG}XtIk0Le_Io))WEm)yxsv~0-sc{w|MYG6$PcWX02DYgcv>RGgDQRSb
zc1ty$H>ju^3F4Y;iVlGN)Mdog^egFM@X_=>vd46o{zu}s)Vb^r=mlT@TWb5%huL>C
z_ga2T?lR3MoYV}`L!O_0Kb@I+Av@1RlCP&9vdp9wo502Ar(SQn7QCDK-Bf$Flm3_1
zxo4kB&F*@H>Pnx^ET!F3d(!8oelfKT{KM3??jKQezwWi(Z;FFM(=*8B^e<DR>Heu3
zr~e`~L40re?J0-FWBOO>O!_$a)6_Rpv$9XnPtx<#OTN6#>TL^xQ{)5G!R+Dby`~RR
zZxd;H_^T_aHFVd72(g}gIW;5w$IKyc;G&24X!;(odiqrA)v5N(WacyQYr32IQG1Gf
zXL`-lBbk*`Z)WzT)0tV*H;^5KKV_c&9kq_$p8XpWN&J6%S00sBwZ+eLzd=Tsd4eDg
zAd!+0nFE#~&S=(JnD#_O#2ivka;m4x<&aidF%&8e=&P4fA`V%mJ&Y6;0VNa!#rG-k
z0g{Tq_kH(F?*i4zR<G4+JzM?ZANTI>-n-X6>+bXIbNAlAbM~3UPRSCyvTZNb(-g2n
zyeks<0r)*~;C<;h{bR~lQ~=iVYICaknHY!Omant9Xcfv=bYwee6yxy;l4m`nR&ujg
zg@(3%N?$X^&_y&I4##D37CWx2fm4;a&`q(zV5J{<lpUvmP}ug!4^fO#kGhK_)rHS9
zK91gL*(GYFLN~HBcpCa!`=LQ>FkP&V(+TE>-F21lta1VtLoP=tQR0fcOsB$W$_?B@
z7Ag_&1j97c=!V1j=c*UaL|q)#-z~IU>KcL4X}RcwKVVXw$XaY$#8blHejP^IE*nQE
zb8w&-54Yh>B1!9Irfy(*Tp&DYjiH|Gg-vv$ejc{79M$($G9_n`;3}R)qePUNsP$HW
zbu2GbKhrcs?MEtI+iKBrOqr26!#TLZGEq*XDfF&9MFYSV>Szol4vt^enM63yIrh1p
zOjdZTYuN2jUj0VjZC+Eq%Ju!`-WWeGzLWsqs2Ymn$4<Ct@6@{?yt%lg(PfU3AGT@#
z*(oP01G^t+&i!ikz1_VsyT?|%_3T<(Pmo`<#I~zpayPR)tgq}p-PCeEbad94ul3a)
zVV6eVZW@qqCD<19$T{_M&&(5PQ{MaZCYE@aT|iHZ$s=H6MO9sp?*{qr)@e?<vKTdX
zcy874sQYT`c3S!`>x}<lOOM|9pe;Q`8>zyTaO+#@>xNZID>B2Gs9rY0h3FeLg}X>^
zSp{a)J5zt17qAEl-%#wVJ`@=x>4#>3H_&oei~LZgfZ__!{;l$2YcjqD)``O`OU%_>
zmnGIMa1WhEJ<t%Fi~Su70BJmz_9orc%XAh#jmtnUyooee*Rv>tj?4$Ac^K@Ehof@&
zEKUI1_)`9%WjuORX0vVl7&vO!XHB%_8&|WpsSkEh4;vTQg83Sr!t<pwaN=9#J|z&l
zq5>RlJ;+T;9S#zO>_wT1jsZoiClaR07x|NSGsH=*;yJhmTt(4V6F)$7q=8QYH{fh`
zkk1eU;dMB#)gM?`F{*%6v(-||UgkpjDK2ulw8^LBMj0%3!42Sb>JGoqtjlN|O}@14
zRwKXwxdXVvNcae722r3yYd2SfIiJ+@`3!mo1cEH;Yn)7$+qSBk<v&HWYz9+6Um1>6
zFa*9Lio{1w7z`Ab#UT&^{g8nMf(XT=tb<eNVPnzVP<%lmIT$DK=bg8Zg{mV;guCSh
zHOnwSakVa1O*}_@#$S@H{1{I~fhZVd$nPA+@ZEeCDA#(bh!xVaN;;mTB;g3&s?Tqp
ztE&>Zpc$W|0ro4&GMX&x!72WQ`o4ZV@wUvM!$c1l$6MGyRw_JX5158Mk%#AWvV@&s
zF~Xar(O8`g<yk&q<#G{=<K0jzdP;65DPV>DCioqA)fUcEnPeX+Uq~yv$->~vJc1v^
zUN{geMnmN|(}>a%|Fk>L`JC)xD5!H^=r_NyR2O#hdywVZ?cVV|@n>5s*ggmb^`6;O
za4Xq8?%MOd>#lT-_}0#;9Nk^vUvh4*_eHyRn-7G(FE)I8QS1YWEh?FB^wnoOdR6vw
zo9rFnvc~Od?K;!JzQy^b{@r{=m$l*fz0!}z^jvF2eOhnSmOmYKuJF0082`8%7Y0u+
zD}3@FHP3j?_W0bgBscQ~^YvqGpAHJV<m>q>mcIP|wOsA`_y=w232F_?6fMXsoCN@9
zNgK|<QQ}DyLiStZ>1sX`n9v<Lncvp8+WhHE&<3XCM9~xdUDlAV;7Za9Mw5fMltT76
zFw+5`m)?mC$ANMK4dBkYX=JcvKdpjWd8!($RL~5%8m;38SctyTuvTC^8&}CP;tV2{
zMbHI(z_tR$roj<pBYsm3B=3P>UQ5S0M526dre}C0vKe1#DYL~m9wCGIi)@PiWLsDM
z5`RQ(lt;uYuLP8!8Vv%^#NIrQ)R7RDPG{44`X(ENV#FGHO22^E-~slM<0s@++frRL
z$&q7eh~}j?Tm4w7I*fNml~y}pPY>&B$T-)d$|y?|G-%SKItY&;NtTV=moC<{K(!X9
z(lN#*WbB0}V6M`L?7@ekNw-s$gO8aJ^;bekv8*CV;0Q^utfCw4W$<BOq`@9<R$SqE
z&;W*zL84Uia8}4Wqy+BZY5K=7RA$)fRFhH(s;L(pEj`ICW902w>`T}G00$lgvw@?`
zV^`H4)CacMK5#k$=UMA@j>=e2BEI88&>FE>ZdI-9BN_{`*d#hZmqixhSdq`>G7lCg
zD>2ga_PC{9x80a0AR49=&?NK(K`a|Mkx`anmV}w@Ie7wR(Ec)(2FgR)XpzICVKCpx
zLRhtUO@zQ@_z>C!m%^*un_d>#bQN5wjkew(ova1B7~#us3?2X)(H?oN`J5PqCaP!I
z2=*`3WPO*8#5XjbYL)P0MY<8>3_2}ZSU`_6&MDPiUF+PfrB^53IA2pgv<;gxhE_P9
zKb7b?_ul3{VSh7yz18_lnJYOla^S^1eY<{rbmGmOBR!(c%R{@@?yuYK_pvPxzI881
zR#1odhP~z&JP)21K_b~}hD&+f6C$0QHhtf8p|8g!YtT|Lr^VRKlN2-;`IsBa+^?0^
z`xHBjK0JJ~>D+p~O&s*OeQSsFkowd<Zeor3**cTHn_-cXFCy`&Z|++Swje$}WntVa
z@xS}4{!485Pm8dAh-kX}5Yha&z*dV87cGcgkQ|$kl=AYcf1S68{;ju|kZpL+E)Q#G
zw`@b6PNS|r&@LT!4V~xT$0aA+msaT9LbH9lzPNd@@w4NS6CcLTf6>@^-vg`u{s(cP
zqMYBF=bO{9xvsl*v{SdAQJ0?`|MNM55D|#m!3Bic5f_c(4zK}t01NH#gaV0ntz9Xo
z6NXUQS#q?yozIc>tDOgS%IV-}m)R*zV^Q=S($IYsk#joq{4mk|rzg5|9%y~~2bSl~
ze&)eX{r~-2D0k$z!J{zfS2X(&bAE_9Kg67W=J&Ki20xXF|Fb}q-}YGHj~?q!Gwifx
N{tIC}_B?Bm0s!#D$>;z8

literal 0
HcmV?d00001

diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py
new file mode 100644
index 000000000..6a34c2c52
--- /dev/null
+++ b/testing/tests/api/test_importing.py
@@ -0,0 +1,8 @@
+from pymol import cmd
+from pymol import test_utils
+
+
+@test_utils.requires_version("3.0")
+def test_bcif():
+    cmd.load(test_utils.datafile("115d.bcif.gz"))
+    assert cmd.count_atoms() == 407

From ebd016a04551d9ea16892ca834588656bcf2177f Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 16:33:48 -0400
Subject: [PATCH 06/15] fixed mix var

---
 layer2/CifFile.h | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/layer2/CifFile.h b/layer2/CifFile.h
index 123e34811..db22ed68a 100644
--- a/layer2/CifFile.h
+++ b/layer2/CifFile.h
@@ -18,6 +18,11 @@
 // for pymol::default_free
 #include "MemoryDebug.h"
 
+template<class... Ts>
+struct overloaded : Ts... { using Ts::operator()...; };
+template<class... Ts>
+overloaded(Ts...) -> overloaded<Ts...>;
+
 namespace pymol {
 namespace _cif_detail {
 
@@ -167,6 +172,26 @@ namespace cif_detail {
   struct bcif_array {
     std::vector<CifArrayElement> m_arr{};
   };
+
+  template <typename T> T var_to_typed(const CifArrayElement& var, const T& d)
+  {
+    if constexpr (std::is_same_v<T, const char*>) {
+      auto& str = std::get<std::string>(var);
+      return !str.empty() ? str.c_str() : d;
+    } else {
+      if (auto ptr = std::get_if<std::string>(&var); ptr && ptr->empty()) {
+        return d;
+      }
+      if constexpr (!std::is_same_v<T, std::string>) {
+        return std::visit(overloaded{[](const std::string& s) -> T {
+                                       return _cif_detail::raw_to_typed<T>(
+                                           s.c_str());
+                                     },
+                              [](const auto& v) -> T { return v; }},
+            var);
+      }
+    }
+  }
 }
 
 /**
@@ -223,17 +248,8 @@ class cif_array {
     } else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
       if (pos >= arr->m_arr.size())
         return d;
-      if constexpr(std::is_same_v<T, const char*>) {
-        auto& str = std::get<std::string>(arr->m_arr[pos]);
-        return !str.empty() ? str.c_str() : d;
-      } else {
-        if (auto ptr = std::get_if<std::string>(&arr->m_arr[pos])) {
-          if (ptr->empty()) {
-            return d;
-          }
-        }
-        return std::get<T>(arr->m_arr[pos]);
-      }
+      auto& var = arr->m_arr[pos];
+      return cif_detail::var_to_typed<T>(var, d);
     }
     return d;
   }

From 2be819ca740d6b8a7ff7e758b0a57b33ba177b55 Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 16:42:15 -0400
Subject: [PATCH 07/15] slight simplification

---
 layer2/CifFile.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/layer2/CifFile.h b/layer2/CifFile.h
index db22ed68a..793a642b7 100644
--- a/layer2/CifFile.h
+++ b/layer2/CifFile.h
@@ -270,15 +270,14 @@ class cif_array {
       if (auto str_ptr = std::get_if<std::string>(&arr->m_arr[pos])) {
         return str_ptr->c_str();
       }
-      auto to_string_visitor = [](auto&& arg) -> std::string {
+      m_internal_str_cache = std::visit([](auto&& arg) -> std::string {
         if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,
                           std::string>) {
           return arg;
         } else {
           return std::to_string(arg);
         }
-      };
-      m_internal_str_cache = std::visit(to_string_visitor, arr->m_arr[pos]);
+      }, arr->m_arr[pos]);
       return m_internal_str_cache.c_str();
     }
     return d;

From bfab47edaa36fd00cc8ac96b1dcc9a0c15965865 Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 16:49:00 -0400
Subject: [PATCH 08/15] return val

---
 layer2/CifFile.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/layer2/CifFile.h b/layer2/CifFile.h
index 793a642b7..c6414acd8 100644
--- a/layer2/CifFile.h
+++ b/layer2/CifFile.h
@@ -173,6 +173,13 @@ namespace cif_detail {
     std::vector<CifArrayElement> m_arr{};
   };
 
+  /**
+   * Returns a typed value from a CIF data element.
+   * If the element is missing or inapplicable, return `d`.
+   * @param var CIF data element
+   * @param d default value
+   * @return typed value
+   */
   template <typename T> T var_to_typed(const CifArrayElement& var, const T& d)
   {
     if constexpr (std::is_same_v<T, const char*>) {
@@ -191,6 +198,7 @@ namespace cif_detail {
             var);
       }
     }
+    return d;
   }
 }
 

From b7705c1cbba5255032d9f0c9fe4d2caa9b7f7dd4 Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 17:21:16 -0400
Subject: [PATCH 09/15] No msgpack-c

---
 layer2/CifFile.cpp           | 10 ++++++++++
 layer2/CifMoleculeReader.cpp |  8 ++++++++
 2 files changed, 18 insertions(+)

diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp
index cda02e296..97fe9c144 100644
--- a/layer2/CifFile.cpp
+++ b/layer2/CifFile.cpp
@@ -22,7 +22,9 @@
 #include "MemoryDebug.h"
 #include "strcasecmp.h"
 
+#if !defined(_PYMOL_NO_MSGPACKC)
 #include <msgpack.hpp>
+#endif
 
 namespace pymol {
 namespace _cif_detail {
@@ -479,6 +481,8 @@ bool cif_file::parse(char*&& p) {
   return true;
 }
 
+
+#if !defined(_PYMOL_NO_MSGPACKC)
 enum class DataTypes
 {
   Int8 = 1,
@@ -793,6 +797,12 @@ bool cif_file::parse_bcif(const char* bytes, std::size_t size)
   }
   return true;
 }
+#else
+bool cif_file::parse_bcif(const char* bytes, std::size_t size)
+{
+  return false;
+}
+#endif // !defined(_PYMOL_NO_MSGPACKC)
 
 } // namespace pymol
 
diff --git a/layer2/CifMoleculeReader.cpp b/layer2/CifMoleculeReader.cpp
index ddbcaea14..360867eaa 100644
--- a/layer2/CifMoleculeReader.cpp
+++ b/layer2/CifMoleculeReader.cpp
@@ -2359,6 +2359,14 @@ pymol::Result<ObjectMolecule*> ObjectMoleculeReadBCif(PyMOLGlobals* G,
     ObjectMolecule* I, const char* bytes, std::size_t size, int frame,
     int discrete, int quiet, int multiplex, int zoom)
 {
+#ifdef _PYMOL_NO_MSGPACKC
+  PRINTFB(G, FB_ObjectMolecule, FB_Errors)
+    " Error: This build has no BinaryCIF support.\n"
+    " Please install/enable msgpack-c.\n"
+  ENDFB(G);
+  return nullptr;
+#endif
+
   if (I) {
     return pymol::Error("loading BCIF into existing object not supported, "
                         "please use 'create' to append to an existing object.");

From 62ae4c87586e3fbbc46e5d1b752d5043deb2fe8c Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 17:30:11 -0400
Subject: [PATCH 10/15] revert line endings

---
 layerCTest/Test_CifFile.cpp | 302 ++++++++++++++++++------------------
 1 file changed, 151 insertions(+), 151 deletions(-)

diff --git a/layerCTest/Test_CifFile.cpp b/layerCTest/Test_CifFile.cpp
index e173dcafa..77108d287 100644
--- a/layerCTest/Test_CifFile.cpp
+++ b/layerCTest/Test_CifFile.cpp
@@ -1,151 +1,151 @@
-#include "Test.h"
-
-#include "CifFile.h"
-
-using namespace pymol::test;
-
-const char* SAMPLE_CIF_STR = R"""(
-data_FOO
-_cat1.key1 noquotes
-_cat1.key2 "two words"
-_cat1.key3 ? # unknown
-_cat1.key4 . # inapplicable
-_cat1.KEY5 "UPPER CASE key"
-loop_
-_cat2.key1
-_cat2.key2
-_cat2.key3
-_cat2.key4
-10 0.1 . foo
-11 0.2 ? "TWO WORDS"
-12  ?  ?
-;multi
-line
-value
-; . 0.4 . .
-data_bar
-data_baz
-_undotted_key "why not"
-_typed_float1 1.23(45)e3
-_typed_float2 1.234(5)e1
-_typed_float3 1.23456789
-)""";
-
-TEST_CASE("misc", "[CifFile]")
-{
-  // syntax 1
-  pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR);
-  // syntax 2 (requires move constructor)
-  auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
-  // move assign
-  pymol::cif_file cf3;
-  cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
-
-  // check all three instances have same data
-  REQUIRE(cf1.datablocks().size() == 3);
-  REQUIRE(cf2.datablocks().size() == 3);
-  REQUIRE(cf3.datablocks().size() == 3);
-  REQUIRE(cf1.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
-  REQUIRE(cf2.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
-  REQUIRE(cf3.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
-
-  auto& blocks = cf1.datablocks();
-
-  REQUIRE(blocks.find("FOO")->second.code() == std::string("FOO"));
-  REQUIRE(blocks.find("bar")->second.code() == std::string("bar"));
-  REQUIRE(blocks.find("baz")->second.code() == std::string("baz"));
-
-  auto* data = &blocks.find("FOO")->second;
-
-  REQUIRE(data->get_arr("_cat1.key3") != nullptr);
-  REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3"));
-  REQUIRE(data->get_arr("_cat1.key6") == nullptr);
-
-  REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false);
-  REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false);
-  REQUIRE(data->get_opt("_cat1.key3")->is_missing());
-  REQUIRE(data->get_opt("_cat1.key4")->is_missing());
-  REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false);
-
-  REQUIRE(data->get_opt("_cat1.key4")->is_missing_all());
-  REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false);
-
-  // looped data
-
-  REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false);
-  REQUIRE(data->get_opt("_cat2.key3")->is_missing_all());
-
-  // template getters
-
-  std::vector<int> vec1{10, 11, 12, 0};
-  std::vector<float> vec2{0.1f, 0.2f, 99.f, 0.4f};
-
-  REQUIRE(data->get_opt("_cat2.key1")->to_vector<int>() == vec1);
-  REQUIRE(data->get_opt("_cat2.key2")->to_vector<float>(99.f) == vec2);
-
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(0) == "foo");
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(1) == "TWO WORDS");
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(2) == "multi\nline\nvalue");
-  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(3) == "");
-
-  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(0) == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(3) == nullptr);
-
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[0] == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[3] == nullptr);
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[0] == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[3] == std::string("ABC"));
-
-  // type deducted from default value
-
-  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int
-  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int
-  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. / 3.)); // double
-  REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1);
-  REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f);
-  REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo");
-  REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted");
-
-  // as_X getters
-
-  REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo"));
-  REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing
-
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10);
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11);
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing
-
-  REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.);
-  REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.);
-  REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) == 99.);  // missing
-
-  REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) == 0.1);
-  REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing
-  REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) == 0.4);
-
-  // out of bounds is default
-
-  REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99);
-
-  // alternate names
-
-  REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10);
-  REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10);
-  REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99);
-
-  // wildcard lookup
-
-  REQUIRE(data->get_arr("_cat2_key1") == nullptr);
-  REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10);
-  REQUIRE(blocks.find("baz")->second.get_arr("_undotted.key") == nullptr);
-  REQUIRE(blocks.find("baz")->second.get_opt("_undotted?key")->as_s() == std::string("why not"));
-
-  // float parsing
-
-  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as<float>() == Approx(1230.f));
-  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as<double>() == Approx(1230.00000));
-  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float2")->as<double>() == Approx(12.3400000));
-  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float3")->as<double>() == Approx(1.23456789));
-}
-
-// vi:sw=2:expandtab
+#include "Test.h"
+
+#include "CifFile.h"
+
+using namespace pymol::test;
+
+const char* SAMPLE_CIF_STR = R"""(
+data_FOO
+_cat1.key1 noquotes
+_cat1.key2 "two words"
+_cat1.key3 ? # unknown
+_cat1.key4 . # inapplicable
+_cat1.KEY5 "UPPER CASE key"
+loop_
+_cat2.key1
+_cat2.key2
+_cat2.key3
+_cat2.key4
+10 0.1 . foo
+11 0.2 ? "TWO WORDS"
+12  ?  ?
+;multi
+line
+value
+; . 0.4 . .
+data_bar
+data_baz
+_undotted_key "why not"
+_typed_float1 1.23(45)e3
+_typed_float2 1.234(5)e1
+_typed_float3 1.23456789
+)""";
+
+TEST_CASE("misc", "[CifFile]")
+{
+  // syntax 1
+  pymol::cif_file cf1(nullptr, SAMPLE_CIF_STR);
+  // syntax 2 (requires move constructor)
+  auto cf2 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
+  // move assign
+  pymol::cif_file cf3;
+  cf3 = pymol::cif_file(nullptr, SAMPLE_CIF_STR);
+
+  // check all three instances have same data
+  REQUIRE(cf1.datablocks().size() == 3);
+  REQUIRE(cf2.datablocks().size() == 3);
+  REQUIRE(cf3.datablocks().size() == 3);
+  REQUIRE(cf1.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
+  REQUIRE(cf2.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
+  REQUIRE(cf3.datablocks().find("baz")->second.get_opt("_undotted_key")->as_s() == std::string("why not"));
+
+  auto& blocks = cf1.datablocks();
+
+  REQUIRE(blocks.find("FOO")->second.code() == std::string("FOO"));
+  REQUIRE(blocks.find("bar")->second.code() == std::string("bar"));
+  REQUIRE(blocks.find("baz")->second.code() == std::string("baz"));
+
+  auto* data = &blocks.find("FOO")->second;
+
+  REQUIRE(data->get_arr("_cat1.key3") != nullptr);
+  REQUIRE(data->get_arr("_cat1.key3") == data->get_opt("_cat1.key3"));
+  REQUIRE(data->get_arr("_cat1.key6") == nullptr);
+
+  REQUIRE(data->get_opt("_cat1.key1")->is_missing() == false);
+  REQUIRE(data->get_opt("_cat1.key2")->is_missing() == false);
+  REQUIRE(data->get_opt("_cat1.key3")->is_missing());
+  REQUIRE(data->get_opt("_cat1.key4")->is_missing());
+  REQUIRE(data->get_opt("_cat1.key5")->is_missing() == false);
+
+  REQUIRE(data->get_opt("_cat1.key4")->is_missing_all());
+  REQUIRE(data->get_opt("_cat1.key5")->is_missing_all() == false);
+
+  // looped data
+
+  REQUIRE(data->get_opt("_cat2.key1")->is_missing_all() == false);
+  REQUIRE(data->get_opt("_cat2.key3")->is_missing_all());
+
+  // template getters
+
+  std::vector<int> vec1{10, 11, 12, 0};
+  std::vector<float> vec2{0.1f, 0.2f, 99.f, 0.4f};
+
+  REQUIRE(data->get_opt("_cat2.key1")->to_vector<int>() == vec1);
+  REQUIRE(data->get_opt("_cat2.key2")->to_vector<float>(99.f) == vec2);
+
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(0) == "foo");
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(1) == "TWO WORDS");
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(2) == "multi\nline\nvalue");
+  REQUIRE(data->get_opt("_cat2.key4")->as<std::string>(3) == "");
+
+  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(0) == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->as<const char*>(3) == nullptr);
+
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[0] == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>()[3] == nullptr);
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[0] == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->to_vector<const char*>("ABC")[3] == std::string("ABC"));
+
+  // type deducted from default value
+
+  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 == 3); // int
+  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99) / 3 != Approx(10. / 3.)); // int
+  REQUIRE(data->get_opt("_cat2.key1")->as(0, 99.) / 3 == Approx(10. / 3.)); // double
+  REQUIRE(data->get_opt("_cat2.key2")->as(0, 99.) == 0.1);
+  REQUIRE(data->get_opt("_cat2.key3")->as(0, 99.f) == 99.f);
+  REQUIRE(data->get_opt("_cat2.key4")->as(0, std::string("type deducted")) == "foo");
+  REQUIRE(data->get_opt("_cat2.key4")->as(3, std::string("type deducted")) == "type deducted");
+
+  // as_X getters
+
+  REQUIRE(data->get_opt("_cat2.key4")->as_s(0, "ABC") == std::string("foo"));
+  REQUIRE(data->get_opt("_cat2.key4")->as_s(3, "ABC") == std::string("ABC")); // missing
+
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(0, 99) == 10);
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(1, 99) == 11);
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(3, 99) == 99); // missing
+
+  REQUIRE(data->get_opt("_cat2.key1")->as_d(0, 99.) == 10.);
+  REQUIRE(data->get_opt("_cat2.key1")->as_d(1, 99.) == 11.);
+  REQUIRE(data->get_opt("_cat2.key1")->as_d(3, 99.) == 99.);  // missing
+
+  REQUIRE(data->get_opt("_cat2.key2")->as_d(0, 99.) == 0.1);
+  REQUIRE(data->get_opt("_cat2.key2")->as_d(2, 99.) == 99.f); // missing
+  REQUIRE(data->get_opt("_cat2.key2")->as_d(3, 99.) == 0.4);
+
+  // out of bounds is default
+
+  REQUIRE(data->get_opt("_cat2.key1")->as_i(50, 99) == 99);
+
+  // alternate names
+
+  REQUIRE(data->get_opt("_cat2.key1", "_other_name")->as_i(0, 99) == 10);
+  REQUIRE(data->get_opt("_other_name", "_cat2.key1")->as_i(0, 99) == 10);
+  REQUIRE(data->get_opt("_other_name", "_cat2_key1")->as_i(0, 99) == 99);
+
+  // wildcard lookup
+
+  REQUIRE(data->get_arr("_cat2_key1") == nullptr);
+  REQUIRE(data->get_opt("_cat2?key1")->as_i(0, 99) == 10);
+  REQUIRE(blocks.find("baz")->second.get_arr("_undotted.key") == nullptr);
+  REQUIRE(blocks.find("baz")->second.get_opt("_undotted?key")->as_s() == std::string("why not"));
+
+  // float parsing
+
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as<float>() == Approx(1230.f));
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float1")->as<double>() == Approx(1230.00000));
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float2")->as<double>() == Approx(12.3400000));
+  REQUIRE(blocks.find("baz")->second.get_opt("_typed_float3")->as<double>() == Approx(1.23456789));
+}
+
+// vi:sw=2:expandtab

From 8c02b4c9056658cb599aa2874077f9d298f4e59e Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 17:48:18 -0400
Subject: [PATCH 11/15] Some more simplification

---
 layer2/CifFile.cpp | 48 ++++++++++++++--------------------------------
 1 file changed, 14 insertions(+), 34 deletions(-)

diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp
index 97fe9c144..a0c96fe73 100644
--- a/layer2/CifFile.cpp
+++ b/layer2/CifFile.cpp
@@ -568,43 +568,28 @@ static std::vector<CifArrayElement> integer_packing_decode(
 
   auto as_int = [isUnsigned, byteCount](auto&& elem) -> std::int32_t {
     if (isUnsigned) {
-      if (byteCount == 1) {
-        return static_cast<std::int32_t>(std::get<std::uint8_t>(elem));
-      } else {
-        return static_cast<std::int32_t>(std::get<std::uint16_t>(elem));
-      }
+      return byteCount == 1 ? static_cast<std::int32_t>(std::get<std::uint8_t>(elem))
+                            : static_cast<std::int32_t>(std::get<std::uint16_t>(elem));
     } else {
-      if (byteCount == 1) {
-        return static_cast<std::int32_t>(std::get<std::int8_t>(elem));
-      } else {
-        return static_cast<std::int32_t>(std::get<std::int16_t>(elem));
-      }
+      return byteCount == 1 ? static_cast<std::int32_t>(std::get<std::int8_t>(elem))
+                            : static_cast<std::int32_t>(std::get<std::int16_t>(elem));
     }
   };
 
   auto at_limit = [isUnsigned, upperLimit, lowerLimit](std::int32_t t) -> bool {
-    if (isUnsigned) {
-      return t == upperLimit;
-    } else {
-      return t == upperLimit || t == lowerLimit;
-    }
+    return isUnsigned ? (t == upperLimit)
+                      : (t == upperLimit || t == lowerLimit);
   };
 
-  int i = 0;
-  int j = 0;
-  int n = packedInts.size();
-  while (i < n) {
+  for (std::size_t i = 0, j = 0; i < packedInts.size(); ++i, ++j) {
     std::int32_t value = 0;
     std::int32_t t = as_int(packedInts[i]);
     while (at_limit(t)) {
       value += t;
-      i++;
-      t = as_int(packedInts[i]);
+      t = ++i < packedInts.size() ? as_int(packedInts[i]) : 0; // truncated run
     }
     value += t;
     result[j] = value;
-    i++;
-    j++;
   }
   return result;
 }
@@ -639,17 +624,12 @@ static std::vector<CifArrayElement> fixed_array_decode(
     std::vector<CifArrayElement>& data, int factor, DataTypes srcType)
 {
   std::vector<CifArrayElement> result = data;
-  if (srcType == DataTypes::Float32) {
-    auto div_int32_t = [factor](auto&& a) -> float {
-      return std::get<std::int32_t>(a) / static_cast<float>(factor);
-    };
-    std::transform(data.begin(), data.end(), result.begin(), div_int32_t);
-  } else {
-    auto div_int32_t = [factor](auto&& a) -> double {
-      return std::get<std::int32_t>(a) / static_cast<double>(factor);
-    };
-    std::transform(data.begin(), data.end(), result.begin(), div_int32_t);
-  }
+  auto div_int32_t = [factor, srcType](auto&& a) -> CifArrayElement {
+    const auto v = std::get<std::int32_t>(a);
+    if (srcType == DataTypes::Float32) return v / static_cast<float>(factor);
+    return v / static_cast<double>(factor); // a `?:` here would yield double
+  };
+  std::transform(data.begin(), data.end(), result.begin(), div_int32_t);
   return result;
 }
 

From 377ac12bc78a85be016c099b5f00a3c756f24ce0 Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Mon, 20 May 2024 17:50:53 -0400
Subject: [PATCH 12/15] more simplification

---
 layer2/CifFile.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/layer2/CifFile.cpp b/layer2/CifFile.cpp
index a0c96fe73..b53785b95 100644
--- a/layer2/CifFile.cpp
+++ b/layer2/CifFile.cpp
@@ -722,9 +722,8 @@ static std::vector<CifArrayElement> parse_bcif_decode(const std::vector<unsigned
     std::vector<std::map<std::string, msgpack::object>>& dataEncoding)
 {
   std::vector<CifArrayElement> result;
-  for (auto begin = std::rbegin(dataEncoding), end = std::rend(dataEncoding);
-       begin != end; ++begin) {
-    auto& dataEncode = *begin;
+  for (auto it = std::rbegin(dataEncoding); it != std::rend(dataEncoding); ++it) {
+    auto& dataEncode = *it;
     parse_bcif_decode_kind(
         dataEncode["kind"].as<std::string>(), rawData, result, dataEncode);
   }

From bfefe074372b07b7ea558506904e4b6913c76ae9 Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Tue, 4 Jun 2024 23:39:52 -0400
Subject: [PATCH 13/15] test for cif_array data

---
 layer2/CifMoleculeReader.cpp        | 13 +++++++++++--
 testing/tests/api/test_importing.py | 19 +++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/layer2/CifMoleculeReader.cpp b/layer2/CifMoleculeReader.cpp
index 360867eaa..5622ea2da 100644
--- a/layer2/CifMoleculeReader.cpp
+++ b/layer2/CifMoleculeReader.cpp
@@ -2377,16 +2377,25 @@ pymol::Result<ObjectMolecule*> ObjectMoleculeReadBCif(PyMOLGlobals* G,
                         "use 'split_states' after loading the object.");
   }
 
-  auto cif = std::make_unique<pymol::cif_file>();
+  auto cif = std::make_shared<pymol::cif_file>();
   cif->parse_bcif(bytes, size);
   
   for (const auto& [code, datablock] : cif->datablocks()) {
     auto obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet);
     if (!obj) {
       PRINTFB(G, FB_ObjectMolecule, FB_Warnings)
-        " mmCIF-Warning: no coordinates found in data_%s\n", datablock.code() ENDFB(G);
+        " BCIF-Warning: no coordinates found in data_%s\n", datablock.code() ENDFB(G);
       continue;
     }
+
+#ifndef _PYMOL_NOPY
+    // we only provide access from the Python API so far
+    if (SettingGet<bool>(G, cSetting_cif_keepinmemory)) {
+      obj->m_cifdata = &datablock;
+      obj->m_ciffile = cif;
+    }
+#endif
+
     if (cif->datablocks().size() == 1 || multiplex == 0)
       return obj;
   }
diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py
index 6a34c2c52..d9919464e 100644
--- a/testing/tests/api/test_importing.py
+++ b/testing/tests/api/test_importing.py
@@ -1,8 +1,27 @@
 from pymol import cmd
 from pymol import test_utils
+from pymol.querying import cif_get_array
 
 
 @test_utils.requires_version("3.0")
 def test_bcif():
     cmd.load(test_utils.datafile("115d.bcif.gz"))
     assert cmd.count_atoms() == 407
+
+@test_utils.requires_version("3.0")
+def test_bcif_array():
+    obj_name = "foo"
+    cmd.set('cif_keepinmemory', 1)
+    cmd.load(test_utils.datafile("115d.bcif.gz"), object=obj_name)
+    arr = cif_get_array(obj_name, "_pdbx_database_status.entry_id", "s")
+    assert arr == ["115D"]
+
+    arr = cif_get_array(obj_name, "_entity_poly.pdbx_strand_id", "s")
+    assert arr == ["A,B"]
+
+
+    arr = cif_get_array(obj_name, "_pdbx_struct_oper_list.name", "s")
+    assert arr == ["1_555"]
+
+    arr = cif_get_array(obj_name, "_pdbx_struct_assembly.oligomeric_count", "i")
+    assert arr == [2]
\ No newline at end of file

From 325e6d180554a47ee814172e8fb78074a291804b Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Tue, 4 Jun 2024 23:45:52 -0400
Subject: [PATCH 14/15] EOF

---
 testing/tests/api/test_importing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py
index d9919464e..7788aa8b3 100644
--- a/testing/tests/api/test_importing.py
+++ b/testing/tests/api/test_importing.py
@@ -24,4 +24,4 @@ def test_bcif_array():
     assert arr == ["1_555"]
 
     arr = cif_get_array(obj_name, "_pdbx_struct_assembly.oligomeric_count", "i")
-    assert arr == [2]
\ No newline at end of file
+    assert arr == [2]

From 40f071a06084633bd000101dc0e65d4d9b67379f Mon Sep 17 00:00:00 2001
From: Jarrett Johnson <jarrett.johnson@schrodinger.com>
Date: Tue, 4 Jun 2024 23:47:13 -0400
Subject: [PATCH 15/15] update

---
 testing/tests/api/test_importing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/testing/tests/api/test_importing.py b/testing/tests/api/test_importing.py
index 7788aa8b3..432eef8dd 100644
--- a/testing/tests/api/test_importing.py
+++ b/testing/tests/api/test_importing.py
@@ -19,7 +19,6 @@ def test_bcif_array():
     arr = cif_get_array(obj_name, "_entity_poly.pdbx_strand_id", "s")
     assert arr == ["A,B"]
 
-
     arr = cif_get_array(obj_name, "_pdbx_struct_oper_list.name", "s")
     assert arr == ["1_555"]