Merge pull request #286 from rs-station/hkls_attribute

Add hkls attribute to rs.DataSet
rs-station · Jan 4, 2025 · 22ed238 · 22ed238
2 parents f204463 + 03c3bef
commit 22ed238
Show file tree

Hide file tree

Showing 3 changed files with 98 additions and 2 deletions.
diff --git a/reciprocalspaceship/dataset.py b/reciprocalspaceship/dataset.py
@@ -43,6 +43,23 @@ class DataSet(pd.DataFrame):
     and attributes, please see the `Pandas.DataFrame documentation`_.
 
     .. _Pandas.DataFrame documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
+
+    Attributes
+    ----------
+    acentrics : rs.DataSet
+        Access only the acentric reflections in this dataset
+    cell : gemmi.UnitCell
+        The unit cell
+    centrics : rs.DataSet
+        Access only the centric reflections in this dataset
+    hkls : ndarray, shape=(n_reflections, 3)
+        Miller indices in DataSet.
+    merged : bool
+        Whether this is a merged dataset or unmerged
+    spacegroup : gemmi.SpaceGroup
+        The space group
+    reindexing_ops : list
+        Possible reindexing ops consistent with the cell and spacegroup
     """
 
     _metadata = ["_spacegroup", "_cell", "_index_dtypes", "_merged"]
@@ -131,6 +148,38 @@ def merged(self):
     def merged(self, val):
         self._merged = val
 
+    @property
+    @range_indexed
+    def hkls(self):
+        """
+        Miller indices of the reflections in this DataSet.
+
+        Returns
+        -------
+        hkls : np.ndarray
+            int32 array built from the "H", "K", and "L" columns, one row
+            per reflection
+        """
+        # NOTE(review): `range_indexed` presumably makes H, K, and L available
+        # as columns while this runs -- confirm against its definition.
+        return self[["H", "K", "L"]].to_numpy(dtype=np.int32)
+
+    def get_hkls(self):
+        """
+        Get the Miller indices of the dataset.
+
+        Method-style accessor that returns the value of the `hkls` property.
+
+        Returns
+        -------
+        hkls : np.ndarray
+            Whatever `self.hkls` returns
+        """
+        return self.hkls
+
+    @hkls.setter
+    @range_indexed
+    def hkls(self, hkls):
+        """
+        Overwrite the "H", "K", and "L" columns with new Miller indices.
+
+        Parameters
+        ----------
+        hkls : rs.DataSet, np.ndarray, or array-like
+            New Miller indices. A DataSet is read through its own `hkls`
+            property; any other input that is not an ndarray is passed to
+            `np.array`. The last axis must hold h, k, and l at positions
+            0, 1, and 2.
+
+        Raises
+        ------
+        ValueError
+            If a non-ndarray input cannot be converted to an array and split
+            into h, k, and l components
+        """
+        if isinstance(hkls, DataSet):
+            # Convert to numpy if hkls is a dataset
+            hkls = hkls.hkls
+        if isinstance(hkls, np.ndarray):
+            h, k, l = hkls[..., 0], hkls[..., 1], hkls[..., 2]
+        else:
+            # Try coercing to numpy
+            try:
+                hkls = np.array(hkls)
+                h, k, l = hkls[..., 0], hkls[..., 1], hkls[..., 2]
+            except Exception as err:
+                # Catch `Exception` rather than using a bare `except` so that
+                # KeyboardInterrupt and SystemExit are not turned into a
+                # ValueError; chain the cause to keep the original traceback.
+                raise ValueError(
+                    "Unable to convert hkls to a suitable type. Please ensure hkls is a numpy array or rs.DataSet"
+                ) from err
+        # h, k, and l are plain arrays here, so they are matched to the
+        # existing index by position.
+        self["H"] = DataSeries(h, index=self.index, dtype="H")
+        self["K"] = DataSeries(k, index=self.index, dtype="H")
+        self["L"] = DataSeries(l, index=self.index, dtype="H")
+
     @property
     def centrics(self):
         """Access centric reflections in DataSet"""

diff --git a/reciprocalspaceship/io/precognition.py b/reciprocalspaceship/io/precognition.py
@@ -31,7 +31,7 @@ def read_precognition(hklfile, spacegroup=None, cell=None, logfile=None):
         F = pd.read_csv(
             hklfile,
             header=None,
-            delim_whitespace=True,
+            sep="\\s+",
             names=["H", "K", "L", "F(+)", "SigF(+)", "F(-)", "SigF(-)"],
             usecols=usecols,
         )
@@ -49,7 +49,7 @@ def read_precognition(hklfile, spacegroup=None, cell=None, logfile=None):
         F = pd.read_csv(
             hklfile,
             header=None,
-            delim_whitespace=True,
+            sep="\\s+",
             names=[
                 "H",
                 "K",

diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -711,3 +711,50 @@ def test_select_mtzdtype_ValueError(data_merged, dtype):
     """
     with pytest.raises(ValueError):
         data_merged.select_mtzdtype(dtype)
+
+
+@pytest.mark.parametrize("merged", [True, False])
+@pytest.mark.parametrize("hkl_type", ["ds", "index", "numpy"])
+@pytest.mark.parametrize("range_index", [True, False])
+def test_hkls_property_setter(
+    data_merged, data_unmerged, merged, hkl_type, range_index
+):
+    """
+    Test the setter for the .hkls property of rs datasets
+
+    Shuffled Miller indices are assigned as a DataSet, as a DataSet indexed
+    by H, K, L, or as a numpy array. The target dataset is either reset to a
+    range index or left with the index it was loaded with.
+    """
+    input_ds = data_merged if merged else data_unmerged
+
+    hkls = input_ds.copy().reset_index()[["H", "K", "L"]]
+
+    ds = input_ds.copy()
+    if range_index:
+        ds = ds.reset_index()
+
+    # Confirm we're starting with equivalent miller indices
+    assert np.array_equal(hkls.hkls, ds.hkls)
+
+    # Shuffle the hkls
+    hkls = hkls.sample(frac=1.0)
+
+    # confirm shuffling
+    assert not np.array_equal(hkls, ds.hkls)
+
+    # confirm setter
+    if hkl_type == "ds":
+        ds.hkls = hkls
+    elif hkl_type == "index":
+        ds.hkls = hkls.set_index(["H", "K", "L"])
+    elif hkl_type == "numpy":
+        ds.hkls = hkls.to_numpy()
+    expected = ds.hkls
+    value = hkls.hkls
+    assert np.array_equal(value, expected)
+
+    # Test that all data remained the same
+    for k in input_ds:
+        if k not in ["H", "K", "L"]:
+            assert np.array_equal(ds[k], input_ds[k])