BlueBrain · GianlucaFicarelli · Apr 30, 2024 · Apr 11, 2024 · Apr 19, 2024
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 =========
 
+Version 0.10.0
+--------------
+
+New Features
+~~~~~~~~~~~~
+
+- Add ``multi_index`` option to the features configuration, to decide whether the features DataFrames keep their MultiIndex (``True``, the default) or are converted to columnar DataFrames by applying ``reset_index()`` (``False``).
+  Note: the features cache will be rebuilt, although the resulting DataFrames are unchanged (because the default value of the new option is ``True``).
+
+
 Version 0.9.1
 -------------
 

diff --git a/src/blueetl/config/analysis_model.py b/src/blueetl/config/analysis_model.py
@@ -176,6 +176,7 @@ class FeaturesConfig(BaseModel):
     params_product: dict[str, Any] = {}
     params_zip: dict[str, Any] = {}
     suffix: str = ""
+    multi_index: bool = True
 
 
 class SingleAnalysisConfig(BaseModel):

diff --git a/src/blueetl/features.py b/src/blueetl/features.py
@@ -431,7 +431,11 @@ def _user_func_wrapper(
         # ignore the index if it's unnamed and with one level; this can be useful
         # for example when the returned DataFrame has a RangeIndex to be dropped
         drop = result_df.index.names == [None]
-        result_df = result_df.etl.add_conditions(conditions=key._fields, values=key, drop=drop)
+        if features_config.multi_index:
+            result_df = result_df.etl.add_conditions(conditions=key._fields, values=key, drop=drop)
+        else:
+            result_df.reset_index(drop=drop, inplace=True)
+            result_df.etl.insert_columns(loc=0, columns=key._fields, values=key)
         # the conversion to the desired dtype here is important to reduce memory usage and cpu time
         result_df = ensure_dtypes(result_df)
         output_dir = temp_folder / f"{feature_group}{features_config.suffix}"

diff --git a/src/blueetl/schemas/analysis_config.yaml b/src/blueetl/schemas/analysis_config.yaml
@@ -483,6 +483,15 @@ $defs:
           A numeric suffix is automatically added when any of ``params_product`` or ``params_zip`` is specified.
         default: "''"
         type: string
+      multi_index:
+        title: MultiIndex
+        description: |
+          - If True, do not reset the index of the resulting DataFrames of features, and add the values specified in ``groupby`` to the MultiIndex.
+          - If False, reset the index, returning columnar DataFrames.
+
+          The DataFrames with MultiIndex should use less memory than the columnar DataFrames, but they take more time to load and dump to disk.
+        type: boolean
+        default: "true"
     required:
     - type
     - groupby