equinor · Aleksander-Karlsson · Sep 27, 2024 · Sep 27, 2024 · Sep 27, 2024 · Sep 30, 2024
@@ -335,7 +335,7 @@ def main() -> None:
 
     paths_input_schedule = (inputs.inputfile, inputs.schedulefile)
 
-    logger.debug("Running Completor %s. An advanced well modelling tool.", completor.__version__)
+    logger.info("Running Completor version %s. An advanced well modelling tool.", completor.__version__)
     logger.debug("-" * 60)
     start_a = time.time()
 

@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import math
-from collections.abc import Callable, Mapping
+from collections.abc import MutableMapping
 from typing import Any
 
 import numpy as np
@@ -10,6 +10,8 @@
 
 from completor.constants import Content, Headers, Keywords
 from completor.exceptions import CompletorError
+from completor.logger import logger
+from completor.utils import check_width_lines
 from completor.wells import Lateral, Well
 
 
@@ -61,20 +63,16 @@ def add_columns_first_last(df_temp: pd.DataFrame, add_first: bool = True, add_la
 
 
 def dataframe_tostring(
-    df_temp: pd.DataFrame,
-    format_column: bool = False,
-    trim_df: bool = True,
-    header: bool = True,
-    formatters: Mapping[str | int, Callable[..., Any]] | None = None,
+    df_temp: pd.DataFrame, format_column: bool = False, trim_df: bool = True, header: bool = True, limit: int = 128
 ) -> str:
     """Convert DataFrame to string.
 
     Args:
         df_temp: COMPLETION_DATA, COMPLETION_SEGMENTS, etc.
         format_column: If columns are to be formatted.
         trim_df: To trim or not to trim. Default: True.
-        formatters: Dictionary of the column format. Default: None.
         header: Keep header (True) or not (False).
+        limit: Maximum allowed width in characters of each output line; wider lines are logged as errors.
 
     Returns:
         Text string of the DataFrame.
@@ -91,42 +89,120 @@ def dataframe_tostring(
     if columns[0] != "--":
         # then add first column
         df_temp = add_columns_first_last(df_temp, add_first=True, add_last=False)
-    # Add single quotes around well names in output file
+
+    # Add single quotes around well names in an output file.
     if Headers.WELL in df_temp.columns:
         df_temp[Headers.WELL] = "'" + df_temp[Headers.WELL].astype(str) + "'"
-    output_string = df_temp.to_string(index=False, justify="justify", header=header)
+
+    formatters: MutableMapping[Any, Any] = {}
     if format_column:
-        if formatters is None:
-            formatters = {
-                Headers.STRENGTH: "{:.10g}".format,
-                Headers.SCALE_FACTOR: "{:.10g}".format,
-                Headers.ROUGHNESS: "{:.10g}".format,
-                Headers.CONNECTION_FACTOR: "{:.10g}".format,
-                Headers.FORMATION_PERMEABILITY_THICKNESS: "{:.10g}".format,
-                Headers.MEASURED_DEPTH: "{:.3f}".format,
-                Headers.TRUE_VERTICAL_DEPTH: "{:.3f}".format,
-                Headers.START_MEASURED_DEPTH: "{:.3f}".format,
-                Headers.END_MEASURED_DEPTH: "{:.3f}".format,
-                Headers.FLOW_COEFFICIENT: "{:.10g}".format,
-                Headers.FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
-                Headers.OIL_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
-                Headers.GAS_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
-                Headers.WATER_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
-                Headers.MAX_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
-                Headers.DEFAULTS: "{:.10s}".format,
-                Headers.WATER_HOLDUP_FRACTION_LOW_CUTOFF: "{:.10g}".format,
-                Headers.WATER_HOLDUP_FRACTION_HIGH_CUTOFF: "{:.10g}".format,
-                Headers.GAS_HOLDUP_FRACTION_LOW_CUTOFF: "{:.10g}".format,
-                Headers.GAS_HOLDUP_FRACTION_HIGH_CUTOFF: "{:.10g}".format,
-                Headers.ALPHA_MAIN: "{:.10g}".format,
-                Headers.ALPHA_PILOT: "{:.10g}".format,
-            }
-        try:
-            output_string = df_temp.to_string(index=False, justify="justify", formatters=formatters, header=header)
-        except ValueError:
-            pass
+        formatters = {
+            Headers.STRENGTH: "{:.10g}".format,
+            Headers.SCALE_FACTOR: "{:.10g}".format,
+            Headers.ROUGHNESS: "{:.10g}".format,
+            Headers.CONNECTION_FACTOR: "{:.10g}".format,
+            "CONNECTION_FACTOR": "{:.10g}".format,
+            Headers.FORMATION_PERMEABILITY_THICKNESS: "{:.10g}".format,
+            "FORMATION_PERMEABILITY_THICKNESS": "{:.10g}".format,
+            Headers.MEASURED_DEPTH: "{:.3f}".format,
+            "MD": "{:.3f}".format,
+            Headers.TRUE_VERTICAL_DEPTH: "{:.3f}".format,
+            "TVD": "{:.3f}".format,
+            Headers.START_MEASURED_DEPTH: "{:.3f}".format,
+            "START_MD": "{:.3f}".format,
+            Headers.END_MEASURED_DEPTH: "{:.3f}".format,
+            "END_MD": "{:.3f}".format,
+            Headers.FLOW_COEFFICIENT: "{:.10g}".format,
+            "CV": "{:.10g}".format,
+            Headers.FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
+            "FLOW_CROSS_SECTIONAL_AREA": "{:.3e}".format,
+            Headers.OIL_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
+            Headers.GAS_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
+            Headers.WATER_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
+            Headers.MAX_FLOW_CROSS_SECTIONAL_AREA: "{:.3e}".format,
+            Headers.DEFAULTS: "{:.10s}".format,
+            Headers.WATER_HOLDUP_FRACTION_LOW_CUTOFF: "{:.10g}".format,
+            Headers.WATER_HOLDUP_FRACTION_HIGH_CUTOFF: "{:.10g}".format,
+            Headers.GAS_HOLDUP_FRACTION_LOW_CUTOFF: "{:.10g}".format,
+            Headers.GAS_HOLDUP_FRACTION_HIGH_CUTOFF: "{:.10g}".format,
+            Headers.ALPHA_MAIN: "{:.10g}".format,
+            Headers.ALPHA_PILOT: "{:.10g}".format,
+        }
+
+        if header:
+            # Modify headers to reduce width.
+            column_splits = [tuple(column.split("_")) for column in df_temp.columns]
+            number_of_levels = max([len(tup) for tup in column_splits])
+            if number_of_levels > 1:
+                formatters.update(
+                    {
+                        ("SCALE", "FACTOR"): "{:.10g}".format,
+                        ("CONNECTION", "FACTOR"): "{:.10g}".format,
+                        ("FORMATION", "PERMEABILITY", "THICKNESS"): "{:.10g}".format,
+                        ("MEASURED", "DEPTH"): "{:.3f}".format,
+                        ("TRUE", "VERTICAL", "DEPTH"): "{:.3f}".format,
+                        ("START", "MEASURED", "DEPTH"): "{:.3f}".format,
+                        ("START", "MD"): "{:.3f}".format,
+                        ("END", "MEASURED", "DEPTH"): "{:.3f}".format,
+                        ("END", "MD"): "{:.3f}".format,
+                        ("FLOW", "COEFFICIENT"): "{:.10g}".format,
+                        ("FLOW", "CROSS", "SECTIONAL", "AREA"): "{:.3e}".format,
+                        ("OIL", "FLOW", "CROSS", "SECTIONAL", "AREA"): "{:.3e}".format,
+                        ("GAS", "FLOW", "CROSS", "SECTIONAL", "AREA"): "{:.3e}".format,
+                        ("WATER", "FLOW", "CROSS", "SECTIONAL", "AREA"): "{:.3e}".format,
+                        ("MAX", "FLOW", "CROSS", "SECTIONAL", "AREA"): "{:.3e}".format,
+                        ("WATER", "HOLDUP", "FRACTION", "LOW", "CUTOFF"): "{:.10g}".format,
+                        ("WATER", "HOLDUP", "FRACTION", "HIGH", "CUTOFF"): "{:.10g}".format,
+                        ("GAS", "HOLDUP", "FRACTION", "LOW", "CUTOFF"): "{:.10g}".format,
+                        ("GAS", "HOLDUP", "FRACTION", "HIGH", "CUTOFF"): "{:.10g}".format,
+                        ("ALPHA", "MAIN"): "{:.10g}".format,
+                        ("ALPHA", "PILOT"): "{:.10g}".format,
+                    }
+                )
+                if column_splits[0][0].startswith("--"):
+                    # Make sure each level is commented out!
+                    column_splits[0] = tuple(["--"] * number_of_levels)
+                # Replace nan with empty for printing purposes.
+                new_cols = pd.DataFrame(column_splits).fillna("")
+                df_temp.columns = pd.MultiIndex.from_frame(new_cols)
+
+    try:
+        output_string = df_temp.to_string(
+            index=False, justify="justify", formatters=formatters, header=header, sparsify=False
+        )
+    except ValueError:
+        if df_temp.isnull().values.any():
+            raise CompletorError("Got NaN values in table, please report if encountered!")
+        df_temp.replace("*", "1*", inplace=False)
+        columns_with_1_star = df_temp.columns[df_temp.eq("1*").any()]
+        df_temp = df_temp.replace("1*", np.nan, inplace=False)
+        # Probably find columns where this is the case and cast to numeric after replacing with nan?
+        df_temp[columns_with_1_star] = df_temp[columns_with_1_star].astype(np.float64, errors="ignore")
+        output_string = df_temp.to_string(
+            index=False, justify="justify", formatters=formatters, header=header, sparsify=False, na_rep="1*"
+        )
+
     if output_string is None:
         return ""
+
+    too_long_lines = check_width_lines(output_string, limit)
+    if too_long_lines:
+        output_string = df_temp.to_string(
+            index=False, justify="left", formatters=formatters, header=header, sparsify=False
+        )
+        if output_string is None:
+            return ""
+        too_long_lines2 = check_width_lines(output_string, limit)
+        if too_long_lines2:
+            # Still too wide after left-justifying. Report the lines found in the first attempt.
+            number_of_lines = len(too_long_lines)
+            logger.error(
+                f"Some data-lines in the output are wider than limit of {limit} characters for some reservoir "
+                f"simulators!\nThis is concerning line-numbers: {[tup[0] for tup in too_long_lines]}\n"
+                f"{'An excerpt of the five first' if number_of_lines > 5 else 'The'} lines:\n"
+                + "\n".join([tup[1] for tup in too_long_lines[: min(number_of_lines, 5)]])
+            )
+
     return output_string
 
 
@@ -909,7 +985,7 @@ def prepare_completion_data(
         (df_reservoir[Headers.ANNULUS_ZONE] > 0)
         | ((df_reservoir[Headers.NUMBER_OF_DEVICES] > 0) | (df_reservoir[Headers.DEVICE_TYPE] == Content.PERFORATED))
     ]
-    if df_reservoir.shape[0] == 0:
+    if df_reservoir.empty:
         return pd.DataFrame()
     compdat = pd.DataFrame()
     compdat[Headers.WELL] = [well_name] * df_reservoir.shape[0]

@@ -97,7 +97,9 @@ def find_quote(string: str) -> re.Match | None:
     return re.search(rf"([{quotes}])(?:(?=(\\?))\2.)*?\1", string)
 
 
-def clean_file_line(line: str, comment_prefix: str = "--", remove_quotation_marks: bool = False) -> str:
+def clean_file_line(
+    line: str, comment_prefix: str = "--", remove_quotation_marks: bool = False, replace_tabs: bool = True
+) -> str:
     """Remove comments, tabs, newlines and consecutive spaces from a string.
 
     Also remove trailing '/' comments, but ignore lines containing a file path.
@@ -107,6 +109,7 @@ def clean_file_line(line: str, comment_prefix: str = "--", remove_quotation_mark
         comment_prefix: The prefix used to denote a comment in the file.
         remove_quotation_marks: Whether quotation marks should be removed from the line.
             Used for cleaning schedule files.
+        replace_tabs: Whether tabs should be replaced with a space.
 
     Returns:
         A cleaned line. Returns an empty string in the case of a comment or empty line.
@@ -125,7 +128,8 @@ def clean_file_line(line: str, comment_prefix: str = "--", remove_quotation_mark
     if not line:
         return ""
     # Replace tabs with spaces, remove newlines and remove trailing spaces.
-    line = line.replace("\t", " ").replace("\n", "")
+    if replace_tabs:
+        line = line.replace("\t", " ").replace("\n", "")
     # Remove quotation marks if specified
     if remove_quotation_marks:
         line = line.replace("'", " ").replace('"', " ")
@@ -231,3 +235,28 @@ def get_active_wells(completion_table: pd.DataFrame, gp_perf_devicelayer: bool)
             )
         return np.array(completion_table[Headers.WELL][mask].unique())
     return np.array(completion_table[Headers.WELL].unique())
+
+
+def check_width_lines(result: str, limit: int) -> list[tuple[int, str]]:
+    """Check the width of each line versus limit.
+
+    Disregarding all content after '/' and '--' characters.
+
+    Args:
+        result: Raw text.
+        limit: The character width limit.
+
+    Returns:
+        List of (zero-based line index, original line) tuples for lines whose data part exceeds the limit.
+    """
+    lines = result.splitlines()
+    lengths = np.char.str_len(lines)
+    lines_to_check = np.nonzero(lengths >= limit)[0]
+    too_long_lines = []
+    for line_index in lines_to_check:
+        cleaned_line = lines[line_index].rsplit("/")[0] + "/"
+        cleaned_line = cleaned_line.rsplit("--")[0] + "--"
+
+        if len(cleaned_line) > limit:
+            too_long_lines.append((line_index, lines[line_index]))
+    return too_long_lines