Skip to content

Commit

Permalink
Enhance LPdiag tool
Browse files Browse the repository at this point in the history
* Add test coverage
* Add some type hints
* Clarify function names
* Remove some outdated comments
  • Loading branch information
glatterf42 committed Oct 30, 2023
1 parent 8133c18 commit e6950bf
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 61 deletions.
71 changes: 67 additions & 4 deletions message_ix/tests/tools/test_lpdiag.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os

import pytest

from message_ix.tools.lp_diag.lp_diag import LPdiag


Expand All @@ -15,7 +17,7 @@ def test_aez():
lp = LPdiag()

# Read MPS, store the matrix in dataFrame
lp.rd_mps(file)
lp.read_mps(file)

# Check that the matrix has the correct shape
assert lp.mat.shape == (8895, 5)
Expand Down Expand Up @@ -49,7 +51,7 @@ def test_diet():
lp = LPdiag()

# Read MPS, store the matrix in dataFrame
lp.rd_mps(file)
lp.read_mps(file)

# Check that the matrix has the correct shape
assert lp.mat.shape == (39, 5)
Expand Down Expand Up @@ -83,7 +85,7 @@ def test_jg_korh():
lp = LPdiag()

# Read MPS, store the matrix in dataFrame
lp.rd_mps(file)
lp.read_mps(file)

# Check that the matrix has the correct shape
assert lp.mat.shape == (10, 5)
Expand Down Expand Up @@ -117,7 +119,7 @@ def test_lotfi():
lp = LPdiag()

# Read MPS, store the matrix in dataFrame
lp.rd_mps(file)
lp.read_mps(file)

# Check that the matrix has the correct shape
assert lp.mat.shape == (1086, 5)
Expand All @@ -137,3 +139,64 @@ def test_lotfi():

# Check that sequence number of the goal function is not -1
assert lp.gf_seq != -1


# TODO: continue expanding tests
# Mostly, this means calling the last functions defined in lp_diag.py, but some
# lines also require special edge cases (mps files defined with 6 and 7 sections)
def test_error_cases():
    """Check that reading the ``err_tst`` MPS file raises an AssertionError."""

    # Path of the err_tst test problem, resolved from the current working directory
    path_parts = ("message_ix", "tools", "lp_diag", "test_mps", "err_tst")
    mps_path = os.path.join(os.getcwd(), *path_parts)
    diagnostics = LPdiag()

    # Reading this file is expected to trip an assertion inside read_mps()
    with pytest.raises(AssertionError):
        diagnostics.read_mps(mps_path)


def test_lpdiag_print_statistics():
    """Exercise ``LPdiag.print_statistics()`` on the ``jg_korh`` test problem."""

    # Path of the jg_korh test problem, resolved from the current working directory
    path_parts = ("message_ix", "tools", "lp_diag", "test_mps", "jg_korh")
    mps_path = os.path.join(os.getcwd(), *path_parts)
    diagnostics = LPdiag()

    # Parse the MPS file; the matrix is stored on the instance as ``mat``
    diagnostics.read_mps(mps_path)

    # Statistics of the matrix coefficients, including the distribution tails
    diagnostics.print_statistics(lo_tail=-7, up_tail=5)
    # Equal/overlapping tails give the number of coefficients for each magnitude
    diagnostics.print_statistics(lo_tail=1, up_tail=0)

    # Nothing is returned by the calls above, so the only check made here is
    # that the stored matrix still has the expected shape
    assert diagnostics.mat.shape == (10, 5)


def test_lpdiag_locate_outliers():
    """Exercise ``LPdiag.locate_outliers()`` on the ``lotfi`` test problem."""

    # Path of the lotfi test problem, resolved from the current working directory
    path_parts = ("message_ix", "tools", "lp_diag", "test_mps", "lotfi")
    mps_path = os.path.join(os.getcwd(), *path_parts)
    diagnostics = LPdiag()

    # Parse the MPS file; the matrix is stored on the instance as ``mat``
    diagnostics.read_mps(mps_path)

    # Small-value outliers first, then large-value outliers
    diagnostics.locate_outliers(small=True, thresh=-1, max_rec=100)
    diagnostics.locate_outliers(small=False, thresh=2, max_rec=500)

    # Nothing is returned by the calls above, so the only check made here is
    # that the stored matrix still has the expected shape
    assert diagnostics.mat.shape == (1086, 5)
57 changes: 30 additions & 27 deletions message_ix/tools/lp_diag/lp_diag.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class LPdiag:
"""

def __init__(self):
self.fname = "undefined" # MPS file name, to be defined by rd_mps() call
self.fname = "undefined" # MPS file name, to be defined by read_mps() call
self.pname = "undefined" # problem name
self.id_rhs = False # True, if rhs_id defined
self.id_range = False # True, if range_id defined
Expand Down Expand Up @@ -75,7 +75,7 @@ def __init__(self):
# columns=['seq_id', 'name', 'type', 'lo_bnd', 'up_bnd']
# )

def rd_mps(self, fname): # process the MPS file
def read_mps(self, fname): # process the MPS file
print(f"\nReading MPS-format file {fname}.")
self.fname = fname
sections = [
Expand Down Expand Up @@ -122,12 +122,12 @@ def rd_mps(self, fname): # process the MPS file
elif n_section == 6: # SOS section
pass # SOS section not processed
# elif n_section == 7: # end data
# raise Exception(
# raise RuntimeError(
# "Unexpected execution flow; needs to be explored."
# )
else:
print(f"MPS record {n_line}, section id {n_section}.")
raise Exception(
raise RuntimeError(
f"MPS line '{line}' (line {n_line}) misplaced,"
f" processing section {sections[n_section]}."
)
Expand All @@ -140,7 +140,7 @@ def rd_mps(self, fname): # process the MPS file
elif n_section == 6: # SOS
print(f"WARNING: Section {sections[n_section]} not processed.")
else:
raise Exception(
raise RuntimeError(
f"Should not come here, n_section = {n_section}."
)

Expand Down Expand Up @@ -184,16 +184,16 @@ def next_sec(self, n_exp, words, sections):
return n_exp # n_sections equals to the expected: n_exp
else:
print(f"section {words} found.")
raise Exception(
f"Required MPS section {sections[n_exp]} undefined" " or misplaced."
raise NameError(
f"Required MPS section {sections[n_exp]} undefined or misplaced."
)
else: # the found section does not follow the last processed section
try:
n_section = sections.index(words[0])
except ValueError:
raise Exception(f"Unknown section: {words} (line {n_line}).")
except ValueError as e:
raise ValueError(f"Unknown section: {words} (line {n_line}).") from e
if n_section < n_exp:
raise Exception(
raise AttributeError(
f"Section {words[0]} (line {n_line}) is misplaced or duplicated."
)
return n_section
Expand Down Expand Up @@ -570,11 +570,11 @@ def add_bnd(self, words, n_line):
if typ in bnd_type1: # bound-types that require a value
try:
val = float(words[pos_name + 1])
except ValueError:
print(
except ValueError as e:
raise ValueError(
f"BOUND value {words[pos_name + 1]} (line {n_line}) is not a "
"number."
)
) from e
at_pos = bnd_type1.get(typ)
if at_pos == 3: # set both bounds
attr[1] = attr[2] = val
Expand All @@ -588,12 +588,12 @@ def add_bnd(self, words, n_line):
else:
attr[at_pos] = self.infty
elif typ in bnd_type3:
raise Exception(
raise TypeError(
f"Bound type {typ} of integer var. (line {n_line}) not"
" processed yet."
)
else:
raise Exception(f"Unknown bound type {typ} (line {n_line}).")
raise TypeError(f"Unknown bound type {typ} (line {n_line}).")
self.seq_col.update({col_seq: attr}) # store the updated col-attributes
self.n_bounds += 1

Expand Down Expand Up @@ -673,9 +673,9 @@ def row_att(self, row_seq, row_name, row_type, sec_name, val=0.0):
# f" {attr}."
# )
else: # update row attributes (used in RHS and ranges sections)
raise Exception(f"row_att() should not be called for {sec_name=}.")
raise SyntaxError(f"row_att() should not be called for {sec_name=}.")

def stat(self, lo_tail=-7, up_tail=6):
def print_statistics(self, lo_tail: int = -7, up_tail: int = 6):
"""Basic statistics of the matrix coefficients.
Focus on distributions of magnitudes of non-zero coeff. represented by values
Expand Down Expand Up @@ -754,7 +754,7 @@ def stat(self, lo_tail=-7, up_tail=6):
f" {self.mat.loc[self.mat['log'] == val]['log'].count()}"
)

def out_loc(self, small=True, thresh=-7, max_rec=500):
def locate_outliers(self, small: bool = True, thresh: int = -7, max_rec: int = 500):
"""Locations of outliers, i.e., elements having small/large coeff values.
Locations of outliers (in the term of the matrix coefficient values).
Expand Down Expand Up @@ -789,14 +789,14 @@ def out_loc(self, small=True, thresh=-7, max_rec=500):
) # sort the df with outliers ascending seq_id of rows
df1.reset_index()
col_out = [] # col_seq of outliers' cols
for n_rows, (indx, row) in enumerate(df1.iterrows()):
for n_rows, (_, row) in enumerate(df1.iterrows()):
assert (
n_rows < max_rec
), "To process all requested coeffs modify the safety limit assertion."
row_seq, row_name = self.ent_inf(
row_seq, row_name = self.get_entity_info(
row, True
) # row seq_id and name of the current coeff.
col_seq, col_name = self.ent_inf(
col_seq, col_name = self.get_entity_info(
row, False
) # col seq_id and name of the current coeff.
if col_seq not in col_out:
Expand All @@ -813,12 +813,12 @@ def out_loc(self, small=True, thresh=-7, max_rec=500):
] # df with all elements
# print(f'matrix elements in the same row:\n{df_row}')
print(
f"\tRow {row_name} {self.ent_range(row_seq, True)} has"
f"\tRow {row_name} {self.get_entity_range(row_seq, True)} has"
f" {df_row_out['log'].count()} outlier-coeff. of magnitudes in"
f" [{df_row_out['log'].min()}, {df_row_out['log'].max()}]"
)
print(
f"\tRow {row_name} {self.ent_range(row_seq, True)} has"
f"\tRow {row_name} {self.get_entity_range(row_seq, True)} has"
f" {df_row_all['log'].count()} (all)-coeff. of magnitudes in"
f" [{df_row_all['log'].min()}, {df_row_all['log'].max()}]"
)
Expand All @@ -827,7 +827,7 @@ def out_loc(self, small=True, thresh=-7, max_rec=500):
# df with outliers in the same col:
# df_col = df1.loc[df1['col'] == col_seq]
# print(
# f"\tCol {col_name} {self.ent_range(col_seq, False)} has "
# f"\tCol {col_name} {self.get_entity_range(col_seq, False)} has "
# f"{df_col["log"].count()} outlier coeff. of magnitudes in "
# f"[{df_col["log"].min()}, {df_col["log"].max()}]"
# )
Expand All @@ -842,12 +842,14 @@ def out_loc(self, small=True, thresh=-7, max_rec=500):
self.mat["col"] == col_seq
] # df with elements in the same col
print(
f"\tCol {col_name} {self.ent_range(col_seq, False)} has"
f"\tCol {col_name} {self.get_entity_range(col_seq, False)} has"
f" {df_col['log'].count()} coeff. of magnitudes in"
f" [{df_col['log'].min()}, {df_col['log'].max()}]"
)

def ent_inf(self, mat_row, by_row=True) -> typing.Tuple[int, str]:
def get_entity_info(
self, mat_row: pd.Series, by_row: bool = True
) -> typing.Tuple[int, str]:
"""Return info on the entity (either row or col) defining the selected matrix
coefficient.
Expand All @@ -874,7 +876,7 @@ def ent_inf(self, mat_row, by_row=True) -> typing.Tuple[int, str]:
name = self.seq_col.get(ent_seq)[0]
return ent_seq, name

def ent_range(self, seq_id, by_row=True) -> str:
def get_entity_range(self, seq_id: int, by_row: bool = True) -> str:
"""Return formatted string representing ranges of feasible values of either a
row or a column.
Expand Down Expand Up @@ -915,3 +917,4 @@ def ent_range(self, seq_id, by_row=True) -> str:
def plot_hist(self):
"""Plot histograms."""
# todo: might not be needed; therefore the implementation postponed
pass
52 changes: 22 additions & 30 deletions message_ix/tools/lp_diag/lpdiag.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,9 @@ def read_args():
work_dir = os.getcwd()
print(f"work_dir: '{work_dir}'.")
tstart = dt.now()
# print('Started at:', str(tstart))

# Retrieve and assign arguments
args = read_args()
# dir2 = os.getcwd()
# print(f"{dir2 =}")
w_dir = args.wdir or "."
prob_id = args.mps or "test_mps/aez" # default MPS for testing
# alternative specs of test-MPS commented below
Expand All @@ -84,18 +81,16 @@ def read_args():
print(f"Changing work-directory to: {w_dir}.")
try:
os.chdir(w_dir)
except OSError:
print(f"Cannot change work-directory to: {w_dir}.")
# dir3 = os.getcwd()
# print(f"{dir3 =}")
except OSError as e:
raise OSError(f"Cannot change work-directory to: {w_dir}.") from e
assert isfile(prob_id), (
f"MPS file {prob_id} not accessible from the work-directory:\n'{work_dir}'."
"\nTry to use the --wdir command option to set the work-directory."
)
assert access(prob_id, R_OK), f"MPS file {prob_id} is not readable."

# Large (1+ GB) MPS files shall not be posted to GitHub.
# app was tested on two (1+ GB) MPSs posted by Oliver in /t/fricko on Feb 16, 2023:
# app was tested on two (1+ GB) MPSs posted by OFR in /t/fricko on Feb 16, 2023:
# OFR_test_led_barrier.mps
# baseline_barrier.mps

Expand All @@ -110,40 +105,37 @@ def read_args():

default_stdout = sys.stdout
if fn_outp:
# fn_out = "./" + repdir + prob_id + ".txt" # file for redirected stdout
print(f"Stdout redirected to: {fn_outp}")
f_out = open(fn_outp, "w")
sys.stdout = f_out
# else: # defined to avoid warnings (only used when redir_stdo == True)
# fn_out = "foo"
# f_out = open(fn_out, "w")

lp = LPdiag() # LPdiag ctor
lp.rd_mps(prob_id) # read MPS, store the matrix in dataFrame
lp.stat(lo_tail=-7, up_tail=5) # stats of matrix coeffs, incl. distrib. tails
# to get numbers of coeffs for each magnitute specify equal/overlapping tails:
# lp.stat(lo_tail=0, up_tail=0)
lp.out_loc(small=True, thresh=-7, max_rec=100) # locations of small-value outliers
lp.out_loc(small=False, thresh=6, max_rec=500) # locations of large-value outliers
# lp.out_loc(small=True, thresh=-1, max_rec=100) # test (lotfi) small-value outliers
# lp.out_loc(small=False, thresh=2, max_rec=500) # test (lotfi) large-value outliers

tend = dt.now()
time_diff = tend - tstart
print("\nStarted at: ", str(tstart))
print("Finished at:", str(tend))
print(f"Wall-clock execution time: {time_diff.seconds} sec.")
lp.read_mps(prob_id) # read MPS, store the matrix in dataFrame
lp.print_statistics(
lo_tail=-7, up_tail=5
) # stats of matrix coeffs, incl. distrib. tails
# To get numbers of coeffs for each magnitude specify equal/overlapping tails:
# lp.print_statistics(lo_tail=0, up_tail=0)
lp.locate_outliers(
small=True, thresh=-7, max_rec=100
) # locations of small-value outliers
lp.locate_outliers(
small=False, thresh=6, max_rec=500
) # locations of large-value outliers

if fn_outp: # close the redirected output
# noinspection PyUnboundLocalVariable
f_out.close()
sys.stdout = default_stdout
print(f"\nRedirected stdout stored in {fn_outp}. Now writing to the console.")
print("\nStarted at: ", str(tstart))
print("Finished at:", str(tend))
print(f"Wall-clock execution time: {time_diff.seconds} sec.")

tend = dt.now()
time_diff = tend - tstart
print("\nStarted at: ", str(tstart))
print("Finished at:", str(tend))
print(f"Wall-clock execution time: {time_diff.seconds} sec.")

# todo: TBD, if the MPS-standard should be observed; should it cause error or info
# in particular, range of values: 10^{-10} < abs(val) < 10^{10}
# todo: naive scaling? might not be informative due to the later preprocessing
# todo: plots of distributions of coeffs, if indeed usefull
# todo: plots of distributions of coeffs, if indeed useful

0 comments on commit e6950bf

Please sign in to comment.