Skip to content

Commit

Permalink
FIX: Refactor PhaseRegion; better stoichiometric phase support (#185)
Browse files Browse the repository at this point in the history
* Make `PhaseRegion` a dataclass
* Introduce a `RegionVertex` dataclass instead of storing the vertex data in lists of `PhaseRegion`
* Fix a regression in ZPF data introduced by #151: prescribed phase compositions of stoichiometric phases that used to work stopped working, because the prescribed composition of a stoichiometric phase may be unsatisfiable. Stoichiometric phases are now detected and no attempt is made to solve for their points.
* Fix a bug where stoichiometric phases used `equilibrium` in driving force calculations, which could give bad energies for exact equilibrium when mass balance could not be satisfied (approximate equilibrium was not affected). Now both exact and approximate equilibrium estimate the driving force via `calculate`, which is always exact for stoichiometric phases because they have exactly one point.
  • Loading branch information
bocklund authored May 8, 2021
1 parent 7684418 commit 0b7b660
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 81 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Improvements
------------
* Fix a bug where excluded model contributions could be double counted (`@bocklund`_ - :issue:`181`)
* Support internal API changes for pycalphad 0.8.5 (`@bocklund`_ - :issue:`183`)
* Fix a regression for ZPF error calculations introduced in :issue:`181` where prescribed phase compositions of stoichiometric phases that used to work no longer work because the phase composition of a stoichiometric phase may be unsatisfiable (`@bocklund`_ - :issue:`185`).
* Fix a bug in ZPF error calculations where stoichiometric phases could give incorrect energies for exact equilibrium when prescribed mass balance conditions could not be satisfied. The fix now computes the driving force exactly in all cases for stoichiometric compounds. (`@bocklund`_ - :issue:`185`)

0.8.2 (2021-05-05)
==================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, weight_dict=None, symb
desired_data = get_prop_data(comps, phase_name, prop, datasets, additional_query=(where('solver').exists()))
if len(desired_data) == 0:
continue
unique_exclusions = set([tuple(sorted(d.get('excluded_model_contributions', []))) for d in desired_data])
unique_exclusions = set([tuple(sorted(set(d.get('excluded_model_contributions', [])))) for d in desired_data])
for exclusion in unique_exclusions:
data_dict = {
'phase_name': phase_name,
Expand Down
169 changes: 89 additions & 80 deletions espei/error_functions/zpf_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

import logging
import warnings
from dataclasses import dataclass
from collections import OrderedDict
from typing import Sequence, Dict, NamedTuple, Any, Union, List, Tuple
from typing import Sequence, Dict, Any, Union, List, Tuple

import numpy as np
from numpy.typing import ArrayLike
Expand All @@ -36,6 +37,29 @@
_log = logging.getLogger(__name__)


@dataclass
class RegionVertex:
    """Data for a single vertex (one phase at one tie-line point) of a phase region.

    Attributes
    ----------
    phase_name : str
        Name of the phase at this vertex.
    comp_conds : Dict[v.X, Union[float, None]]
        Composition conditions prescribed for the phase. A value is ``None``
        when that composition is not known.
    points : Union[ArrayLike, None]
        Sampled single-phase points satisfying ``comp_conds``. ``None`` when no
        points were constructed, i.e. a composition condition is missing or
        the phase is stoichiometric.
    phase_records : Dict[str, PhaseRecord]
        Phase records built for the conditions of this vertex.
    is_disordered : bool
        True if the vertex was flagged as ``"disordered"`` in the dataset.
    has_missing_comp_cond : bool
        True if any value in ``comp_conds`` is ``None``.
    """
    phase_name: str
    # Values may be None for an unknown composition, so plain ``float`` is too narrow.
    comp_conds: Dict[v.X, Union[float, None]]
    # None is a valid value: no points are sampled for unknown compositions
    # or for stoichiometric phases.
    points: Union[ArrayLike, None]
    phase_records: Dict[str, PhaseRecord]
    is_disordered: bool
    has_missing_comp_cond: bool

@dataclass
class PhaseRegion:
    """One set of phases in equilibrium, stored as vertices sharing potential conditions.

    Attributes
    ----------
    vertices : Sequence[RegionVertex]
        The vertices (phase name, composition conditions, points, ...) of the region.
    potential_conds : Dict[v.StateVariable, float]
        Potential conditions that apply to every vertex in the region.
    dbf : Database
        Database used for the calculations in this region.
    species : Sequence[v.Species]
        Species considered in the calculations.
    phases : Sequence[str]
        Names of the phases considered for this region.
    models : Dict[str, Model]
        Model instances keyed by phase name.
    """
    vertices: Sequence[RegionVertex]
    potential_conds: Dict[v.StateVariable, float]
    dbf: Database
    species: Sequence[v.Species]
    phases: Sequence[str]
    models: Dict[str, Model]

    def eq_str(self) -> str:
        """Return a one-line summary of the region's conditions and phase compositions."""
        per_vertex = []
        for vertex in self.vertices:
            per_vertex.append(f'{vertex.phase_name}: {vertex.comp_conds}')
        joined = ', '.join(per_vertex)
        return f"conds: ({self.potential_conds}), comps: ({joined})"


def _safe_index(items, index):
try:
return items[index]
Expand Down Expand Up @@ -188,7 +212,7 @@ def _sample_solution_constitution(mod: Model, soln: Dict[v.Y, Union[sympy.Expr,
return points


def extract_conditions(all_conditions: Dict[v.StateVariable, np.ndarray], idx: int) -> Dict[v.StateVariable, float]:
def _extract_pot_conds(all_conditions: Dict[v.StateVariable, np.ndarray], idx: int) -> Dict[v.StateVariable, float]:
"""Conditions are either scalar or 1d arrays for the conditions in the entire dataset.
This function extracts the condition corresponding to the current region,
based on the index in the 1d condition array.
Expand All @@ -204,38 +228,29 @@ def extract_conditions(all_conditions: Dict[v.StateVariable, np.ndarray], idx: i
return pot_conds


def extract_phases_comps(phase_region):
"""Extract the phase names, phase compositions and any phase flags from
each tie-line point in the phase region
def _extract_phases_comps(vertex):
"""Extract the phase name, phase compositions and disordered flag from a vertex
"""
region_phases = []
region_comp_conds = []
phase_flags = []
for tie_point in phase_region:
if len(tie_point) == 4: # phase_flag within
phase_name, components, compositions, flag = tie_point
elif len(tie_point) == 3: # no phase_flag within
phase_name, components, compositions = tie_point
flag = None
if len(vertex) == 4: # phase_flag within
phase_name, components, compositions, flag = vertex
if flag == "disordered":
disordered_flag = True
else:
raise ValueError("Wrong number of data in tie-line point")
region_phases.append(phase_name)
region_comp_conds.append(dict(zip(map(v.X, map(lambda x: x.upper(), components)), compositions)))
phase_flags.append(flag)
return region_phases, region_comp_conds, phase_flags


PhaseRegion = NamedTuple('PhaseRegion', (('region_phases', Sequence[str]),
('potential_conds', Dict[v.StateVariable, float]),
('comp_conds', Sequence[Dict[v.X, float]]),
('phase_points', Sequence[ArrayLike]),
('phase_flags', Sequence[str]),
('dbf', Database),
('species', Sequence[v.Species]),
('phases', Sequence[str]),
('models', Dict[str, Model]),
('phase_records', Sequence[Dict[str, PhaseRecord]]),
))
disordered_flag = False
elif len(vertex) == 3: # no phase_flag within
phase_name, components, compositions = vertex
disordered_flag = False
else:
raise ValueError("Wrong number of data in tie-line point")
comp_conds = dict(zip(map(v.X, map(str.upper, components)), compositions))
return phase_name, comp_conds, disordered_flag


def _phase_is_stoichiometric(dbf, species, phase_name) -> bool:
    """Return True if every sublattice of a phase has exactly one active constituent.

    Parameters
    ----------
    dbf
        Database-like object; ``dbf.phases[phase_name].constituents`` must be an
        iterable with one set of constituents per sublattice.
    species
        Iterable of the active species. Constituents that are not in
        ``species`` are ignored.
    phase_name
        Name of the phase to look up in ``dbf.phases``.

    Returns
    -------
    bool
        True when each sublattice contains exactly one active constituent.
        A sublattice with no active constituents makes the result False.
    """
    phase_constituents = dbf.phases[phase_name].constituents
    # Only the number of active constituents on each sublattice matters, so
    # count the intersection directly instead of building sorted name lists.
    return all(len(subl.intersection(species)) == 1 for subl in phase_constituents)


def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], datasets: PickleableTinyDB, parameters: Dict[str, float]):
Expand Down Expand Up @@ -276,26 +291,31 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat
# Each phase_region is one set of phases in equilibrium (on a tie-line),
# e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]]
for idx, phase_region in enumerate(all_regions):
# We need to construct a PhaseRegion by matching up phases/compositions to the conditions
# Extract the conditions for entire phase region
region_potential_conds = extract_conditions(conditions, idx)
region_potential_conds[v.N] = region_potential_conds.get(v.N) or 1.0 # Add v.N condition, if missing
pot_conds = _extract_pot_conds(conditions, idx)
pot_conds.setdefault(v.N, 1.0) # Add v.N condition, if missing
# Extract all the phases and compositions from the tie-line points
region_phases, region_comp_conds, phase_flags = extract_phases_comps(phase_region)
# Construct single-phase points satisfying the conditions for each phase in the region
region_phase_points = []
for phase_name, comp_conds in zip(region_phases, region_comp_conds):
vertices = []
for vertex in phase_region:
phase_name, comp_conds, disordered_flag = _extract_phases_comps(vertex)
phase_recs = build_phase_records(dbf, species, data_phases, {**pot_conds, **comp_conds}, models, parameters=parameters, build_gradients=True, build_hessians=True)
# Construct single-phase points satisfying the conditions for each phase in the region
if any(val is None for val in comp_conds.values()):
# We can't construct points because we don't have a known composition
region_phase_points.append(None)
continue
mod = models[phase_name]
sitefrac_soln = _solve_sitefracs_composition(mod, comp_conds)
phase_points = _sample_solution_constitution(mod, sitefrac_soln)
region_phase_points.append(phase_points)
region_phase_records = [build_phase_records(dbf, species, data_phases, {**region_potential_conds, **comp_conds}, models, parameters=parameters, build_gradients=True, build_hessians=True)
for comp_conds in region_comp_conds]
phase_regions.append(PhaseRegion(region_phases, region_potential_conds, region_comp_conds, region_phase_points, phase_flags, dbf, species, data_phases, models, region_phase_records))
has_missing_comp_cond = True
phase_points = None
elif _phase_is_stoichiometric(dbf, species, phase_name):
has_missing_comp_cond = False
phase_points = None
else:
has_missing_comp_cond = False
mod = models[phase_name]
sitefrac_soln = _solve_sitefracs_composition(mod, comp_conds)
phase_points = _sample_solution_constitution(mod, sitefrac_soln)
vtx = RegionVertex(phase_name, comp_conds, phase_points, phase_recs, disordered_flag, has_missing_comp_cond)
vertices.append(vtx)
region = PhaseRegion(vertices, pot_conds, dbf, species, data_phases, models)
phase_regions.append(region)

data_dict = {
'weight': data.get('weight', 1.0),
Expand Down Expand Up @@ -330,14 +350,11 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro
species = phase_region.species
phases = phase_region.phases
models = phase_region.models
for comp_conds, phase_flag, phase_records in zip(phase_region.comp_conds, phase_region.phase_flags, phase_region.phase_records):
# We are now considering a particular tie vertex
for vertex in phase_region.vertices:
phase_records = vertex.phase_records
update_phase_record_parameters(phase_records, parameters)
cond_dict = {**comp_conds, **phase_region.potential_conds}
for key, val in cond_dict.items():
if val is None:
cond_dict[key] = np.nan
if np.any(np.isnan(list(cond_dict.values()))):
cond_dict = {**vertex.comp_conds, **phase_region.potential_conds}
if vertex.has_missing_comp_cond:
# This composition is unknown -- it doesn't contribute to hyperplane estimation
pass
else:
Expand All @@ -362,7 +379,7 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro


def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, comps: Sequence[str],
phase_region: PhaseRegion, vertex_idx: int,
phase_region: PhaseRegion, vertex: RegionVertex,
parameters: np.ndarray, approximate_equilibrium: bool = False) -> float:
"""Calculate the integrated driving force between the current hyperplane and target hyperplane.
"""
Expand All @@ -372,24 +389,21 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, comps: S
_equilibrium = equilibrium_
dbf = phase_region.dbf
species = phase_region.species
phases = phase_region.phases
models = phase_region.models
current_phase = phase_region.region_phases[vertex_idx]
cond_dict = {**phase_region.potential_conds, **phase_region.comp_conds[vertex_idx]}
current_phase = vertex.phase_name
cond_dict = {**phase_region.potential_conds, **vertex.comp_conds}
str_statevar_dict = OrderedDict([(str(key),cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)])
phase_points = phase_region.phase_points[vertex_idx]
phase_flag = phase_region.phase_flags[vertex_idx]
phase_records = phase_region.phase_records[vertex_idx]
phase_points = vertex.points
phase_records = vertex.phase_records
update_phase_record_parameters(phase_records, parameters)
for key, val in cond_dict.items():
if val is None:
cond_dict[key] = np.nan
if np.any(np.isnan(list(cond_dict.values()))):
# We don't actually know the phase composition here, so we estimate it
if phase_points is None:
# We don't have the phase composition here, so we estimate the driving force.
# Can happen if one of the composition conditions is unknown or if the phase is
# stoichiometric and the user did not specify a valid phase composition.
single_eqdata = calculate_(dbf, species, [current_phase], str_statevar_dict, models, phase_records, pdens=500)
df = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM
driving_force = float(df.max())
elif phase_flag == 'disordered':
elif vertex.is_disordered:
# Construct disordered sublattice configuration from composition dict
# Compute energy
# Compute residual driving force
Expand Down Expand Up @@ -430,11 +444,6 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, comps: S
return driving_force


def _format_phase_compositions(phase_region):
phase_comp_cond_pairs = zip(phase_region.region_phases, phase_region.comp_conds)
phase_compositions = ', '.join(f'{ph}: {c}' for ph, c in phase_comp_cond_pairs)
return f"conds: ({phase_region.potential_conds}), comps: ({phase_compositions})"

def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
parameters: ArrayLike = None,
approximate_equilibrium: bool = False,
Expand Down Expand Up @@ -482,25 +491,25 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
# for the set of phases and corresponding tie-line verticies in equilibrium
for phase_region in data['phase_regions']:
# 1. Calculate the average multiphase hyperplane
eq_str = _format_phase_compositions(phase_region)
eq_str = phase_region.eq_str()
target_hyperplane = estimate_hyperplane(phase_region, parameters, approximate_equilibrium=approximate_equilibrium)
if np.any(np.isnan(target_hyperplane)):
_log.debug('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref)
data_driving_forces.extend([0]*len(phase_region.comp_conds))
data_weights.extend([weight]*len(phase_region.comp_conds))
data_driving_forces.extend([0]*len(phase_region.vertices))
data_weights.extend([weight]*len(phase_region.vertices))
continue
# 2. Calculate the driving force to that hyperplane for each vertex
for vertex_idx in range(len(phase_region.comp_conds)):
for vertex in phase_region.vertices:
driving_force = driving_force_to_hyperplane(target_hyperplane, data_comps,
phase_region, vertex_idx, parameters,
phase_region, vertex, parameters,
approximate_equilibrium=approximate_equilibrium,
)
if np.isinf(driving_force) and short_circuit:
_log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s. Short circuiting.', eq_str, phase_region.region_phases[vertex_idx], target_hyperplane, driving_force, dataset_ref)
_log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s. Short circuiting.', eq_str, vertex.phase_name, target_hyperplane, driving_force, dataset_ref)
return [[np.inf]], [[np.inf]]
data_driving_forces.append(driving_force)
data_weights.append(weight)
_log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s', eq_str, phase_region.region_phases[vertex_idx], target_hyperplane, driving_force, dataset_ref)
_log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s', eq_str, vertex.phase_name, target_hyperplane, driving_force, dataset_ref)
driving_forces.append(data_driving_forces)
weights.append(data_weights)
return driving_forces, weights
Expand Down

0 comments on commit 0b7b660

Please sign in to comment.