Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
Signed-off-by: lc3267 <[email protected]>
  • Loading branch information
lc3267 committed Oct 15, 2024
1 parent 6991ca6 commit d0518a0
Show file tree
Hide file tree
Showing 17 changed files with 575 additions and 322 deletions.
66 changes: 40 additions & 26 deletions qallse/data_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,24 @@ def __init__(self, hits: pd.DataFrame, truth: pd.DataFrame):

# add proper indexing
for df in [self.hits, self.truth]:
df['idx'] = df.hit_id.values
df.set_index('idx', inplace=True)
df["idx"] = df.hit_id.values
df.set_index("idx", inplace=True)

# add radius information
hits['r'] = np.linalg.norm(hits[['x', 'y']].values.T, axis=0)
hits["r"] = np.linalg.norm(hits[["x", "y"]].values.T, axis=0)

# keep the real doublets, plus a lookup '{hit_id_1}_{hit_id_2}' -> XpletType (REAL or REAL_UNFOCUSED)
df = hits.join(truth, lsuffix='_')
df = hits.join(truth, lsuffix="_")
self._doublets = truth_to_xplets(hits, df[df.weight > 0], x=2)
self._unfocused = truth_to_xplets(hits, df[df.weight == 0], x=2)

self._lookup = dict(
[(self._get_dkey(*d), XpletType.REAL) for d in self._doublets] +
[(self._get_dkey(*d), XpletType.REAL_UNFOCUSED) for d in self._unfocused]
[(self._get_dkey(*d), XpletType.REAL) for d in self._doublets]
+ [(self._get_dkey(*d), XpletType.REAL_UNFOCUSED) for d in self._unfocused]
)

def _get_dkey(self, h1, h2):
return f'{h1}_{h2}'
return f"{h1}_{h2}"

def get_unfocused_doublets(self) -> List[TDoublet]:
return self._unfocused
Expand Down Expand Up @@ -80,12 +80,12 @@ def sample_qubo(self, Q: TQubo) -> TDimodSample:
sample = dict()
for (k1, k2), v in Q.items():
if k1 == k2:
subtrack = list(map(int, k1.split('_')))
subtrack = list(map(int, k1.split("_")))
sample[k1] = int(self.is_real_xplet(subtrack) != XpletType.FAKE)
return sample

def compute_energy(self, Q: TQubo, sample: Optional[TDimodSample] = None) -> float:
"""Compute the energy of a given sample. If sample is None, the ideal sample is used (see :py:meth:~`sample_qubo`). """
"""Compute the energy of a given sample. If sample is None, the ideal sample is used (see :py:meth:~`sample_qubo`)."""
if sample is None:
sample = self.sample_qubo(Q)
en = 0
Expand All @@ -96,17 +96,22 @@ def compute_energy(self, Q: TQubo, sample: Optional[TDimodSample] = None) -> flo

# =============== scoring

def get_score_numbers(self, doublets: Union[List, np.array, pd.DataFrame]) -> [float, float, float]:
def get_score_numbers(
self, doublets: Union[List, np.array, pd.DataFrame]
) -> [float, float, float]:
"""
:param doublets: a set of doublets
:return: the number of real, fake and missing doublets
"""
if isinstance(doublets, pd.DataFrame): doublets = doublets.values
if isinstance(doublets, pd.DataFrame):
doublets = doublets.values
doublets_found, _, unfocused_found = diff_rows(doublets, self._unfocused)
missing, fakes, real = diff_rows(self._doublets, doublets_found)
return len(real), len(fakes), len(missing)

def compute_score(self, doublets: Union[List, np.array, pd.DataFrame]) -> [float, float, List[List]]:
def compute_score(
self, doublets: Union[List, np.array, pd.DataFrame]
) -> [float, float, List[List]]:
"""
Precision and recall are defined as follows:
* precision (purity): how many doublets are correct ? `len(real ∈ doublets) / len(doublets)`
Expand All @@ -115,14 +120,15 @@ def compute_score(self, doublets: Union[List, np.array, pd.DataFrame]) -> [float
:param doublets: a set of doublets
:return: the precision, the recall and the list of missing doublets. p and r are between 0 and 1.
"""
if isinstance(doublets, pd.DataFrame): doublets = doublets.values
if isinstance(doublets, pd.DataFrame):
doublets = doublets.values
doublets_found, _, unfocused_found = diff_rows(doublets, self._unfocused)
missing, fakes, real = diff_rows(self._doublets, doublets_found)
return len(real) / len(doublets_found), \
len(real) / len(self._doublets), \
missing
return len(real) / len(doublets_found), len(real) / len(self._doublets), missing

def add_missing_doublets(self, doublets: Union[np.array, pd.DataFrame]) -> pd.DataFrame:
def add_missing_doublets(
self, doublets: Union[np.array, pd.DataFrame]
) -> pd.DataFrame:
"""
:param doublets: a list of doublets
:return: a list of doublets with 100% recall
Expand All @@ -131,19 +137,21 @@ def add_missing_doublets(self, doublets: Union[np.array, pd.DataFrame]) -> pd.Da
doublets = doublets.values

ip, ir, missing = self.compute_score(doublets)
print(f'got {len(doublets)} doublets.')
print(f' Input precision (%): {ip * 100:.4f}, recall (%): {ir * 100:.4f}')
print(f"got {len(doublets)} doublets.")
print(f" Input precision (%): {ip * 100:.4f}, recall (%): {ir * 100:.4f}")

if len(missing) == 0:
# nothing to do
return doublets
else:
ret = pd.DataFrame(np.vstack((doublets, missing)), columns=['start', 'end'])
ret = pd.DataFrame(np.vstack((doublets, missing)), columns=["start", "end"])
p, _, _ = self.compute_score(ret.values)
print(f' New precision (%): {p * 100:.4f}')
print(f" New precision (%): {p * 100:.4f}")
return ret

def compute_trackml_score(self, final_tracks: List[TXplet], submission=None) -> float:
def compute_trackml_score(
self, final_tracks: List[TXplet], submission=None
) -> float:
"""
:param final_tracks: a list of xplets representing tracks
:param submission: (optional) a TrackML submission, see :py:meth:`~create_submission`
Expand All @@ -159,9 +167,13 @@ def create_submission(self, tracks: List[TXplet], event_id=1000) -> pd.DataFrame
n_rows = len(hit_ids)
sub_data = np.column_stack(([event_id] * n_rows, hit_ids, np.zeros(n_rows)))
submission = pd.DataFrame(
data=sub_data, columns=["event_id", "hit_id", "track_id"], index=hit_ids, dtype=int)
data=sub_data,
columns=["event_id", "hit_id", "track_id"],
index=hit_ids,
dtype=int,
)
for idx, track in enumerate(tracks):
submission.loc[track, 'track_id'] = idx + 1
submission.loc[track, "track_id"] = idx + 1
return submission

# =============== class utils
Expand All @@ -172,5 +184,7 @@ def from_path(cls, path):
Create a DataWrapper by reading the hits and the truth from a path.
:param path: the path + event id, in the format `/path/to/directory/eventXXXXX`
"""
path = path.replace('-hits.csv', '')
return cls(hits=pd.read_csv(path + '-hits.csv'), truth=pd.read_csv(path + '-truth.csv'))
path = path.replace("-hits.csv", "")
return cls(
hits=pd.read_csv(path + "-hits.csv"), truth=pd.read_csv(path + "-truth.csv")
)
2 changes: 1 addition & 1 deletion qallse/dsmaker/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .dsmaker import create_dataset
from .dsmaker import create_dataset
37 changes: 21 additions & 16 deletions qallse/other/dw_timing_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,27 +40,28 @@
import dimod

_INTERESTING_COMPUTATION_KEYS = [
'clock_diff', # difference in seconds between the client-server UTC clocks
'time_created', # client-side: request created
'time_received', # server-side: request received
'time_solved', # server-side: response sent
'time_resolved' # client-side: response received
"clock_diff", # difference in seconds between the client-server UTC clocks
"time_created", # client-side: request created
"time_received", # server-side: request received
"time_solved", # server-side: response sent
"time_resolved", # client-side: response received
]


class TimingRecord(dict):
"""Use this wrapper to simplify the handling of times."""

@property
def qpu_time(self):
return self['timing']['total_real_time'] * 1E-6 # total_real_time is in microseconds; converted to seconds
return self["timing"]["total_real_time"] * 1e-6 # total_real_time is in microseconds; converted to seconds

@property
def service_time(self):
return (self['time_solved'] - self['time_received']).total_seconds()
return (self["time_solved"] - self["time_received"]).total_seconds()

@property
def total_time(self):
return (self['time_resolved'] - self['time_created']).total_seconds()
return (self["time_resolved"] - self["time_created"]).total_seconds()

@property
def internet_latency(self):
Expand All @@ -73,18 +74,20 @@ def _result_to_response_hook_patch(variables, vartype):
def _hook(computation):
result = computation.result()
# get the samples. The future will return all spins so filter for the ones in variables
samples = [[sample[v] for v in variables] for sample in result.get('solutions')]
samples = [[sample[v] for v in variables] for sample in result.get("solutions")]
# the only two data vectors we're interested in are energies and num_occurrences
vectors = {'energy': result['energies']}
if 'num_occurrences' in result:
vectors['num_occurrences'] = result['num_occurrences']
vectors = {"energy": result["energies"]}
if "num_occurrences" in result:
vectors["num_occurrences"] = result["num_occurrences"]
# PATCH: record all interesting timing information
info = {}
for attr in _INTERESTING_COMPUTATION_KEYS:
info[attr] = getattr(computation, attr, None)
if 'timing' in result:
info['timing'] = result['timing']
return dimod.Response.from_samples(samples, vectors, info, vartype, variable_labels=variables)
if "timing" in result:
info["timing"] = result["timing"]
return dimod.Response.from_samples(
samples, vectors, info, vartype, variable_labels=variables
)

return _hook

Expand All @@ -107,18 +110,20 @@ def solver_with_timing(sampler: dimod.Sampler, **solver_kwargs):
return

import dwave.system.samplers.dwave_sampler as spl

original_hook = spl._result_to_response_hook
spl._result_to_response_hook = _result_to_response_hook_patch

records = []

try:

def dimod_callback(Q, best_state):
result = sampler.sample_qubo(Q, **solver_kwargs)
sample = next(result.samples())
for key, value in sample.items():
best_state[key] = value
result.info['q_size'] = len(Q)
result.info["q_size"] = len(Q)
records.append(result.info)
return best_state

Expand Down
Loading

0 comments on commit d0518a0

Please sign in to comment.