@@ -1,97 +1,104 @@
-from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
-from pathlib import Path
 from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
 import typing
 from typing import List
+
 import numpy as np
 import pandas as pd
-from os import PathLike
 import yaml
 
+from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
+
+
 def mergetables() -> pd.DataFrame:
 
-    table_paths = []
-    for i in range(1,8):
-        table_paths.append(Path(f"./rawdata/Table{i}.csv"))
+    table_paths = []
+    for i in range(1, 8):
+        table_paths.append(Path(f"./rawdata/Table{i}.csv"))
+
+    # List with the rapidity bins for tables 1 to 7.
+    yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]
 
-    # List with the rapidity bins for tables 1 to 7.
-    yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]
+    col_names = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"]
+    col_names_all = col_names + ["y", "sqrts"]
 
-    col_names = ["M2","dsig","statp","statm","normp","normm","sysp","sysm"]
-    col_names_all = col_names + ["y", "sqrts"]
+    combined_df = pd.DataFrame(columns=col_names_all)
+    for i, path in enumerate(table_paths):
+        df = pd.read_csv(path, header=11, names=col_names)
+        df["y"] = yrap[i]
+        df["sqrts"] = 38.8
+        df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()]
+        combined_df = pd.concat([combined_df, df], ignore_index=True)
 
-    combined_df = pd.DataFrame(columns=col_names_all)
-    for i, path in enumerate(table_paths):
-        df = pd.read_csv(path, header=11, names=col_names)
-        df["y"]=yrap[i]
-        df["sqrts"]=38.8
-        df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()]
-        combined_df = pd.concat([combined_df,df],ignore_index=True)
+    # In the table we have sqrt(tau) not M2; compute M2=tau*s
+    combined_df["M2"] = (combined_df["M2"] * 38.8) ** 2
 
-    # In the table we have sqrt(tau) not M2; compute M2=tau*s
-    combined_df["M2"] = (combined_df["M2"]*38.8)**2
+    return combined_df
 
-    return combined_df
 
 def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike):
-    dfN = pd.read_table(tableN)
-    dfp = pd.read_table(tablep)
-    return dfN, dfp
+    dfN = pd.read_table(tableN)
+    dfp = pd.read_table(tablep)
+    return dfN, dfp
+
 
 @dataclass
 class E605_commondata(commondata):
-    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
+    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
 
-        # Kinematic quantities.
-        self.central_values = data["dsig"].astype(float).to_numpy()
-        self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
-        self.kinematic_quantities = ["y", "M2", "sqrts"]
+        # Kinematic quantities.
+        self.central_values = data["dsig"].astype(float).to_numpy()
+        self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
+        self.kinematic_quantities = ["y", "M2", "sqrts"]
 
-        # Statistical uncertainties.
-        self.statistical_uncertainties = data["statp"]
+        # Statistical uncertainties.
+        self.statistical_uncertainties = data["statp"]
 
-        # the overall 10% statistical uncertainty is treated as
-        # additive, while normalisation uncertainty is always treated
-        # multiplicatively
-        syst = pd.DataFrame(0.1 * self.central_values)
+        # the overall 10% statistical uncertainty is treated as
+        # additive, while normalisation uncertainty is always treated
+        # multiplicatively
+        syst = pd.DataFrame(0.1 * self.central_values)
 
-        # Systematic uncertainties.
-        syst["norm"] = (self.central_values
-                        *data["normp"].str.strip("%").astype(float)/100)
+        # Systematic uncertainties.
+        syst["norm"] = self.central_values * data["normp"].str.strip("%").astype(float) / 100
 
+        # self.systematic_uncertainties = np.dstack((stat,norm))[0]
+        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]
 
-        #self.systematic_uncertainties = np.dstack((stat,norm))[0]
-        self.systypes = [("ADD","UNCORR"),("MULT", "CORR")]
+        # Compute the point-to-point uncertainties
+        nrep = 999
+        norm = np.sqrt(nrep)
+        dfN, dfp = nuclear_uncert_dw(
+            "rawdata/nuclear/output/tables/group_result_table.csv",
+            "rawdata/proton_ite/output/tables/group_result_table.csv",
+        )
 
-        # Compute the point-to-point uncertainties
-        nrep=999
-        norm=np.sqrt(nrep)
-        dfN, dfp = nuclear_uncert_dw("rawdata/nuclear/output/tables/group_result_table.csv",
-                                     "rawdata/proton_ite/output/tables/group_result_table.csv")
+        for rep in range(1, nrep + 1):
+            Delta = (dfN[f"rep_{rep:05d}"] - dfp["theory_central"]) / norm
+            syst[f"NUCLEAR{rep:05d}"] = Delta
+            self.systypes.append(("ADD", f"NUCLEAR{rep:05d}"))
 
-        for rep in range(1,nrep+1):
-            Delta = (dfN[f"rep_{rep:05d}"]-dfp["theory_central"])/norm
-            syst[f"NUCLEAR{rep:05d}"]=Delta
-            self.systypes.append(("ADD", f"NUCLEAR{rep:05d}"))
+        self.systematic_uncertainties = syst.to_numpy()
 
-        self.systematic_uncertainties = syst.to_numpy()
+        self.process = process
+        self.dataset_name = dataset_name
 
-        self.process = process
-        self.dataset_name = dataset_name
 
 def main():
-    data = mergetables()
-    # First create the commondata variant without the nuclear uncertainties.
-    DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0")
-    DYE605.write_new_commondata(Path("data_reimplemented_PXSEC.yaml"),
-                                Path("kinematics_reimplemented_PXSEC.yaml"),
-                                Path("uncertainties_reimplemented_PXSEC.yaml"))
-    if(covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented")):
-        print("covmat is close")
-    else:
-        print("covmat is different.")
-
-if __name__ == "__main__":
-    main()
-
-
+    data = mergetables()
+    # First create the commondata variant without the nuclear uncertainties.
+    DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0")
+    DYE605.write_new_commondata(
+        Path("data_reimplemented_PXSEC.yaml"),
+        Path("kinematics_reimplemented_PXSEC.yaml"),
+        Path("uncertainties_reimplemented_PXSEC.yaml"),
+    )
+    if covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented"):
+        print("covmat is close")
+    else:
+        print("covmat is different.")
 
 
+if __name__ == "__main__":
+    main()
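
For context on the /norm factor in the replica loop above: each NUCLEARxxxxx column stores Delta_k = (T_nuclear^(k) - T_proton) / sqrt(nrep), so the columns summed in quadrature give, per data point, the mean squared difference between the nuclear replica predictions and the proton baseline. The snippet below is a minimal standalone sketch of that consistency check, not part of the filter itself; it assumes the same group_result_table.csv files and the rep_00001...rep_00999 / theory_central columns that nuclear_uncert_dw() reads, with rows ordered like the data points.

import numpy as np
import pandas as pd

# Same theory tables read by nuclear_uncert_dw() in the filter above.
nrep = 999
dfN = pd.read_table("rawdata/nuclear/output/tables/group_result_table.csv")
dfp = pd.read_table("rawdata/proton_ite/output/tables/group_result_table.csv")

# One column per replica: the shifts stored as the NUCLEARxxxxx systematics,
# Delta_k = (T_nuclear^(k) - T_proton) / sqrt(nrep).
deltas = np.column_stack(
    [
        (dfN[f"rep_{k:05d}"] - dfp["theory_central"]).to_numpy() / np.sqrt(nrep)
        for k in range(1, nrep + 1)
    ]
)

# Covariance contribution these columns add on top of the experimental covmat;
# its diagonal is the mean squared nuclear-vs-proton difference per data point.
nuclear_covmat = deltas @ deltas.T
print(nuclear_covmat.shape, np.sqrt(np.diag(nuclear_covmat))[:5])

Because the shifts are written out as ordinary ("ADD", "NUCLEARxxxxx") systematics, this covariance is rebuilt automatically whenever the uncertainties file is loaded; the sketch only makes the 1/sqrt(nrep) normalisation explicit.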