Skip to content

Commit 422f1d1

Browse files
Run pre-commit hooks
1 parent da07ef2 commit 422f1d1

File tree

4 files changed

+299
-259
lines changed

4 files changed

+299
-259
lines changed
Original file line number | Diff line number | Diff line change
@@ -1,99 +1,104 @@
1-
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
2-
from pathlib import Path
31
from dataclasses import dataclass
2+
from os import PathLike
3+
from pathlib import Path
44
import typing
55
from typing import List
6+
67
import numpy as np
78
import pandas as pd
8-
from os import PathLike
99
import yaml
1010

11+
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
12+
13+
1114
def mergetables() -> pd.DataFrame:
    """Read the seven raw tables and merge them into a single data frame.

    Every ``./rawdata/Table{i}.csv`` (``i`` = 1..7) is parsed with line 11
    (0-based) taken as the header row and replaced by the fixed column names
    below.  Each table is tagged with its rapidity bin and with
    ``sqrts = 38.8``; rows whose ``dsig`` entry is not numeric are dropped.
    The first column of the raw tables holds sqrt(tau), not M2, so it is
    converted with ``M2 = tau * s`` before returning.

    Returns
    -------
    pd.DataFrame
        Columns ``M2, dsig, statp, statm, normp, normm, sysp, sysm, y, sqrts``
        with a fresh ``0..n-1`` index.
    """
    sqrts = 38.8

    # List with the rapidity bins for tables 1 to 7.
    yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]

    col_names = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"]

    # Collect the per-table frames and concatenate once: growing a frame with
    # pd.concat inside the loop copies all previous rows on every iteration
    # and depends on how pandas treats the initial empty frame.
    frames = []
    for i, y in enumerate(yrap, start=1):
        df = pd.read_csv(Path(f"./rawdata/Table{i}.csv"), header=11, names=col_names)
        df["y"] = y
        df["sqrts"] = sqrts
        # Keep only the rows that carry a numeric cross section.
        frames.append(df[pd.to_numeric(df["dsig"], errors="coerce").notnull()])

    combined_df = pd.concat(frames, ignore_index=True)

    # In the table we have sqrt(tau) not M2; compute M2=tau*s.  The column is
    # cast to float first because it is read as text whenever a table also
    # contains non-numeric rows.
    combined_df["M2"] = (combined_df["M2"].astype(float) * sqrts) ** 2

    return combined_df
3539

3640
def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike):
    """Load the two prediction tables used to build the nuclear uncertainty.

    Both files are parsed with ``pd.read_table`` using its default settings.

    Parameters
    ----------
    tableN : PathLike
        Path of the first table; returned as the first data frame.
    tablep : PathLike
        Path of the second table; returned as the second data frame.

    Returns
    -------
    tuple
        ``(dfN, dfp)``, the two tables in the order the paths were given.
    """
    nuclear_table, proton_table = (pd.read_table(source) for source in (tableN, tablep))
    return nuclear_table, proton_table
44+
4045

4146
@dataclass
class E605_commondata(commondata):
    """Commondata container for the E605 tables with nuclear uncertainties.

    The constructor fills the attributes of the ``commondata`` base class from
    the merged raw tables and appends one additive systematic per replica of
    the nuclear prediction table.
    """

    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
        """Populate central values, kinematics and uncertainties.

        Parameters
        ----------
        data : pd.DataFrame
            Merged tables; the columns ``dsig``, ``y``, ``M2``, ``sqrts``,
            ``statp`` and ``normp`` are read.  ``normp`` must hold strings
            such as ``"15%"``.
        dataset_name : str
            Stored unchanged in ``self.dataset_name``.
        process : str
            Stored unchanged in ``self.process``.
        """
        # Central values and kinematic quantities.
        self.central_values = data["dsig"].astype(float).to_numpy()
        self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
        self.kinematic_quantities = ["y", "M2", "sqrts"]

        # Statistical uncertainties.
        self.statistical_uncertainties = data["statp"]

        # The overall 10% uncertainty is treated as additive and uncorrelated,
        # while the normalisation uncertainty is always treated
        # multiplicatively.
        # NOTE(review): the previous comment called the 10% entry a
        # "statistical" uncertainty although it is stored among the
        # systematics -- confirm against the experimental reference.
        syst = pd.DataFrame(0.1 * self.central_values)

        # Normalisation uncertainty: percentage string -> absolute value.
        syst["norm"] = self.central_values * data["normp"].str.strip("%").astype(float) / 100

        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]

        # Compute the point-to-point uncertainties: one systematic per
        # replica, given by its shift from the central prediction of the
        # second table and divided by sqrt(nrep).
        nrep = 999
        norm = np.sqrt(nrep)
        dfN, dfp = nuclear_uncert_dw(
            "rawdata/nuclear/output/tables/group_result_table.csv",
            "rawdata/proton_ite/output/tables/group_result_table.csv",
        )
        central = dfp["theory_central"]
        labels = [f"{rep:05d}" for rep in range(1, nrep + 1)]

        # Build all replica columns in one frame instead of inserting 999
        # columns one by one, which fragments the frame.  Reindexing on
        # syst.index reproduces the alignment of a column assignment.
        nuclear = pd.DataFrame(
            {f"NUCLEAR{label}": (dfN[f"rep_{label}"] - central) / norm for label in labels}
        )
        syst = pd.concat([syst, nuclear.reindex(syst.index)], axis=1)
        self.systypes.extend(("ADD", f"NUCLEAR{label}") for label in labels)

        self.systematic_uncertainties = syst.to_numpy()

        self.process = process
        self.dataset_name = dataset_name
8187

8288
def main():
    """Write the reimplemented E605 commondata files and report the covmat check.

    The merged tables are wrapped in ``E605_commondata``, dumped to the three
    ``*_reimplemented_PXSEC.yaml`` files in the working directory, and the
    outcome of ``covmat_is_close`` for the ``legacy`` and ``reimplemented``
    variants is printed.
    """
    dye605 = E605_commondata(mergetables(), "DYE605_Z0_38P8GEV", "Z0")

    output_files = (
        Path("data_reimplemented_PXSEC.yaml"),
        Path("kinematics_reimplemented_PXSEC.yaml"),
        Path("uncertainties_reimplemented_PXSEC.yaml"),
    )
    dye605.write_new_commondata(*output_files)

    matches = covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented")
    print("covmat is close" if matches else "covmat is different.")


if __name__ == "__main__":
    main()
Original file line number | Diff line number | Diff line change
@@ -1,76 +1,94 @@
1-
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
2-
from pathlib import Path
31
from dataclasses import dataclass
2+
from os import PathLike
3+
from pathlib import Path
44
import typing
55
from typing import List
6+
67
import numpy as np
78
import pandas as pd
8-
from os import PathLike
99
import yaml
1010

11+
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
12+
13+
1114
def readdata() -> pd.DataFrame:
    """Read the raw table from ``./rawdata/table.csv``.

    The file is parsed without a header row; its thirteen columns are
    labelled with the names listed below, in file order.

    Returns
    -------
    pd.DataFrame
        One row per line of the file with columns ``xF, Mmin, Mmax, Mavg,
        xFavg, pt, dsig, stat, syst, kfact, rsig, rstat, rsyst``.
    """
    col_names = [
        "xF",
        "Mmin",
        "Mmax",
        "Mavg",
        "xFavg",
        "pt",
        "dsig",
        "stat",
        "syst",
        "kfact",
        "rsig",
        "rstat",
        "rsyst",
    ]
    # Plain string literal: the path has no placeholders, so no f-string.
    return pd.read_csv(Path("./rawdata/table.csv"), names=col_names)
33+
1634

1735
@dataclass
class E866commondata(commondata):
    """Commondata container built from the raw E866 table.

    The constructor fills the attributes of the ``commondata`` base class with
    the cross section rescaled by a Jacobian factor, the kinematics
    ``(y, M2, sqrts)`` and the uncertainties.
    """

    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
        """Populate central values, kinematics and uncertainties.

        Parameters
        ----------
        data : pd.DataFrame
            Raw table; the columns ``Mmin``, ``Mmax``, ``xF``, ``dsig``,
            ``stat`` and ``syst`` are read.
        dataset_name : str
            Stored unchanged in ``self.dataset_name``.
        process : str
            Stored unchanged in ``self.process``.
        """
        # Definitions, compute Jacobian, get dsig/dy/dM
        M = (data["Mmax"] + data["Mmin"]) / 2  # centre of the mass bin
        M2 = M * M
        # Constant column built explicitly; the former ``M / M * 38.8`` gave
        # NaN for any row with M == 0.
        sqrts = pd.Series(38.8, index=M.index)
        s = sqrts**2
        tau = (M**2 / s).to_numpy()
        xF = data["xF"]
        # y = asinh(xF / (2 sqrt(tau))) and jac = sqrt(xF^2 + 4 tau).
        # NOTE(review): presumably jac = dxF/dy, converting a cross section
        # differential in xF into one differential in y -- confirm.
        y = np.arcsinh(xF / np.sqrt(tau) / 2)
        jac = np.sqrt(xF**2 + 4 * tau)
        dsigdydM = data["dsig"] * jac

        # Set the central values
        self.central_values = dsigdydM.astype(float).to_numpy()

        # Pick the kinematic quantities
        kin = pd.concat([y, M2, sqrts], axis=1)
        kin = kin.set_axis(["y", "M2", "sqrts"], axis=1)
        self.kinematics = kin.astype(float).to_numpy()
        self.kinematic_quantities = ["y", "M2", "sqrts"]

        # Statistical uncertainties.
        self.statistical_uncertainties = data["stat"] * jac

        # Systematic uncertainty
        syst = data["syst"] * jac

        # Normalisation uncertainty of 6.5% from beam intensity calibration.
        norm = 6.5 / 100
        norm = norm * self.central_values

        # One row per data point, one column per systematic source.
        self.systematic_uncertainties = np.column_stack((syst, norm))
        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]

        self.process = process
        self.dataset_name = dataset_name
5876

5977
def main():
    """Write the reimplemented E866 commondata files and report the covmat check.

    The raw table is wrapped in ``E866commondata``, dumped to the three
    ``*_reimplemented_PXSEC.yaml`` files in the working directory, and the
    outcome of ``covmat_is_close`` for the ``legacy`` and ``reimplemented``
    variants is printed.
    """
    dye866 = E866commondata(readdata(), "DYE866_Z0", "Z0")

    output_files = (
        Path("data_reimplemented_PXSEC.yaml"),
        Path("kinematics_reimplemented_PXSEC.yaml"),
        Path("uncertainties_reimplemented_PXSEC.yaml"),
    )
    dye866.write_new_commondata(*output_files)

    matches = covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented")
    print("covmat is close" if matches else "covmat is different.")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)