Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing ATLAS_PH-8TEV_XSEC #2246

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_PH_8TEV/data_XSEC.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
data_central:
- 1.03513740e+06
- 301090.3
- 115103.5
- 5.02673050e+04
- 25408.89
- 1.37034250e+04
- 6354.712
- 2535.047
- 1090.0
- 482.427
- 2.34292500e+02
- 98.10972
- 34.16238
- 1.40569950e+01
- 6.54196000e+00
- 2.842982
- 1.13152550e+00
- 4.04777250e-01
- 1.38166000e-01
- 4.35520400e-02
- 9.32802750e-03
- 6.11366600e-04
- 1.34977920e+06
- 3.90347400e+05
- 144561.6
- 6.62355050e+04
- 32981.85
- 17705.31
- 8203.69
- 3.23659800e+03
- 1.39020850e+03
- 616.616
- 3.00503350e+02
- 1.25231250e+02
- 43.1
- 16.54522
- 7.540344
- 3.071376
- 1.160232
- 3.81331500e-01
- 1.23733400e-01
- 2.95970400e-02
- 7.22349300e-03
- 444320.5
- 133886.1
- 48262.66
- 2.15118250e+04
- 1.07101650e+04
- 5.78355950e+03
- 2.69390050e+03
- 1015.716
- 438.5913
- 188.5275
- 8.93689600e+01
- 34.6956
- 1.08634850e+01
- 3.73744
- 1.495224
- 5.13887150e-01
- 1.24847350e-01
- 2.62151850e-02
- 9.49130750e+05
- 283815.4
- 1.04010400e+05
- 4.45961600e+04
- 2.15460000e+04
- 11787.02
- 5.37026850e+03
- 2.04784750e+03
- 828.8342
- 331.2862
- 151.3236
- 54.30558
- 14.10841
- 4.164325
- 1.35155250e+00
- 3.84890850e-01
- 7.19043450e-02
- 1.07784000e-02
259 changes: 259 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_PH_8TEV/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
import pathlib

import numpy as np
import pandas as pd
import yaml

from nnpdf_data.filter_utils.utils import prettify_float

# Register ``prettify_float`` as the YAML representer for Python floats, so every
# float written by ``yaml.dump`` in this module goes through that helper.
yaml.add_representer(float, prettify_float)

# NOTE(review): MT_VALUE is not referenced by any function visible in this file;
# presumably a leftover from another filter -- confirm before removing.
MT_VALUE = 172.5
# Stored as the "mid" value of the "sqrts" kinematic variable of every bin
# (presumably the centre-of-mass energy in GeV -- TODO confirm units).
SQRT_S = 8_000.0

from nnpdf_data.filter_utils.utils import symmetrize_errors as se


def load_yaml(table_id: int, version: int = 1) -> dict:
    """Read one HEPData table stored as yaml in the local ``rawdata`` folder.

    The path is relative to the current working directory.

    Parameters
    ----------
    table_id: int
        table ID number, used to build the file name
    version: int
        version number used to build the file name, default=1

    Returns
    -------
    dict:
        dictionary containing the table contents
    """
    table_name = f"HEPData-ins1457605-v{version}-Table_{table_id}.yaml"
    table_path = pathlib.Path("./rawdata") / table_name
    raw_text = table_path.read_text()

    return yaml.safe_load(raw_text)


def get_kinematics(
    hepdata: dict, bin_index: "list | None" = None, indx: int = 0, mid_rap=None
) -> list:
    """Build the kinematic entries for the selected bins of a HEPData table.

    Parameters
    ----------
    hepdata: dict
        dictionary containing all data info
    bin_index: list, optional
        indices of the bins to read (any iterable of integers is accepted);
        when omitted no bin is read and an empty list is returned
    indx: int
        Column index of ``independent_variables`` from which to read, default=0
    mid_rap:
        value stored as the "mid" entry of the "eta" variable for every bin

    Returns
    -------
    kinematics: list
        one dictionary per selected bin with the keys "eta", "ET" and "sqrts";
        only the "mid" entries are filled, "min" and "max" are left as None
    """
    # ``None`` replaces the former mutable ``[]`` default; the behaviour when
    # the argument is omitted (no bins selected) is unchanged.
    if bin_index is None:
        bin_index = ()

    bins = hepdata["independent_variables"][indx]["values"]

    kinematics = []
    for i in bin_index:
        min_et, max_et = bins[i]["low"], bins[i]["high"]

        kin_value = {
            "eta": {"min": None, "mid": mid_rap, "max": None},
            # the bin is represented by the midpoint of its edges
            "ET": {"min": None, "mid": ((min_et + max_et) / 2), "max": None},
            "sqrts": {"min": None, "mid": SQRT_S, "max": None},
        }
        kinematics.append(kin_value)

    return kinematics


def get_data_values(hepdata: dict, bin_index: list, indx: int = 0) -> list:
    """Collect the central values of the requested bins.

    Parameters
    ----------
    hepdata: dict
        dictionary containing all data info
    bin_index: list
        indices of the bins whose value must be read
    indx: int
        Column index of ``dependent_variables`` to read from, default=0

    Returns
    -------
    numpy array with the "value" entry of each requested bin, in the order
    given by ``bin_index``
    """
    entries = hepdata["dependent_variables"][indx]["values"]

    selected = []
    for position in bin_index:
        selected.append(entries[position]["value"])

    return np.array(selected)


def get_errors(hepdata: dict, bin_index: list) -> tuple:
    """Extract the uncertainties of the selected bins and shift the central values.

    Asymmetric uncertainties are symmetrised with ``se`` (``symmetrize_errors``);
    the shift it returns is added to the central value of the bin.

    Parameters
    ----------
    hepdata: dict
        Hepdata yaml file loaded as dictionary
    bin_index: list
        Bin indices that must be parsed (any iterable of integers); only these
        rows of the first dependent variable are read

    Returns
    -------
    tuple:
        ``(central_values, df_errors)`` where ``central_values`` is a numpy
        array with the (shifted) central values and ``df_errors`` is a pandas
        DataFrame with one row per selected bin (labelled ``"bin 0"``,
        ``"bin 1"``, ... by position in ``bin_index``) and one column per
        error label. Both are multiplied by 1e3.

    Raises
    ------
    ValueError
        if an error source has neither a ``symerror`` nor an ``asymerror``
    """
    bins = hepdata["dependent_variables"][0]["values"]

    central_values = []  # relevant for asymmetric uncertainties
    bin_frames = []
    for position, i in enumerate(bin_index):
        entry = bins[i]

        error_names = []
        error_sources = []
        shift_cv = 0
        for source in entry["errors"]:
            label = source["label"]
            if label == "stat":
                sigma = float(source["symerror"])
            elif "asymerror" in source:
                delta_min = float(source["asymerror"]["minus"])
                delta_plus = float(source["asymerror"]["plus"])
                se_delta, sigma = se(delta_plus, delta_min)
                shift_cv += se_delta
            elif "symerror" in source:
                sigma = float(source["symerror"])
            else:
                # Fail with a clear message instead of the column/shape
                # mismatch pandas would otherwise report further down.
                raise ValueError(
                    f"Error source '{label}' in bin {i} has neither 'symerror' nor 'asymerror'"
                )
            # name and value are appended together so they cannot get out of step
            error_names.append(label)
            error_sources.append(sigma)

        bin_frames.append(
            pd.DataFrame([error_sources], columns=error_names, index=[f"bin {position}"])
        )
        central_values.append(entry["value"] + shift_cv)

    # Concatenate once at the end rather than growing a frame inside the loop.
    df_errors = pd.concat(bin_frames) if bin_frames else pd.DataFrame()

    # convert to fb (original author's note: a factor 1e3 is applied to both outputs)
    df_errors = df_errors * 1e3
    central_values = np.array(central_values) * 1e3

    return central_values, df_errors


def format_uncertainties(uncs: dict) -> list:
    """Arrange the uncertainties bin by bin, ready to be dumped to yaml.

    Parameters
    ----------
    uncs: dict
        Dictionary with a "systematics" DataFrame and, optionally, a
        "statistics" DataFrame; rows whose label starts with "bin" hold the
        numerical values and are expected to be named "bin 0", "bin 1", ...

    Returns
    -------
    list:
        one dictionary per bin mapping "stat" (only when "statistics" is
        present) and "sys_corr_<k>" (k counted from 1 along the columns of the
        systematics) to the corresponding value
    """
    systematics = uncs["systematics"]
    has_stat = "statistics" in uncs
    # only the rows labelled "bin ..." are data; other rows are skipped
    n_bins = systematics.index.str.startswith("bin").sum()

    formatted = []
    for label in (f"bin {k}" for k in range(n_bins)):
        entry = {}
        if has_stat:
            entry["stat"] = uncs["statistics"].loc[label].values.item()
        entry.update(
            (f"sys_corr_{position}", float(value))
            for position, value in enumerate(systematics.loc[label].values, start=1)
        )
        formatted.append(entry)

    return formatted


def dump_commondata(kinematics: list, data: list, errors: dict, obs: str) -> None:
    """Write the data, kinematics and uncertainties commondata files.

    Three files are created in the current working directory:
    ``data_<obs>.yaml``, ``kinematics_<obs>.yaml`` and
    ``uncertainties_<obs>.yaml``.

    Parameters
    ----------
    kinematics: list
        list containing the kinematic values
    data: list
        central values; ``.tolist()`` is called on it, so an array-like object
        providing that method is required
    errors: dict
        Dictionary with a "systematics" DataFrame and, optionally, a
        "statistics" DataFrame; both carry "treatment" and "type" rows next
        to the per-bin values
    obs: str
        Name to append to the file names
    """
    error_definition = {}
    if "statistics" in errors:
        statistics = errors["statistics"]
        error_definition["stat"] = {
            "description": "Uncorrelated statistical uncertainties",
            "treatment": statistics.loc["treatment"].iloc[0],
            "type": statistics.loc["type"].iloc[0],
        }

    systematics = errors["systematics"]
    # one definition per systematic column, numbered from 1
    for position, source_name in enumerate(systematics.columns):
        error_definition[f"sys_corr_{position + 1}"] = {
            "description": source_name,
            "treatment": systematics.loc["treatment"].iloc[position],
            "type": systematics.loc["type"].iloc[position],
        }

    errors_formatted = format_uncertainties(errors)

    with open(f"data_{obs}.yaml", "w") as data_file:
        yaml.dump({"data_central": data.tolist()}, data_file, sort_keys=False)

    with open(f"kinematics_{obs}.yaml", "w") as kinematics_file:
        yaml.dump({"bins": kinematics}, kinematics_file, sort_keys=False)

    with open(f"uncertainties_{obs}.yaml", "w") as uncertainties_file:
        uncertainties_content = {"definitions": error_definition, "bins": errors_formatted}
        yaml.dump(uncertainties_content, uncertainties_file, sort_keys=False)


def main_filter() -> None:
    """
    Main function that reads the HepData yaml files and generates the commondata files

    Tables 1 to 4 (version 1) are read, their central values, uncertainties and
    kinematics are stacked in that order, and the result is written with
    ``dump_commondata`` under the observable name "XSEC".
    """
    # Number of bins read from each table and the "eta" value attached to them
    # (presumably the midpoints of the pseudorapidity regions -- TODO confirm).
    n_datapoints = [22, 21, 18, 18]
    mid_rapidities = [0.3, 0.985, 1.685, 2.09]

    kinematics_all = []
    central_values_per_table = []
    uncertainties_per_table = []
    for table_id, (n_points, mid_rap) in enumerate(zip(n_datapoints, mid_rapidities), start=1):
        yaml_content = load_yaml(table_id=table_id, version=1)
        bin_index = range(n_points)

        kinematics_all += get_kinematics(yaml_content, bin_index=bin_index, mid_rap=mid_rap)
        central_values, uncertainties = get_errors(yaml_content, bin_index=bin_index)
        central_values_per_table.append(central_values)
        uncertainties_per_table.append(uncertainties)

    # Stack the tables once instead of growing the containers inside the loop.
    uncertainties_all = pd.concat(uncertainties_per_table)
    central_values_all = np.concatenate(central_values_per_table)

    # Relabel the rows so the bins are numbered consecutively across tables.
    uncertainties_all.index = [f"bin {i}" for i in range(uncertainties_all.shape[0])]

    # The first column is treated as additive and is split off as the statistical
    # uncertainty below; the last column gets the correlated luminosity label
    # (assumes this is the column order of the HEPData tables -- TODO confirm).
    n_sources = uncertainties_all.shape[1]
    sys_types = {
        "treatment": ["ADD"] + ["MULT"] * (n_sources - 1),
        # ATLASLUMI12: per the maintainer's review note, this measurement is
        # part of the 2012 data.
        "type": ["UNCORR"] * (n_sources - 1) + ["ATLASLUMI12"],
    }
    sys_types_df = pd.DataFrame(sys_types, index=uncertainties_all.columns).T
    # "treatment" and "type" become the first two rows, followed by the bins
    df_errors = pd.concat([sys_types_df, uncertainties_all])

    errors = {"statistics": df_errors.iloc[:, [0]], "systematics": df_errors.iloc[:, 1:]}

    dump_commondata(kinematics_all, central_values_all, errors, obs="XSEC")


# Generate the commondata files only when this file is executed as a script.
if __name__ == "__main__":
    main_filter()
Loading
Loading