Skip to content

Commit

Permalink
Merge pull request #361 from LSSTDESC/mock_calibration
Browse files Browse the repository at this point in the history
Add mock catalog type requiring no calibration, and add example pipeline
  • Loading branch information
joezuntz authored Nov 8, 2024
2 parents 6d6dd49 + bded74c commit d0fd186
Show file tree
Hide file tree
Showing 11 changed files with 439 additions and 8 deletions.
15 changes: 11 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ on:
- cron: '0 12 * * 1'

env:
EXAMPLE_DATA_FILE_VERSION: v3

EXAMPLE_DATA_FILE_VERSION: v5
# Setting this did not appear to work. Instead will need to
# find/replace when changing it.
# CONTAINER_IMAGE: ghcr.io/lsstdesc/txpipe:latest

jobs:
# Run a download step first so that it is in
Expand Down Expand Up @@ -110,7 +112,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-14]
pipeline: [metadetect, metacal, redmagic, lensfit, metadetect_source_only]
pipeline: [metadetect, metacal, redmagic, lensfit, metadetect_source_only, mock_shear]
include:
- os: ubuntu-latest
INSTALL_DEPS: echo nothing to do
Expand Down Expand Up @@ -158,7 +160,12 @@ jobs:
export TX_DASK_DEBUG=1
fi
ceci examples/${{ matrix.pipeline }}/pipeline.yml
test -f data/example/outputs_${{ matrix.pipeline }}/shear_xi_plus.png
if [ "${{ matrix.pipeline }}" == "mock_shear" ]; then
test -f data/example/outputs_mock_shear/binned_shear_catalog.hdf5
else
test -f data/example/outputs_${{ matrix.pipeline }}/shear_xi_plus.png
fi
- name: Show logs
if: always()
Expand Down
70 changes: 70 additions & 0 deletions bin/generate-nfw-shear-cat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import argparse



parser = argparse.ArgumentParser()
parser.add_argument("--output", type=str, default="simple_cat.txt")
parser.add_argument("--nmax", type=int, default=10000, help="Number of objects to generate, though cat size is reduced by redshift cut")
parser.add_argument("--mass", type=float, default=10.0e14, help="Cluster mass in Msun")
parser.add_argument("--cluster_z", type=float, default=0.22, help="Cluster redshift")
parser.add_argument("--concentration", type=float, default=4.0, help="Cluster concentration parameter")
parser.add_argument("--size", type=float, default=240.0, help="Size of the region in arcmin")
parser.add_argument("--mean-z", type=float, default=0.5, help="Mean redshift of the background galaxies")
parser.add_argument("--sigma-z", type=float, default=0.1, help="Redshift std dev of the background galaxies")
parser.add_argument("--mean-snr", type=float, default=20., help="Mean SNR")
parser.add_argument("--mean-size", type=float, default=0.3, help="Galaxy mean size^2 T parameter in arcsec")
parser.add_argument("--sigma-size", type=float, default=0.3, help="Galaxy std dev size^2 T parameter in arcsec")
args = parser.parse_args()

mass = args.mass
cluster_z = args.cluster_z
concentration = args.concentration


import galsim
import numpy as np
from astropy.table import Table
import argparse

nfw = galsim.NFWHalo(mass, concentration, cluster_z)

half_size = args.size / 2
xmin = -half_size
xmax = half_size
ymin = -half_size
ymax = half_size

N = 10000

x = np.random.uniform(xmin, xmax, N)
y = np.random.uniform(ymin, ymax, N)
z = np.random.normal(args.mean_z, args.sigma_z, N)

w = np.where(z > nfw.z)
x1 = x[w]
y1 = y[w]
z1 = z[w]
n = z1.size

ra = np.zeros(n) + x1 / 3600
dec = np.zeros(n) + y1 / 3600
g1, g2 = nfw.getShear([x1, y1], z1, reduced=False)
s2n = np.random.exponential(args.mean_snr, size=n)
print(s2n.mean())

# This should give plenty of selected objects since we cut on T/Tpsf > 0.5 by default
# and Tpsf default is ~.2
T = np.random.normal(args.mean_size, args.sigma_size, size=n).clip(0.01, np.inf)

data = {
"ra": ra,
"dec": dec,
"g1": g1,
"g2": g2,
"s2n": s2n,
"T": T,
"redshift": z1,
}

table = Table(data=data)
table.write(args.output, overwrite=True, format="ascii.commented_header")
24 changes: 24 additions & 0 deletions bin/make_single_cluster_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np
import h5py
ra = np.array([0.0])
dec = np.array([0.0])
z = np.array([0.22])
z_err = np.array([0.01])
ids = np.array([0])
richness = np.array([10.0])
richness_err = np.array([1.0])
scale = np.array([1.0])

with h5py.File("mock_single_cluster_catalog.hdf5", "w") as f:
g = f.create_group("clusters")
g
g.create_dataset("cluster_id", data=ids)
g.create_dataset("ra", data=ra)
g.create_dataset("dec", data=dec)
g.create_dataset("redshift", data=z)
g.create_dataset("redshift_err", data=z_err)
g.create_dataset("richness", data=richness)
g.create_dataset("richness_err", data=richness_err)
g.create_dataset("scaleval", data=scale)

g.attrs["n_clusters"] = 1
2 changes: 1 addition & 1 deletion environment-piponly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ dependencies:
- ceci==2.0.1
- git+https://github.com/jlvdb/hyperbolic@b88b107a291fa16c2006cf971ce610248d58e94c
- dask-mpi>=2022.4.0
- git+https://github.com/LSSTDESC/CLMM.git@1.12.5
- git+https://github.com/LSSTDESC/CLMM.git@1.14.1
- parallel-statistics==0.13
- sacc[all]==0.14
- jax==0.4.27
Expand Down
26 changes: 26 additions & 0 deletions examples/mock_shear/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
global:
chunk_rows: 100000
pixelization: healpix
nside: 64
sparse: true


TXSourceSelectorSimple:
input_pz: false
true_z: true
bands: riz
T_cut: 0.5
s2n_cut: 10.0
max_rows: 1000
delta_gamma: 0.02
source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0]
shear_prefix: ''
verbose: true

TXMockTruthPZ:
mock_sigma_z: 0.001


CLClusterShearCatalogs:
redshift_cut_criterion: zmode
redshift_weight_criterion: zmode
54 changes: 54 additions & 0 deletions examples/mock_shear/pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Stages to run
stages:
- name: TXSimpleMock # Convert a text file mock catalog to HDF5
- name: TXSourceSelectorSimple # select and split objects into source bins
- name: TXShearCalibration # Calibrate and split the source sample tomographically
- name: TXMockTruthPZ # Generate PDFs as narrow gaussian centered on the true redshifts
aliases:
photoz_pdfs: source_photoz_pdfs
- name: CLClusterShearCatalogs # Extract and weight the shear catalog around every cluster



# modules and packages to import that have pipeline
# stages defined in them
modules: >
txpipe
# where to find any modules that are not in this repo,
# and any other code we need.
python_paths:
- submodules/WLMassMap/python/desc/

# Where to put outputs
output_dir: data/example/outputs_mock_shear

# How to run the pipeline: mini, parsl, or cwl
launcher:
name: mini
interval: 1.0

# Where to run the pipeline: cori-interactive, cori-batch, or local
site:
name: local
max_threads: 2

# configuration settings
config: examples/mock_shear/config.yml

# These are overall inputs to the whole pipeline, not generated within it
inputs:
mock_shear_catalog: data/example/inputs/mock_nfw_shear_catalog.txt
calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat
cluster_catalog: data/example/inputs/mock_single_cluster_catalog.hdf5
fiducial_cosmology: data/fiducial_cosmology.yml



# if supported by the launcher, restart the pipeline where it left off
# if interrupted
resume: true
# where to put output logs for individual stages
log_dir: data/example/logs_mock_shear
# where to put an overall parsl pipeline log
pipeline_log: data/example/mock_shear_log.txt
1 change: 1 addition & 0 deletions txpipe/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def run(self):
# is needed
tomo_file = self.get_input("shear_tomography_catalog")
cals, cal2d = Calibrator.load(tomo_file, null=use_true)
print("Using calibration method:", cal2d.__class__.__name__)

# The catalog columns are named differently in different cases
#  Get the correct shear catalogs
Expand Down
102 changes: 101 additions & 1 deletion txpipe/ingest/mocks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ..base_stage import PipelineStage
from ..data_types import ShearCatalog, HDFFile
from ..data_types import ShearCatalog, HDFFile, TextFile, QPPDFFile
from ..utils import (
band_variants,
metacal_variants,
Expand Down Expand Up @@ -1219,6 +1219,104 @@ def generate_mock_metacal_mag_responses(bands, nobj):
return mag_responses


class TXSimpleMock(PipelineStage):
"""
Load an ascii astropy table and put it in shear catalog format.
"""
name = "TXSimpleMock"
inputs = [("mock_shear_catalog", TextFile)]
outputs = [("shear_catalog", ShearCatalog)]
config_options = {
"mock_size_snr": False,
}
def run(self):
from astropy.table import Table
import numpy as np

# Load the data. We are assuming here it is small enough to fit in memory
input_filename = self.get_input("mock_shear_catalog")
input_data = Table.read(input_filename, format="ascii")
n = len(input_data)

data = {}
# required columns
for col in ["ra", "dec", "g1", "g2", "s2n", "T"]:
data[col] = input_data[col]

# It's most likely we will have a redshift column.
# Check for both that and "redshift_true"
if "redshift" in input_data.colnames:
data["redshift_true"] = input_data["redshift"]
elif "redshift_true" in input_data.colnames:
data["redshift_true"] = input_data["redshift_true"]

# If there is an ID column then use it, but otherwise just use
# sequential IDs
if "id" in input_data.colnames:
data["galaxy_id"] = input_data["id"]
else:
data["galaxy_id"] = np.arange(len(input_data))

# if these catalogs are not present then we fake them.
defaults = {
"T_err": 0.0,
"psf_g1": 0.0,
"psf_g2": 0.0,
"psf_T_mean": 0.202, # this corresponds to a FWHM of 0.75 arcsec
"weight": 1.0,
"flags": 0,
}

for key, value in defaults.items():
if key in input_data.colnames:
data[key] = input_data[key]
else:
data[key] = np.full(n, value)

self.save_catalog(data)

def save_catalog(self, data):
with self.open_output("shear_catalog") as f:
g = f.create_group("shear")
g.attrs["catalog_type"] = "simple"
for key, value in data.items():
g.create_dataset(key, data=value)


class TXMockTruthPZ(PipelineStage):
name = "TXMockTruthPZ"
inputs = [("shear_catalog", ShearCatalog)]
outputs = [("photoz_pdfs", QPPDFFile)]
config_options = {
"mock_sigma_z": 0.001,
}
def run(self):
import qp
import numpy as np
sigma_z = self.config["mock_sigma_z"]


# read the input truth redshifts
with self.open_input("shear_catalog", wrapper=True) as f:
group = f.file[f.get_primary_catalog_group()]
n = group["ra"].size
redshifts = group["redshift_true"][:]

zgrid = np.linspace(0, 3, 301)
pdfs = np.zeros((n, len(zgrid)))

spread_z = sigma_z * (1 + redshifts)
# make a gaussian PDF for each object
delta = zgrid[np.newaxis, :] - redshifts[:, np.newaxis]
pdfs = np.exp(-0.5 * (delta / spread_z[:, np.newaxis])**2) / np.sqrt(2 * np.pi) / spread_z[:, np.newaxis]

q = qp.Ensemble(qp.interp, data=dict(xvals=zgrid, yvals=pdfs))
q.set_ancil(dict(zmode=redshifts, zmean=redshifts, zmedian=redshifts))
q.write_to(self.get_output("photoz_pdfs"))




def test():
import pylab

Expand All @@ -1235,3 +1333,5 @@ def test():
results = make_mock_photometry(n_visit, bands, data, True)
pylab.hist(results["snr_r"], bins=50, histtype="step")
pylab.savefig("snr_r.png")


Loading

0 comments on commit d0fd186

Please sign in to comment.