Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ranking statistic for live singles #4689

Merged
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
312b685
Allow the live single trigger fits to use ranking statistic rather th…
GarethCabournDavies Apr 4, 2024
1af93ff
inbin is no longer all the events above threshold, plotting to indica…
GarethCabournDavies Apr 4, 2024
cb8b685
deal better with cases where there are no triggers
GarethCabournDavies Apr 5, 2024
d1d39a3
Use ranking statistic for single-detector events
GarethCabournDavies Apr 5, 2024
15c0c9e
Fix some errors
GarethCabournDavies Apr 8, 2024
0984664
fix some statistics so they can produce single-detector events
GarethCabournDavies Apr 8, 2024
a12d99f
Some codeclimate suggestions
GarethCabournDavies Apr 9, 2024
ef29697
get fit coeff files into CI, set a maximum IFAR for singles
GarethCabournDavies Apr 10, 2024
2de35b6
alter the CI example run
GarethCabournDavies Apr 10, 2024
4d11273
Codeclimate suggestions
GarethCabournDavies Apr 10, 2024
0b7c91a
Line too long
GarethCabournDavies Apr 10, 2024
0e9e065
minor tweaks
GarethCabournDavies Apr 10, 2024
5500809
Used shared code
GarethCabournDavies Apr 10, 2024
914b00c
Fix broken fixing
GarethCabournDavies Apr 18, 2024
698b4e4
missed that this needs the module
GarethCabournDavies Apr 23, 2024
1601071
typo
GarethCabournDavies Apr 23, 2024
f2124c3
calculate plotmax earlier and use it to decide the histogram bins
GarethCabournDavies Apr 23, 2024
3141f4b
Update bin/live/pycbc_live_plot_single_significance_fits
GarethCabournDavies Jun 6, 2024
68cd60b
TDC comments
GarethCabournDavies Jun 6, 2024
2e9285b
Update threshold naming and description
GarethCabournDavies Jun 6, 2024
cedca28
update argument in example
GarethCabournDavies Jun 6, 2024
dfcea89
Please do not look at the previous commit and see how much of an idio…
GarethCabournDavies Jun 7, 2024
d3a3492
Update bin/live/pycbc_live_combine_single_significance_fits
GarethCabournDavies Jun 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions bin/live/pycbc_live_combine_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ for f in args.trfits_files:
bl = fit_f['bins_lower'][:]
bu = fit_f['bins_upper'][:]
sngl_rank = fit_f.attrs['sngl_ranking']
fit_thresh = fit_f.attrs['fit_threshold']
fit_func = fit_f.attrs['fit_function']
fit_thresh = {ifo: fit_f[ifo].attrs['fit_threshold']
GarethCabournDavies marked this conversation as resolved.
Show resolved Hide resolved
for ifo in args.ifos}
fit_func = {ifo: fit_f[ifo].attrs['fit_function']
for ifo in args.ifos}

# Now go back through the fit files and read the actual information. Skip the
# files that have fit parameters inconsistent with what we found earlier.
Expand All @@ -80,9 +82,13 @@ for f in args.trfits_files:
with h5py.File(f, 'r') as fits_f:
# Check that the file uses the same setup as file 0, to make sure
# all coefficients are comparable
new_fit_func = {ifo: fits_f[ifo].attrs['fit_function']
for ifo in args.ifos}
new_fit_thresh = {ifo: fits_f[ifo].attrs['fit_threshold']
for ifo in args.ifos}
same_conf = (fits_f.attrs['sngl_ranking'] == sngl_rank
and fits_f.attrs['fit_threshold'] == fit_thresh
and fits_f.attrs['fit_function'] == fit_func
and new_fit_thresh == fit_thresh
and new_fit_func == fit_func
and fits_f['bins_lower'].size == bl.size
and all(fits_f['bins_lower'][:] == bl)
and all(fits_f['bins_upper'][:] == bu))
Expand All @@ -99,7 +105,7 @@ for f in args.trfits_files:
gps_last = 0
gps_first = np.inf
for ifo in args.ifos:
if ifo not in fits_f:
if ifo not in fits_f or 'triggers' not in fits_f[ifo]:
continue
trig_times = fits_f[ifo]['triggers']['end_time'][:]
gps_last = max(gps_last, trig_times.max())
Expand Down Expand Up @@ -143,7 +149,6 @@ cons_counts_out = {ifo: np.inf * np.ones(len(alphas_bin[ifo])) for ifo in args.i
logging.info("Writing results")

fout = h5py.File(args.output, 'w')
fout.attrs['fit_threshold'] = fit_thresh
fout.attrs['conservative_percentile'] = args.conservative_percentile
fout.attrs['ifos'] = args.ifos
fout['bins_edges'] = list(bl) + [bu[-1]]
Expand All @@ -152,6 +157,9 @@ fout['fits_dates'] = ad + start_time_n
for ifo in args.ifos:
logging.info(ifo)
fout_ifo = fout.create_group(ifo)
for ifo in args.ifos:
fout_ifo.attrs['fit_threshold'] = fit_thresh[ifo]
fout_ifo.attrs['fit_function'] = fit_func[ifo]
GarethCabournDavies marked this conversation as resolved.
Show resolved Hide resolved
l_times = np.array(live_times[ifo])
total_time = l_times.sum()
fout_ifo.attrs['live_time'] = total_time
Expand Down
2 changes: 1 addition & 1 deletion bin/live/pycbc_live_plot_combined_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ for ifo in ifos:
continue

l_times = separate_times[ifo]
with np.errstate(divide='ignore'):
with np.errstate(divide='ignore', invalid='ignore'):
GarethCabournDavies marked this conversation as resolved.
Show resolved Hide resolved
rate = counts / l_times

ma = mean_alpha[ifo][i]
Expand Down
50 changes: 37 additions & 13 deletions bin/live/pycbc_live_plot_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,22 @@ with h5py.File(args.trigger_fits_file, 'r') as trfit_f:
ifos = [k for k in trfit_f.keys() if not k.startswith('bins')]

# Grab some info from the attributes
sngl_ranking = trfit_f.attrs['sngl_ranking']
fit_threshold = trfit_f.attrs['fit_threshold']
fit_function = trfit_f.attrs['fit_function']
if trfit_f.attrs['ranking_statistic'] in ['quadsum','single_ranking_only']:
x_label = trfit_f.attrs['sngl_ranking']
else:
x_label = "Ranking statistic"
fit_threshold = {ifo: trfit_f[ifo].attrs['fit_threshold'] for ifo in ifos}
fit_function = {ifo: trfit_f[ifo].attrs['fit_function'] for ifo in ifos}
analysis_date = trfit_f.attrs['analysis_date']

# Get the triggers for each detector
# (This is ones which passed the cuts in the fitting code)
stats = {ifo: {} for ifo in ifos}
durations = {ifo: {} for ifo in ifos}
for ifo in ifos:
stats[ifo] = trfit_f[ifo]['triggers'][sngl_ranking][:]
if 'triggers' not in trfit_f[ifo]:
continue
stats[ifo] = trfit_f[ifo]['triggers']['stat'][:]
durations[ifo] = trfit_f[ifo]['triggers']['template_duration'][:]
live_time = {ifo: trfit_f[ifo].attrs['live_time'] for ifo in ifos}
alphas = {ifo: trfit_f[ifo]['fit_coeff'][:] for ifo in ifos}
Expand Down Expand Up @@ -124,7 +129,10 @@ for ifo in all_ifos:
maxstat = stats[ifo].max()
max_rate = 0

plotbins = np.linspace(fit_threshold, 1.05 * maxstat, 400)
statrange = maxstat - max(stats[ifo].min(), fit_threshold[ifo])
plotmax = maxstat + statrange * 0.05

plotbins = np.linspace(fit_threshold[ifo], plotmax, 400)

logging.info("Putting events into bins")
event_bin = np.array([tbins[d] for d in durations[ifo]])
Expand All @@ -134,8 +142,26 @@ for ifo in all_ifos:
binlabel = f"{lower:.3g} - {upper:.3g}"

inbin = event_bin == bin_num
bin_prop = bin_num / len(tbins)
bin_colour = plt.get_cmap(args.colormap)(bin_prop)

# Skip if there are no triggers in this bin in this IFO
if not any(inbin):
if not any(inbin) or alphas[ifo][bin_num] == -1:
ax.plot(
[],
[],
linewidth=2,
color=bin_colour,
label=binlabel,
alpha=0.6
)
ax.plot(
[],
[],
"--",
color=bin_colour,
label="No triggers"
)
continue
binned_sngl_stats = stats[ifo][event_bin == bin_num]

Expand All @@ -147,30 +173,28 @@ for ifo in all_ifos:
max_rate = max(max_rate, cum_rate[0])

ecf = eval_cum_fit(
fit_function,
fit_function[ifo],
plotbins,
alphas[ifo][bin_num],
fit_threshold
fit_threshold[ifo]
)
cum_fit = counts[ifo][bin_num] / live_time[ifo] * ecf

bin_prop = bin_num / len(tbins)
bin_colour = plt.get_cmap(args.colormap)(bin_prop)
ax.plot(edges[:-1], cum_rate, linewidth=2,
color=bin_colour, label=binlabel, alpha=0.6)
ax.plot(plotbins, cum_fit, "--", color=bin_colour,
label=r"$\alpha = $%.2f" % alphas[ifo][bin_num])
ax.semilogy()
ax.grid()
ax.set_xlim(
fit_threshold if args.x_lim_lower is None else args.x_lim_lower,
1.05 * maxstat if args.x_lim_upper is None else args.x_lim_upper
fit_threshold[ifo] if args.x_lim_lower is None else args.x_lim_lower,
plotmax if args.x_lim_upper is None else args.x_lim_upper
)
ax.set_ylim(
0.5 / live_time[ifo] if args.y_lim_lower is None else args.y_lim_lower,
1.5 * max_rate if args.y_lim_upper is None else args.y_lim_upper
)
ax.set_xlabel(sngl_ranking)
ax.set_xlabel(x_label)
ax.set_ylabel("Number of louder triggers per live time")
title = f"{ifo} {analysis_date} trigger fits"
ax.set_title(title)
Expand Down
74 changes: 49 additions & 25 deletions bin/live/pycbc_live_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ import h5py

import pycbc
from pycbc.bin_utils import IrregularBins
from pycbc.events import cuts, trigger_fits as trstats
from pycbc.events import cuts, trigger_fits as trstats, stat
from pycbc.io import DictArray
from pycbc.events import ranking
from pycbc.events.coinc import cluster_over_time
from pycbc.types import MultiDetOptionAction


def duration_bins_from_cli(args):
Expand Down Expand Up @@ -75,6 +76,7 @@ parser.add_argument("--file-identifier", default="H1L1V1-Live",
help="String required in filename to be considered for "
"analysis. Default: 'H1L1V1-Live'.")
parser.add_argument("--fit-function", default="exponential",
action=MultiDetOptionAction,
choices=["exponential", "rayleigh", "power"],
help="Functional form for the maximum likelihood fit. "
"Choose from exponential, rayleigh or power. "
Expand Down Expand Up @@ -112,18 +114,20 @@ parser.add_argument("--prune-stat-threshold", type=float,
help="Minimum --sngl-ranking value to consider a "
"trigger for pruning.")
parser.add_argument("--fit-threshold", type=float, default=5,
action=MultiDetOptionAction,
help="Lower threshold used in fitting the triggers."
"Default 5.")
"Default 5. Can be supplied as a single value, "
"or as a set of IFO:value pairs, e.g. H1:0, L1:-1")
parser.add_argument("--cluster", action='store_true',
help="Only use maximum of the --sngl-ranking value "
"from each file.")
parser.add_argument("--output", required=True,
help="File in which to save the output trigger fit "
"parameters.")
parser.add_argument("--sngl-ranking", default="newsnr",
choices=ranking.sngls_ranking_function_dict.keys(),
help="The single-detector trigger ranking to use.")

stat.insert_statistic_option_group(
parser,
default_ranking_statistic='single_ranking_only'
)
cuts.insert_cuts_option_group(parser)

args = parser.parse_args()
Expand All @@ -133,8 +137,11 @@ pycbc.init_logging(args.verbose)
# Check input options

# Pruning options are mutually required or not needed
prune_options = [args.prune_loudest, args.prune_window,
args.prune_stat_threshold]
prune_options = [
args.prune_loudest is not None,
args.prune_window is not None,
args.prune_stat_threshold is not None
]

if any(prune_options) and not all(prune_options):
parser.error("Require all or none of --prune-loudest, "
Expand Down Expand Up @@ -162,6 +169,9 @@ if args.duration_bin_end and \
"--duration-bin-start, got "
f"{args.duration_bin_end} and {args.duration_bin_start}")


stat_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)

duration_bin_edges = duration_bins_from_cli(args)
logging.info("Duration bin edges: %s", duration_bin_edges)

Expand All @@ -177,13 +187,7 @@ args.template_cuts = args.template_cuts or []
args.template_cuts.append(f"template_duration:{min(duration_bin_edges)}:lower")
args.template_cuts.append(f"template_duration:{max(duration_bin_edges)}:upper_inc")

# Efficiency saving: add SNR cut before any others as sngl_ranking can
# only be less than SNR.
args.trigger_cuts = args.trigger_cuts or []
args.trigger_cuts.insert(0, f"snr:{args.fit_threshold}:lower_inc")

# Cut triggers with sngl-ranking below threshold
args.trigger_cuts.append(f"{args.sngl_ranking}:{args.fit_threshold}:lower_inc")

logging.info("Setting up the cut dictionaries")
trigger_cut_dict, template_cut_dict = cuts.ingest_cuts_option_group(args)
Expand Down Expand Up @@ -211,6 +215,9 @@ files = [f for f in os.listdir(date_directory)

events = {}

rank_method = {ifo: stat.get_statistic_from_opts(args, [ifo]) for ifo in args.ifos}

logging.info("Processing %d files", len(files))
for counter, filename in enumerate(files):
if counter and counter % 1000 == 0:
logging.info("Processed %d/%d files", counter, len(files))
Expand Down Expand Up @@ -284,10 +291,13 @@ for counter, filename in enumerate(files):
for k in trigs_ifo.keys()}

# Calculate the sngl_ranking values
sngls_value = ranking.get_sngls_ranking_from_trigs(
triggers_cut, args.sngl_ranking)
sds = rank_method[ifo].single(triggers_cut)
sngls_value = rank_method[ifo].rank_stat_single(
(ifo, sds),
**stat_kwargs
)

triggers_cut[args.sngl_ranking] = sngls_value
triggers_cut['stat'] = sngls_value

triggers_da = DictArray(data=triggers_cut)

Expand Down Expand Up @@ -330,7 +340,7 @@ for ifo in events:
for bin_num in range(n_bins):
inbin = event_bins[ifo] == bin_num

binned_events = events[ifo].data[args.sngl_ranking][inbin]
binned_events = events[ifo].data['stat'][inbin]
binned_event_times = events[ifo].data['end_time'][inbin]

# Cluster triggers in time with the pruning window to ensure
Expand Down Expand Up @@ -396,17 +406,32 @@ for ifo in events:
alphas[ifo][bin_num] = -1
continue

counts[ifo][bin_num] = np.count_nonzero(inbin)
stat_inbin = events[ifo].data['stat'][inbin]
counts[ifo][bin_num] = \
np.count_nonzero(stat_inbin > args.fit_threshold[ifo])

alphas[ifo][bin_num], _ = trstats.fit_above_thresh(
args.fit_function,
events[ifo].data[args.sngl_ranking][inbin],
args.fit_threshold
args.fit_function[ifo],
stat_inbin,
args.fit_threshold[ifo]
)

logging.info("Writing results")
with h5py.File(args.output, 'w') as fout:
for ifo in events:
for ifo in args.ifos:
fout_ifo = fout.create_group(ifo)
fout_ifo.attrs['fit_function'] = args.fit_function[ifo]
fout_ifo.attrs['fit_threshold'] = args.fit_threshold[ifo]
if ifo not in events:
# There were no triggers, but we should still produce some
# information
fout_ifo['fit_coeff'] = -1 * np.zeros(n_bins)
fout_ifo['counts'] = np.zeros(n_bins)
fout_ifo.attrs['live_time'] = live_time[ifo]
fout_ifo.attrs['pruned_times'] = []
fout_ifo.attrs['n_pruned'] = 0
continue

# Save the triggers we have used for the fits
fout_ifo_trigs = fout_ifo.create_group('triggers')
for key in events[ifo].data:
Expand All @@ -427,8 +452,7 @@ with h5py.File(args.output, 'w') as fout:
fout.attrs['analysis_date'] = args.analysis_date
fout.attrs['input'] = sys.argv
fout.attrs['cuts'] = args.template_cuts + args.trigger_cuts
fout.attrs['fit_function'] = args.fit_function
fout.attrs['fit_threshold'] = args.fit_threshold
fout.attrs['sngl_ranking'] = args.sngl_ranking
titodalcanton marked this conversation as resolved.
Show resolved Hide resolved
fout.attrs['ranking_statistic'] = args.ranking_statistic

logging.info("Done")
27 changes: 27 additions & 0 deletions examples/live/make_fit_coeffs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Makes files which can be used as the fit_coeffs statistic.
These are not of any scientific use, but the code will accept them
and run properly
"""

import h5py
titodalcanton marked this conversation as resolved.
Show resolved Hide resolved
import numpy as np

# Get number of templates from bank file
with h5py.File('template_bank.hdf', 'r') as bankf:
n_templates = bankf['mass1'].size

for ifo in ['H1','L1','V1']:
with h5py.File(f'{ifo}-fit_coeffs.hdf','w') as fits_f:
fits_f.attrs['analysis_time'] = 430000
fits_f.attrs['ifo'] = ifo
fits_f.attrs['stat'] = f'{ifo}-fit_coeffs'
fits_f.attrs['stat_threshold'] = 5

fits_f['count_above_thresh'] = np.ones(n_templates) * 100
fits_f['count_in_template'] = np.ones(n_templates) * 20000
fits_f['fit_coeff'] = np.ones(n_templates) * 5.5
fits_f['median_sigma'] = np.ones(n_templates) * 5800
fits_f['template_id'] = np.arange(n_templates)


Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import h5py
import numpy as np

f = h5py.File('single_trigger_fits.hdf','w')
f = h5py.File('single_significance_fits.hdf','w')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renaming to clarify the two types of single fits


# Some numbers to design the output
# These are loosely based on the O3a trigger fits file
Expand Down
Loading
Loading