Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ranking statistic for live singles #4689

Merged
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
312b685
Allow the live single trigger fits to use ranking statistic rather th…
GarethCabournDavies Apr 4, 2024
1af93ff
inbin is no longer all the events above threshold, plotting to indica…
GarethCabournDavies Apr 4, 2024
cb8b685
deal better with cases where there are no triggers
GarethCabournDavies Apr 5, 2024
d1d39a3
Use ranking statistic for single-detector events
GarethCabournDavies Apr 5, 2024
15c0c9e
Fix some errors
GarethCabournDavies Apr 8, 2024
0984664
fix some statistics so they can produce single-detector events
GarethCabournDavies Apr 8, 2024
a12d99f
Some codeclimate suggestions
GarethCabournDavies Apr 9, 2024
ef29697
get fit coeff files into CI, set a maximum IFAR for singles
GarethCabournDavies Apr 10, 2024
2de35b6
alter the CI example run
GarethCabournDavies Apr 10, 2024
4d11273
Codeclimate suggestions
GarethCabournDavies Apr 10, 2024
0b7c91a
Line too long
GarethCabournDavies Apr 10, 2024
0e9e065
minor tweaks
GarethCabournDavies Apr 10, 2024
5500809
Used shared code
GarethCabournDavies Apr 10, 2024
914b00c
Fix broken fixing
GarethCabournDavies Apr 18, 2024
698b4e4
missed that this needs the module
GarethCabournDavies Apr 23, 2024
1601071
typo
GarethCabournDavies Apr 23, 2024
f2124c3
calculate plotmax earlier and use it to decide the histogram bins
GarethCabournDavies Apr 23, 2024
3141f4b
Update bin/live/pycbc_live_plot_single_significance_fits
GarethCabournDavies Jun 6, 2024
68cd60b
TDC comments
GarethCabournDavies Jun 6, 2024
2e9285b
Update threshold naming and description
GarethCabournDavies Jun 6, 2024
cedca28
update argument in example
GarethCabournDavies Jun 6, 2024
dfcea89
Please do not look at the previous commit and see how much of an idio…
GarethCabournDavies Jun 7, 2024
d3a3492
Update bin/live/pycbc_live_combine_single_significance_fits
GarethCabournDavies Jun 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions bin/live/pycbc_live_combine_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ for f in args.trfits_files:
bl = fit_f['bins_lower'][:]
bu = fit_f['bins_upper'][:]
sngl_rank = fit_f.attrs['sngl_ranking']
fit_thresh = fit_f.attrs['fit_threshold']
fit_func = fit_f.attrs['fit_function']
fit_thresh = {ifo: fit_f[ifo].attrs['fit_threshold']
GarethCabournDavies marked this conversation as resolved.
Show resolved Hide resolved
for ifo in args.ifos}
fit_func = {ifo: fit_f[ifo].attrs['fit_function']
for ifo in args.ifos}

# Now go back through the fit files and read the actual information. Skip the
# files that have fit parameters inconsistent with what we found earlier.
Expand All @@ -80,9 +82,13 @@ for f in args.trfits_files:
with h5py.File(f, 'r') as fits_f:
# Check that the file uses the same setup as file 0, to make sure
# all coefficients are comparable
new_fit_func = {ifo: fits_f[ifo].attrs['fit_function']
for ifo in args.ifos}
new_fit_thresh = {ifo: fits_f[ifo].attrs['fit_threshold']
for ifo in args.ifos}
same_conf = (fits_f.attrs['sngl_ranking'] == sngl_rank
and fits_f.attrs['fit_threshold'] == fit_thresh
and fits_f.attrs['fit_function'] == fit_func
and new_fit_thresh == fit_thresh
and new_fit_func == fit_func
and fits_f['bins_lower'].size == bl.size
and all(fits_f['bins_lower'][:] == bl)
and all(fits_f['bins_upper'][:] == bu))
Expand All @@ -99,7 +105,7 @@ for f in args.trfits_files:
gps_last = 0
gps_first = np.inf
for ifo in args.ifos:
if ifo not in fits_f:
if ifo not in fits_f or 'triggers' not in fits_f[ifo]:
continue
trig_times = fits_f[ifo]['triggers']['end_time'][:]
gps_last = max(gps_last, trig_times.max())
Expand Down Expand Up @@ -143,7 +149,6 @@ cons_counts_out = {ifo: np.inf * np.ones(len(alphas_bin[ifo])) for ifo in args.i
logging.info("Writing results")

fout = h5py.File(args.output, 'w')
fout.attrs['fit_threshold'] = fit_thresh
fout.attrs['conservative_percentile'] = args.conservative_percentile
fout.attrs['ifos'] = args.ifos
fout['bins_edges'] = list(bl) + [bu[-1]]
Expand All @@ -152,6 +157,9 @@ fout['fits_dates'] = ad + start_time_n
for ifo in args.ifos:
logging.info(ifo)
fout_ifo = fout.create_group(ifo)
for ifo in args.ifos:
fout_ifo.attrs['fit_threshold'] = fit_thresh[ifo]
fout_ifo.attrs['fit_function'] = fit_func[ifo]
GarethCabournDavies marked this conversation as resolved.
Show resolved Hide resolved
l_times = np.array(live_times[ifo])
total_time = l_times.sum()
fout_ifo.attrs['live_time'] = total_time
Expand Down
2 changes: 1 addition & 1 deletion bin/live/pycbc_live_plot_combined_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ for ifo in ifos:
continue

l_times = separate_times[ifo]
with np.errstate(divide='ignore'):
with np.errstate(divide='ignore', invalid='ignore'):
GarethCabournDavies marked this conversation as resolved.
Show resolved Hide resolved
rate = counts / l_times

ma = mean_alpha[ifo][i]
Expand Down
50 changes: 37 additions & 13 deletions bin/live/pycbc_live_plot_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,22 @@ with h5py.File(args.trigger_fits_file, 'r') as trfit_f:
ifos = [k for k in trfit_f.keys() if not k.startswith('bins')]

# Grab some info from the attributes
sngl_ranking = trfit_f.attrs['sngl_ranking']
fit_threshold = trfit_f.attrs['fit_threshold']
fit_function = trfit_f.attrs['fit_function']
if trfit_f.attrs['ranking_statistic'] in ['quadsum','single_ranking_only']:
x_label = trfit_f.attrs['sngl_ranking']
else:
x_label = "Ranking statistic"
fit_threshold = {ifo: trfit_f[ifo].attrs['fit_threshold'] for ifo in ifos}
fit_function = {ifo: trfit_f[ifo].attrs['fit_function'] for ifo in ifos}
analysis_date = trfit_f.attrs['analysis_date']

# Get the triggers for each detector
# (This is ones which passed the cuts in the fitting code)
stats = {ifo: {} for ifo in ifos}
durations = {ifo: {} for ifo in ifos}
for ifo in ifos:
stats[ifo] = trfit_f[ifo]['triggers'][sngl_ranking][:]
if 'triggers' not in trfit_f[ifo]:
continue
stats[ifo] = trfit_f[ifo]['triggers']['stat'][:]
durations[ifo] = trfit_f[ifo]['triggers']['template_duration'][:]
live_time = {ifo: trfit_f[ifo].attrs['live_time'] for ifo in ifos}
alphas = {ifo: trfit_f[ifo]['fit_coeff'][:] for ifo in ifos}
Expand Down Expand Up @@ -124,7 +129,10 @@ for ifo in all_ifos:
maxstat = stats[ifo].max()
max_rate = 0

plotbins = np.linspace(fit_threshold, 1.05 * maxstat, 400)
statrange = maxstat - max(stats[ifo].min(), fit_threshold[ifo])
plotmax = maxstat + statrange * 0.05

plotbins = np.linspace(fit_threshold[ifo], plotmax, 400)

logging.info("Putting events into bins")
event_bin = np.array([tbins[d] for d in durations[ifo]])
Expand All @@ -134,8 +142,26 @@ for ifo in all_ifos:
binlabel = f"{lower:.3g} - {upper:.3g}"

inbin = event_bin == bin_num
bin_prop = bin_num / len(tbins)
bin_colour = plt.get_cmap(args.colormap)(bin_prop)

# Skip if there are no triggers in this bin in this IFO
if not any(inbin):
if not any(inbin) or alphas[ifo][bin_num] == -1:
ax.plot(
[],
[],
linewidth=2,
color=bin_colour,
label=binlabel,
alpha=0.6
)
ax.plot(
[],
[],
"--",
color=bin_colour,
label="No triggers"
)
continue
binned_sngl_stats = stats[ifo][event_bin == bin_num]

Expand All @@ -147,30 +173,28 @@ for ifo in all_ifos:
max_rate = max(max_rate, cum_rate[0])

ecf = eval_cum_fit(
fit_function,
fit_function[ifo],
plotbins,
alphas[ifo][bin_num],
fit_threshold
fit_threshold[ifo]
)
cum_fit = counts[ifo][bin_num] / live_time[ifo] * ecf

bin_prop = bin_num / len(tbins)
bin_colour = plt.get_cmap(args.colormap)(bin_prop)
ax.plot(edges[:-1], cum_rate, linewidth=2,
color=bin_colour, label=binlabel, alpha=0.6)
ax.plot(plotbins, cum_fit, "--", color=bin_colour,
label=r"$\alpha = $%.2f" % alphas[ifo][bin_num])
ax.semilogy()
ax.grid()
ax.set_xlim(
fit_threshold if args.x_lim_lower is None else args.x_lim_lower,
1.05 * maxstat if args.x_lim_upper is None else args.x_lim_upper
fit_threshold[ifo] if args.x_lim_lower is None else args.x_lim_lower,
plotmax if args.x_lim_upper is None else args.x_lim_upper
)
ax.set_ylim(
0.5 / live_time[ifo] if args.y_lim_lower is None else args.y_lim_lower,
1.5 * max_rate if args.y_lim_upper is None else args.y_lim_upper
)
ax.set_xlabel(sngl_ranking)
ax.set_xlabel(x_label)
ax.set_ylabel("Number of louder triggers per live time")
title = f"{ifo} {analysis_date} trigger fits"
ax.set_title(title)
Expand Down
74 changes: 49 additions & 25 deletions bin/live/pycbc_live_single_significance_fits
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ import h5py

import pycbc
from pycbc.bin_utils import IrregularBins
from pycbc.events import cuts, trigger_fits as trstats
from pycbc.events import cuts, trigger_fits as trstats, stat
from pycbc.io import DictArray
from pycbc.events import ranking
from pycbc.events.coinc import cluster_over_time
from pycbc.types import MultiDetOptionAction


def duration_bins_from_cli(args):
Expand Down Expand Up @@ -75,6 +76,7 @@ parser.add_argument("--file-identifier", default="H1L1V1-Live",
help="String required in filename to be considered for "
"analysis. Default: 'H1L1V1-Live'.")
parser.add_argument("--fit-function", default="exponential",
action=MultiDetOptionAction,
choices=["exponential", "rayleigh", "power"],
help="Functional form for the maximum likelihood fit. "
"Choose from exponential, rayleigh or power. "
Expand Down Expand Up @@ -112,18 +114,20 @@ parser.add_argument("--prune-stat-threshold", type=float,
help="Minimum --sngl-ranking value to consider a "
"trigger for pruning.")
parser.add_argument("--fit-threshold", type=float, default=5,
action=MultiDetOptionAction,
help="Lower threshold used in fitting the triggers."
"Default 5.")
"Default 5. Can be supplied as a single value, "
"or as a set of IFO:value pairs, e.g. H1:0, L1:-1")
parser.add_argument("--cluster", action='store_true',
help="Only use maximum of the --sngl-ranking value "
"from each file.")
parser.add_argument("--output", required=True,
help="File in which to save the output trigger fit "
"parameters.")
parser.add_argument("--sngl-ranking", default="newsnr",
choices=ranking.sngls_ranking_function_dict.keys(),
help="The single-detector trigger ranking to use.")

stat.insert_statistic_option_group(
parser,
default_ranking_statistic='single_ranking_only'
)
cuts.insert_cuts_option_group(parser)

args = parser.parse_args()
Expand All @@ -133,8 +137,11 @@ pycbc.init_logging(args.verbose)
# Check input options

# Pruning options are mutually required or not needed
prune_options = [args.prune_loudest, args.prune_window,
args.prune_stat_threshold]
prune_options = [
args.prune_loudest is not None,
args.prune_window is not None,
args.prune_stat_threshold is not None
]

if any(prune_options) and not all(prune_options):
parser.error("Require all or none of --prune-loudest, "
Expand Down Expand Up @@ -162,6 +169,9 @@ if args.duration_bin_end and \
"--duration-bin-start, got "
f"{args.duration_bin_end} and {args.duration_bin_start}")


stat_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)

duration_bin_edges = duration_bins_from_cli(args)
logging.info("Duration bin edges: %s", duration_bin_edges)

Expand All @@ -177,13 +187,7 @@ args.template_cuts = args.template_cuts or []
args.template_cuts.append(f"template_duration:{min(duration_bin_edges)}:lower")
args.template_cuts.append(f"template_duration:{max(duration_bin_edges)}:upper_inc")

# Efficiency saving: add SNR cut before any others as sngl_ranking can
# only be less than SNR.
args.trigger_cuts = args.trigger_cuts or []
args.trigger_cuts.insert(0, f"snr:{args.fit_threshold}:lower_inc")

# Cut triggers with sngl-ranking below threshold
args.trigger_cuts.append(f"{args.sngl_ranking}:{args.fit_threshold}:lower_inc")

logging.info("Setting up the cut dictionaries")
trigger_cut_dict, template_cut_dict = cuts.ingest_cuts_option_group(args)
Expand Down Expand Up @@ -211,6 +215,9 @@ files = [f for f in os.listdir(date_directory)

events = {}

rank_method = {ifo: stat.get_statistic_from_opts(args, [ifo]) for ifo in args.ifos}

logging.info("Processing %d files", len(files))
for counter, filename in enumerate(files):
if counter and counter % 1000 == 0:
logging.info("Processed %d/%d files", counter, len(files))
Expand Down Expand Up @@ -284,10 +291,13 @@ for counter, filename in enumerate(files):
for k in trigs_ifo.keys()}

# Calculate the sngl_ranking values
sngls_value = ranking.get_sngls_ranking_from_trigs(
triggers_cut, args.sngl_ranking)
sds = rank_method[ifo].single(triggers_cut)
sngls_value = rank_method[ifo].rank_stat_single(
(ifo, sds),
**stat_kwargs
)

triggers_cut[args.sngl_ranking] = sngls_value
triggers_cut['stat'] = sngls_value

triggers_da = DictArray(data=triggers_cut)

Expand Down Expand Up @@ -330,7 +340,7 @@ for ifo in events:
for bin_num in range(n_bins):
inbin = event_bins[ifo] == bin_num

binned_events = events[ifo].data[args.sngl_ranking][inbin]
binned_events = events[ifo].data['stat'][inbin]
binned_event_times = events[ifo].data['end_time'][inbin]

# Cluster triggers in time with the pruning window to ensure
Expand Down Expand Up @@ -396,17 +406,32 @@ for ifo in events:
alphas[ifo][bin_num] = -1
continue

counts[ifo][bin_num] = np.count_nonzero(inbin)
stat_inbin = events[ifo].data['stat'][inbin]
counts[ifo][bin_num] = \
np.count_nonzero(stat_inbin > args.fit_threshold[ifo])

alphas[ifo][bin_num], _ = trstats.fit_above_thresh(
args.fit_function,
events[ifo].data[args.sngl_ranking][inbin],
args.fit_threshold
args.fit_function[ifo],
stat_inbin,
args.fit_threshold[ifo]
)

logging.info("Writing results")
with h5py.File(args.output, 'w') as fout:
for ifo in events:
for ifo in args.ifos:
fout_ifo = fout.create_group(ifo)
fout_ifo.attrs['fit_function'] = args.fit_function[ifo]
fout_ifo.attrs['fit_threshold'] = args.fit_threshold[ifo]
if ifo not in events:
# There were no triggers, but we should still produce some
# information
fout_ifo['fit_coeff'] = -1 * np.zeros(n_bins)
fout_ifo['counts'] = np.zeros(n_bins)
fout_ifo.attrs['live_time'] = live_time[ifo]
fout_ifo.attrs['pruned_times'] = []
fout_ifo.attrs['n_pruned'] = 0
continue

# Save the triggers we have used for the fits
fout_ifo_trigs = fout_ifo.create_group('triggers')
for key in events[ifo].data:
Expand All @@ -427,8 +452,7 @@ with h5py.File(args.output, 'w') as fout:
fout.attrs['analysis_date'] = args.analysis_date
fout.attrs['input'] = sys.argv
fout.attrs['cuts'] = args.template_cuts + args.trigger_cuts
fout.attrs['fit_function'] = args.fit_function
fout.attrs['fit_threshold'] = args.fit_threshold
fout.attrs['sngl_ranking'] = args.sngl_ranking
titodalcanton marked this conversation as resolved.
Show resolved Hide resolved
fout.attrs['ranking_statistic'] = args.ranking_statistic

logging.info("Done")
27 changes: 27 additions & 0 deletions examples/live/make_fit_coeffs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Makes files which can be used as the fit_coeffs statistic.
These are not of any scientific use, but the code will accept them
and run properly
"""

import h5py
titodalcanton marked this conversation as resolved.
Show resolved Hide resolved
import numpy as np

# Get number of templates from bank file
with h5py.File('template_bank.hdf', 'r') as bankf:
n_templates = bankf['mass1'].size

for ifo in ['H1','L1','V1']:
with h5py.File(f'{ifo}-fit_coeffs.hdf','w') as fits_f:
fits_f.attrs['analysis_time'] = 430000
fits_f.attrs['ifo'] = ifo
fits_f.attrs['stat'] = f'{ifo}-fit_coeffs'
fits_f.attrs['stat_threshold'] = 5

fits_f['count_above_thresh'] = np.ones(n_templates) * 100
fits_f['count_in_template'] = np.ones(n_templates) * 20000
fits_f['fit_coeff'] = np.ones(n_templates) * 5.5
fits_f['median_sigma'] = np.ones(n_templates) * 5800
fits_f['template_id'] = np.arange(n_templates)


Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import h5py
import numpy as np

f = h5py.File('single_trigger_fits.hdf','w')
f = h5py.File('single_significance_fits.hdf','w')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renaming to clarify the two types of single fits


# Some numbers to design the output
# These are loosely based on the O3a trigger fits file
Expand Down
Loading
Loading