From a5157d0fd643960151d2494fa0a5e19ce0b6a44e Mon Sep 17 00:00:00 2001 From: Chris Bielow Date: Fri, 27 Sep 2024 17:06:34 +0200 Subject: [PATCH] Accession order fixes (#330) * fix script: detect unsorted accessions; feature: report gap size * re-establish ordered accessions * Update version * Update psi-ms.obo --------- Co-authored-by: Joshua Klein --- psi-ms.obo | 34 +++++++++++++-------------- scripts/find_next_available_number.py | 4 +++- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/psi-ms.obo b/psi-ms.obo index d52c811..4cb52a3 100644 --- a/psi-ms.obo +++ b/psi-ms.obo @@ -1,7 +1,7 @@ format-version: 1.2 -data-version: 4.1.176 -date: 17:09:2024 15:26 -saved-by: Michael Hoopmann +data-version: 4.1.177 +date: 27:09:2024 07:15 +saved-by: Chris Bielow auto-generated-by: OBO-Edit 2.3.1 default-namespace: MS namespace-id-rule: * MS:$sequence(7,0,9999999)$ @@ -22522,6 +22522,20 @@ name: spectrum clustering software def: "Software designed to group multiple mass spectra by high similarity, generally with the goal of grouping replicate spectra derived from the same analyte." [PSI:MS] is_a: MS:1000531 ! software +[Term] +id: MS:1003407 +name: Scout +def: "Identifying crosslinked peptides in complex protein mixtures" [PSI:MS] +is_a: MS:1001456 ! analysis software + +[Term] +id: MS:1003408 +name: Scout score +def: "Scout identification search engine score" [PSI:MS] +is_a: MS:1001143 ! PSM-level search engine specific statistic +relationship: has_order MS:1003407 ! higher score better +relationship: has_value_type xsd:double ! The allowed value-type for this CV term + [Term] id: MS:1003409 name: Stellar @@ -24306,20 +24320,6 @@ synonym: "RT-MSMS-Q2" RELATED [PMID:24494671] synonym: "RT-MSMS-Q3" RELATED [PMID:24494671] synonym: "RT-MSMS-Q4" RELATED [PMID:24494671] -[Term] -id: MS:1003407 -name: Scout -def: "Identifying crosslinked peptides in complex protein mixtures" [PSI:MS] -is_a: MS:1001456 ! analysis software - -[Term] -id: MS:1003408 -name: Scout score -def: "Scout identification search engine score" [PSI:MS] -is_a: MS:1001143 ! PSM-level search engine specific statistic -relationship: has_order MS:1003407 ! higher score better -relationship: has_value_type xsd:double ! The allowed value-type for this CV term - [Term] id: PEFF:0000001 name: PEFF CV term diff --git a/scripts/find_next_available_number.py b/scripts/find_next_available_number.py index 0efb069..6350c09 100644 --- a/scripts/find_next_available_number.py +++ b/scripts/find_next_available_number.py @@ -22,10 +22,12 @@ def collect_gaps(stream, min_value=1000300): last_seen[cv] = acc continue diff = acc - last_in_cv + if diff < 1: + sys.stderr.write(f"CV is not sorted! {cv}:{acc} found after {cv}:{last_in_cv}\n") if diff > 1 and acc > min_value: for i in range(1, diff): gaps.append((cv, last_in_cv + i)) - sys.stderr.write(f"Found gap {gaps[-diff + 1]} to {gaps[-1]}\n") + sys.stderr.write(f"Found gap of size {diff-1}: {gaps[-diff + 1]} to {gaps[-1]}\n") last_seen[cv] = acc return gaps, last_seen except UnicodeDecodeError: