From 4877916ec5053569c4a733d4af53f50e848c791f Mon Sep 17 00:00:00 2001
From: riasc <richard.schaefer@zoho.com>
Date: Thu, 13 Jun 2024 03:23:37 -0500
Subject: [PATCH] fixing boost anycast error

---
 CMakeLists.txt           |   2 +-
 Dockerfile               |  12 +-
 README.md                |   2 +-
 include/Analysis.hpp     |  46 ++-
 include/Clustering.hpp   |   8 +-
 include/Config.hpp       |   2 +-
 include/DataTypes.hpp    |  20 ++
 include/IBPTree.hpp      |   6 +-
 include/Node.hpp         |   6 +-
 include/Utility.hpp      |   7 +-
 src/Analysis.cpp         | 659 ++++++++++++++++++---------------------
 src/Clustering.cpp       |   6 +-
 src/Data.cpp             |   3 +-
 src/IBPTree.cpp          | 109 ++++++-
 src/Node.cpp             |  12 +-
 src/SplitReadCalling.cpp |   4 +-
 src/Utility.cpp          |  12 +
 17 files changed, 507 insertions(+), 409 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eae5d7bd..9cb9e9e1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.22.1)
-project(RNAnue VERSION 0.2.0)
+project(RNAnue VERSION 0.2.1)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
 set(CMAKE_CXX_FLAGS -fopenmp)
diff --git a/Dockerfile b/Dockerfile
index 4ecd85a5..2bb79792 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ LABEL authors="Richard A. Schaefer"
 RUN apt-get -y update && apt-get -y upgrade
 RUN apt-get install -y curl build-essential cmake git pkg-config
 RUN apt-get install -y libbz2-dev zlib1g-dev libncurses5-dev liblzma-dev
-RUN apt-get install -y libboost-all-dev gdb
+RUN apt-get install -y libboost-all-dev gdb libmpfr-dev liblapacke-dev
 
 # install htslib
 WORKDIR /
@@ -25,24 +25,30 @@ WORKDIR /htslib-1.20
 RUN ./configure
 RUN make
 RUN make install
+RUN export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
 
 # install segemehl
 WORKDIR /
 RUN curl -L http://legacy.bioinf.uni-leipzig.de/Software/segemehl/downloads/segemehl-0.3.4.tar.gz -o segemehl-0.3.4.tar.gz
 RUN tar -xvf segemehl-0.3.4.tar.gz && rm segemehl-0.3.4.tar.gz
 WORKDIR /segemehl-0.3.4
-RUN export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
 RUN make all
 RUN cp segemehl.x /usr/local/bin
 
 # install ViennaRNA
+WORKDIR /
+RUN curl -L https://www.tbi.univie.ac.at/RNA/download/sourcecode/2_6_x/ViennaRNA-2.6.4.tar.gz -o ViennaRNA-2.6.4.tar.gz
+RUN tar -xvf ViennaRNA-2.6.4.tar.gz && rm ViennaRNA-2.6.4.tar.gz
+WORKDIR /ViennaRNA-2.6.4
+RUN ./configure
+RUN make && make install
 
 # retrieve RNAnue
 WORKDIR /
 RUN git clone https://github.com/Ibvt/RNAnue.git
 WORKDIR /RNAnue
 
-# retrieve SeqAn
+# retrieve SeqAn3
 RUN curl -L https://github.com/seqan/seqan3/releases/download/3.3.0/seqan3-3.3.0-Source.tar.xz -o seqan3-3.3.0-Source.tar.xz
 RUN tar -xvf seqan3-3.3.0-Source.tar.xz && rm seqan3-3.3.0-Source.tar.xz
 RUN mv seqan3-3.3.0-Source seqan3
diff --git a/README.md b/README.md
index 5937ab45..31c2b8b7 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ located in $PATH.
 * [Boost C++ Libraries](https://www.boost.org/) (v1.7.2)
 * [SeqAn](https://github.com/seqan/seqan3) (v3.3.0)
 * [Segemehl](http://www.bioinf.uni-leipzig.de/Software/segemehl/) (v0.3.4)
-* [Vienna Package](https://www.tbi.univie.ac.at/RNA/#binary_packages) (v2.4.17)
+* [Vienna Package](https://www.tbi.univie.ac.at/RNA/#binary_packages) (>= v2.4.17)
 
 ### CMake
 RNAnue is build using CMake. At first, clone the repository and change into the source directory.
diff --git a/include/Analysis.hpp b/include/Analysis.hpp
index a600f56b..9890e2b9 100644
--- a/include/Analysis.hpp
+++ b/include/Analysis.hpp
@@ -5,6 +5,9 @@
 #include <iostream>
 #include <map>
 #include <vector>
+#include <algorithm>
+#include <random>
+#include <cmath>
 
 // Boost
 #include <boost/program_options.hpp>
@@ -12,26 +15,63 @@
 #include <boost/property_tree/ptree.hpp>
 #include <boost/accumulators/accumulators.hpp>
 #include <boost/accumulators/statistics.hpp>
+#include <boost/math/distributions/binomial.hpp>
+
+
+// SeqAn3
+#include <seqan3/io/sam_file/all.hpp>
+#include <seqan3/core/debug_stream.hpp>
 
 // Class
 #include "IBPTree.hpp"
 
+// define tags
+using seqan3::operator""_tag;
+
 namespace po = boost::program_options;
 namespace fs = boost::filesystem;
 namespace pt = boost::property_tree;
+namespace ma = boost::math;
 
 class Analysis {
     public:
         Analysis(po::variables_map params);
         ~Analysis();
 
-        void start(pt::ptree sample);
-        void process(std::string& single, std::string& splits);
+        void start(pt::ptree sample, pt::ptree condition);
+        void processSingle(std::string& single);
+        void processSplits(std::string& splits, std::string& output);
+        void normalize(); // normalize the frequencies to 1
+
+        // write output files (of the analysis)
+        void writeInteractionsHeader(std::ofstream& fout);
+        void writeAllIntsHeader(std::ofstream& fout);
+        void addToAllIntsHeader(std::ofstream& fout, std::string key);
+        void writeAllInts();
+
+        // other operations
+        void addToFreqMap(std::pair<std::string,std::string> key, double value);
+        void addToSuppReads(dtp::IntPartner p1, dtp::IntPartner p2);
+        void addToCompl(dtp::IntPartner p1, dtp::IntPartner p2, double complementarity);
+        void addToHybEnergies(dtp::IntPartner p1, dtp::IntPartner p2, double hybenergy);
+
+        // calculate filters
+        double calcGCS(std::vector<double>& complementarities);
+        double calcGHS(std::vector<double>& hybenergies);
+        double calcStat(dtp::IntKey key, int x);
 
     private:
         po::variables_map params;
         IBPTree features;
-        std::map<std::pair<std::string, std::string>, double> pdf;
+        std::map<std::pair<std::string,std::string>,double> freq; // strand, name
+        std::string condition; // buffers the current condition
+        // maps for storing filters and suppreads
+        std::map<dtp::IntKey, std::vector<double>> suppreads;
+        std::map<dtp::IntKey, std::vector<std::vector<double>>> complementarities;
+        std::map<dtp::IntKey, std::vector<std::vector<double>>> hybenergies;
+
+        int repcount; // counter for current replicate
+        int readcount; // total number of reads
 };
 
 #endif //RNANUE_ANALYSIS_HPP
\ No newline at end of file
diff --git a/include/Clustering.hpp b/include/Clustering.hpp
index 1bdaa895..7ad241ae 100644
--- a/include/Clustering.hpp
+++ b/include/Clustering.hpp
@@ -24,13 +24,13 @@ namespace fs = boost::filesystem;
 
 struct Segment {
     std::string refid;
-    uint32_t flag;
+    seqan3::sam_flag flag;
     uint32_t start;
     uint32_t end;
 
-    Segment() : refid(""), flag(0), start(0), end(0) {};
-    Segment(std::string _refid, uint32_t _flag, unsigned int _start, unsigned int _end) :
-            refid(_refid), flag(_flag), start(_start), end(_end) {};
+    Segment() : refid(""), flag(seqan3::sam_flag{}), start(0), end(0) {};
+    Segment(std::string refid, seqan3::sam_flag flag, unsigned int start, unsigned int end) :
+            refid(refid), flag(flag), start(start), end(end) {};
 };
 
 struct Cluster {
diff --git a/include/Config.hpp b/include/Config.hpp
index 42f7d01e..0d9ab233 100644
--- a/include/Config.hpp
+++ b/include/Config.hpp
@@ -1,4 +1,4 @@
 // the configured options and settings for Tutorial
 #define RNAnue_VERSION_MAJOR 0
 #define RNAnue_VERSION_MINOR 2
-#define RNAnue_VERSION_PATCH 0
+#define RNAnue_VERSION_PATCH 1
diff --git a/include/DataTypes.hpp b/include/DataTypes.hpp
index 11a917a4..fbcfd058 100644
--- a/include/DataTypes.hpp
+++ b/include/DataTypes.hpp
@@ -79,6 +79,26 @@ namespace dtp {
     };
     using StatsMap = std::map<std::string, StatsFields>;
     using SpliceJunctions = std::map<std::string, std::vector<std::pair<size_t,size_t>>>;
+
+    // Analysis
+    using PdfMap = std::map<std::pair<std::string, std::string>, double>;
+    struct IntPartner {
+        std::string partner;
+        std::string orientation;
+        IntPartner() : partner(""), orientation("") {}
+        IntPartner(std::string partner, std::string orientation) : partner(partner), orientation(orientation) {}
+
+        bool operator<(const IntPartner& other) const {
+            if(partner < other.partner) {
+                return true;
+            } else if(partner == other.partner) {
+                return orientation < other.orientation;
+            } else {
+                return false;
+            }
+        }
+    };
+    using IntKey = std::vector<IntPartner>;
 }
 
 #endif //RNANUE_DATATYPES_HPP
diff --git a/include/IBPTree.hpp b/include/IBPTree.hpp
index cbd25602..f1044cf5 100644
--- a/include/IBPTree.hpp
+++ b/include/IBPTree.hpp
@@ -33,10 +33,12 @@ class IBPTree {
         // tree operations
         Node* getRoot(IntervalData& data);
         void insert(IntervalData& data);
+        void update(Node* node); // updates the keys in parent
         void insertIter(Node* node, IntervalData& data);
         void splitNode(Node* node, int index);
-        std::vector<IntervalData*> search(std::string chrom, dtp::Interval interval);
-        void searchIter(Node* node, const dtp::Interval& interval, std::vector<IntervalData*>& results);
+        std::vector<std::pair<Node*, IntervalData*>> search(std::string chrom, dtp::Interval interval);
+        void searchIter(Node* node, const dtp::Interval& interval,
+                        std::vector<std::pair<Node*,IntervalData*>>& results);
         bool isOverlapping(dtp::Interval intvl1, dtp::Interval intvl2);
 
 
diff --git a/include/Node.hpp b/include/Node.hpp
index 9c1e974c..888db20e 100644
--- a/include/Node.hpp
+++ b/include/Node.hpp
@@ -13,7 +13,7 @@
 class IntervalData {
     public:
         IntervalData(std::string chrom, char strand, std::string id, std::string name,
-                 std::string biotype, dtp::Interval interval);
+                 std::string biotype, dtp::Interval interval, IntervalData* split);
         ~IntervalData();
 
         // operator overloading
@@ -35,6 +35,8 @@ class IntervalData {
         void setInterval(dtp::Interval interval);
         dtp::SpliceJunctions getJunctions() const;
         void setJunctions(dtp::SpliceJunctions junctions);
+        IntervalData* getSplit() const;
+        void setSplit(IntervalData* split);
 
         // operations
         void addJunction(std::string name, std::pair<size_t,size_t> junction);
@@ -50,6 +52,7 @@ class IntervalData {
         std::string biotype;
         dtp::Interval interval;
         dtp::SpliceJunctions junctions;
+        IntervalData* split;
 };
 
 class Node {
@@ -60,7 +63,6 @@ class Node {
         std::vector<Node*> children;
         Node* next; // link to the next node
         Node* parent; // link to the parent node
-        std::vector<Node*> splits; // link to the corresponding split node
         bool isLeaf;
 
         // constructor
diff --git a/include/Utility.hpp b/include/Utility.hpp
index 28ce3095..968e3b76 100644
--- a/include/Utility.hpp
+++ b/include/Utility.hpp
@@ -41,12 +41,13 @@ namespace helper {
     // general operations
     bool withinRange(int a, int b, int range);
     std::string removeNonPrintable(const std::string str);
-
-
-
     std::string getTime(); // reports the current time
 }
 
+namespace stats {
+    double median(std::vector<double>& values);
+}
+
 // sequence input/output
 namespace seqIO {
     bool filterReads(auto& qual, int quality, int minlen);
diff --git a/src/Analysis.cpp b/src/Analysis.cpp
index ad7d377b..969b27dd 100644
--- a/src/Analysis.cpp
+++ b/src/Analysis.cpp
@@ -1,7 +1,7 @@
 #include "Analysis.hpp"
 
 //
-Analysis::Analysis(po::variables_map _params) : params(_params) {
+Analysis::Analysis(po::variables_map _params) : params(_params), repcount{0}, readcount{0} {
     int k = 3; // can be later extracted from config file
     // extract features (e.g., IBPTree)
     this->features = IBPTree(params, k); // create IBPT w/ annotations
@@ -11,423 +11,358 @@ Analysis::Analysis(po::variables_map _params) : params(_params) {
         fs::path clusterFile = outDir / fs::path("clustering") / fs::path("clusters.tab");
         this->features.iterateClusters(clusterFile.string());
     }
-    this->pdf = std::map<std::pair<std::string, std::string>, double>();
-
+    this->freq = std::map<std::pair<std::string, std::string>, double>();
+    this->suppreads = std::map<dtp::IntKey, std::vector<double>>();
+    this->complementarities = std::map<dtp::IntKey, std::vector<std::vector<double>>>();
+    this->hybenergies = std::map<dtp::IntKey, std::vector<std::vector<double>>>();
 }
 
-Analysis::~Analysis() {}
+Analysis::~Analysis() {
+}
 
-void Analysis::start(pt::ptree sample) {
-    this->pdf.clear(); // reset pdf
+void Analysis::start(pt::ptree sample, pt::ptree condition) {
+    this->freq.clear(); // reset freq
     // retrieve input and output files
     pt::ptree input = sample.get_child("input");
     std::string single = input.get<std::string>("single");
     std::string splits = input.get<std::string>("splits");
 
-    //iterate(single, splits);
-}
-
-void Analysis::process(std::string& single, std::string& splits) {
-
-}
-
-
+    pt::ptree output = sample.get_child("output");
+    std::string interactions = output.get<std::string>("interactions");
 
+    //
+    this->freq = std::map<std::pair<std::string, std::string>, double>();
+    this->condition = condition.get<std::string>("condition"); // store current condition
 
+    this->readcount = 0;
 
+    processSingle(single);
+    processSplits(splits, interactions);
 
-/*
-std::string line;
-std::ifstream anno;
+    this->repcount++; // increase replicate count
 
-anno.open(params["features"].as<std::string>());
-// parse annotations
-if(!anno.is_open()) {
-    perror("Error open");
-    exit(EXIT_FAILURE);
+    normalize();
 }
-while(getline(anno, line)) {
-    if(line[0] == '#') { continue; }
-
-    std::vector<std::string> tokens;
-    std::istringstream iss(line);
-    std::string token;
-    while(std::getline(iss, token, '\t')) {
-        tokens.push_back(token);
-    }
-
-    // boundaries
-    std::pair<int,int> bnds = std::make_pair(std::stoi(tokens[3]),std::stoi(tokens[4]));
-    std::pair<std::pair<int,int>,std::string> con = std::make_pair(bnds,"strand="+tokens[6]+";"+tokens[8]);
 
-    std::size_t start_position;
-    if(features.count(tokens[0]) == 0) { // check of RefName is in map
-        if(start_position != std::string::npos) {
-            features.insert(std::pair<std::string, std::vector<std::pair<std::pair<int,int>,std::string>>>(tokens[0],{con}));
+void Analysis::processSingle(std::string& single) {
+    seqan3::sam_file_input inputSingle{single}; // match reads
+    std::vector<size_t> ref_lengths{}; // header
+    for(auto &info : inputSingle.header().ref_id_info) {
+        ref_lengths.push_back(std::get<0>(info));
+    }
+    std::deque<std::string> refIds = inputSingle.header().ref_ids();
+    std::optional<int32_t> refOffset = std::nullopt;
+    std::string orient = ""; // e.g., sense (ss) or antisense (as)
+    char strand = ' ';
+    uint32_t start = 0, end = 0;
+    for(auto& rec : inputSingle) {
+        refOffset = rec.reference_position();
+        start = rec.reference_position().value();
+        end = rec.reference_position().value() + rec.sequence().size();
+        if(static_cast<bool>(rec.flag() & seqan3::sam_flag::on_reverse_strand)) { strand = '-'; } else { strand = '+'; }
+
+        std::vector<std::pair<Node*, IntervalData*>> ovlps = features.search(refIds[rec.reference_id().value()],
+                                                                             {start, end});
+        if(ovlps.size() > 0) {
+            for(auto& ovl : ovlps) {
+                if(strand == ovl.second->getStrand()) { orient = "sense"; } else { orient = "antisense"; }
+                addToFreqMap(std::make_pair(orient, ovl.second->getName()), 1.0/(double)ovlps.size()); // add to freq
+            }
         }
-    } else {
-        features[tokens[0]].push_back(con);
+        readcount++;
     }
 }
 
-// read file
-fs::path freqFile = fs::path(params["outdir"].as<std::string>()) / fs::path("frequency.txt");
-std::ifstream freqHandle;
-freqHandle.open(freqFile.string());
-
-// iterate and store into frquency map
-while(getline(freqHandle, line)) {
-    std::vector<std::string> tokens;
-    std::istringstream iss(line);
-    std::string token;
-    while(std::getline(iss, token, '\t')) {
-        tokens.push_back(token);
-    }
-    frequency.insert(std::pair<std::string,int>(tokens[0],std::stoi(tokens[1])));
-}*/
-
-
-
-
-
-/*
-//
-void Analysis::createCountTable() {
-    std::map<std::tuple<std::string,std::string,std::string,std::string>,std::vector<std::tuple<int,std::vector<float>,std::vector<float>>>> counts;
-
-    std::ifstream intsfile;
-    for(unsigned i=0;i<interPaths.size();++i) {
-        intsfile.open(interPaths[i]);
+void Analysis::processSplits(std::string& splits, std::string& output) {
+    seqan3::sam_file_input inputSplits{splits}; // match reads
+    std::ofstream outInts(output, std::ios::out); // output interactions
+    writeInteractionsHeader(outInts); // write header in output file
 
-        std::cout << interPaths[i] << std::endl;
-
-        if(!intsfile.is_open()) {
-            perror("Error open");
-            exit(EXIT_FAILURE);
-        }
-
-        std::string line;
-        while(getline(intsfile, line)){  //read data from file object and put it into string.
-            if(line[0] == '#') { continue; }
-
-            std::vector<std::string> tokens;
-            std::istringstream iss(line);
-            std::string token;
-            while(std::getline(iss, token, '\t')) {
-                tokens.push_back(token);
-            }
-
-            std::tuple<std::string,std::string,std::string,std::string> key1;
-            std::tuple<std::string,std::string,std::string,std::string> key2;
-            key1 = std::make_tuple(tokens[5],tokens[8],tokens[13],tokens[16]);
-            key2 = std::make_tuple(tokens[5],tokens[8],tokens[13],tokens[16]);
+    std::vector<size_t> ref_lengths{}; // header
+    for(auto &info : inputSplits.header().ref_id_info) {
+        ref_lengths.push_back(std::get<0>(info));
+    }
+    std::deque<std::string> refIds = inputSplits.header().ref_ids();
 
-            //
-            if(counts.count(key1) == 0 && counts.count(key2) == 0) {
-                std::vector<std::tuple<int,std::vector<float>,std::vector<float>>> content;
+    // variables
+    std::string outLine = ""; // buffers the output file for interaction entry
+    std::bitset<1> skip = 0; // skip if no match
+    char strand = ' '; // strand of segment
+    std::string orient = ""; // orientation of segment (ss or as)
+    uint32_t start = 0, end = 0; // start and end of segment
+    std::string refName = ""; // reference name
 
-                for(unsigned j=0; j<interPaths.size();++j) {
-                    std::tuple<int,std::vector<float>,std::vector<float>> vals{0,{},{}};
-                    content.push_back(vals);
+    // information from the tags
+    seqan3::sam_tag_dictionary tags;
+    std::pair<float, float> filters; // complementarity and hybridization energy
+    std::vector<std::string> aligns; // alignment of complementarity
+
+    // segment information (e.g., gene name, product, etc.) - keys are the as or ss
+    std::map<std::string, std::vector<IntervalData*>> segmentInfo = {};
+
+    for(auto&& rec : inputSplits | seqan3::views::chunk(2)) {
+        outLine = (*rec.begin()).id(); // QNAME
+        // reset
+        skip = 0; // reset skip
+        segmentInfo.clear(); // reset segment info
+        filters = std::make_pair(0.0, 0.0); // reset filters
+        aligns.clear();
+
+        // retrieve complementarity and energy
+        tags = (*rec.begin()).tags();
+        filters = std::make_pair(std::get<float>(tags["XC"_tag]), std::get<float>(tags["XE"_tag]));
+
+        dtp::IntKey intKey = {}; // key to store the interaction (partner1, orientation1, partner2, orientation2)
+
+        for (auto &split: rec) {
+            if (static_cast<bool>(split.flag() & seqan3::sam_flag::on_reverse_strand)) {
+                strand = '-';
+            } else { strand = '+'; }
+            outLine += "\t" + std::string(1, strand); // strand
+            start = split.reference_position().value();
+            end = split.reference_position().value() + split.sequence().size();
+            outLine += "\t" + std::to_string(start) + "\t" + std::to_string(end); // start & end
+            refName = refIds[split.reference_id().value()];
+            outLine += "\t" + refName; // reference name
+
+            tags = split.tags(); // retrieve the alignment information
+            aligns.push_back(std::get<std::string>(tags["XA"_tag]));
+
+            // match with features
+            std::vector<std::pair<Node *, IntervalData *>> ovlps = features.search(refIds[split.reference_id().value()],
+                                                                                   {start, end});
+            segmentInfo = {}; // reset segmentInfo
+            if(ovlps.size() > 0) {
+                segmentInfo.insert({"sense", {}});
+                segmentInfo.insert({"antisense", {}});
+                for (auto &ovl: ovlps) {
+                    if (strand == ovl.second->getStrand()) { orient = "sense"; } else { orient = "antisense"; }
+                    segmentInfo[orient].push_back(ovl.second); // add to segmentInfo
+                    addToFreqMap(std::make_pair(orient, ovl.second->getName()), 1.0/(double)ovlps.size()); // add to freq
+
+
+                    /*
+                    std::string info = ovl.second->getName() + "\t" + ovl.second->getBiotype() + "\t" +
+                                       ovl.second->getStrand() + "\t" + "." + "\t" + orient;
+                    segmentInfo[orient].push_back(info);*/
                 }
-                std::get<0>(content[i]) = 1;
-                std::vector<float> en = std::get<1>(content[i]);
-                std::vector<float> cm = std::get<2>(content[i]);
-
-                en.push_back(std::stof(tokens[17]));
-                cm.push_back(std::stof(tokens[18]));
-
-                std::get<1>(content[i]) = en;
-                std::get<2>(content[i]) = cm;
-
-                counts.insert(std::pair<std::tuple<std::string,std::string,std::string,std::string>,std::vector<std::tuple<int,std::vector<float>,std::vector<float>>>>(key1,content));
-
-            } else {
-                if(counts.count(key1) > 0) {
-                    std::tuple<int,std::vector<float>,std::vector<float>> oldVal;
-                    oldVal = counts[key1][i];
-                    std::get<0>(oldVal) = std::get<0>(oldVal) + 1;
-
-                    std::vector<float> oldValEnergy = std::get<1>(oldVal);
-                    std::vector<float> oldValCmpl = std::get<2>(oldVal);
-
-                    oldValEnergy.push_back(std::stof(tokens[17]));
-                    oldValCmpl.push_back(std::stof(tokens[18]));
-
-                    std::get<1>(oldVal) = oldValEnergy;
-                    std::get<2>(oldVal) = oldValCmpl;
-
-                    counts[key1][i] = oldVal;
+                // check which one to select
+                if (segmentInfo["sense"].size() == 1) {
+                    IntervalData* data = segmentInfo["sense"][0];
+                    outLine += "\t" + data->getName() + "\t" + data->getBiotype() + "\t" + data->getStrand();
+                    outLine += "\t.\tsense";
+                    intKey.push_back(dtp::IntPartner(data->getName(),"sense"));
                 } else {
-                    if(counts.count(key2) > 0) {
-                        std::tuple<int,std::vector<float>,std::vector<float>> oldVal;
-                        oldVal = counts[key2][i];
-                        std::get<0>(oldVal) = std::get<0>(oldVal) + 1;
-
-                        std::vector<float> oldValEnergy = std::get<1>(oldVal);
-                        std::vector<float> oldValCmpl = std::get<2>(oldVal);
-
-                        oldValEnergy.push_back(std::stof(tokens[17]));
-                        oldValCmpl.push_back(std::stof(tokens[18]));
-
-                        std::get<1>(oldVal) = oldValEnergy;
-                        std::get<2>(oldVal) = oldValCmpl;
-
-                        counts[key2][i] = oldVal;
+                    if (segmentInfo["antisense"].size() == 1) {
+                        IntervalData* data = segmentInfo["antisense"][0];
+                        outLine += "\t" + data->getName() + "\t" + data->getBiotype() + "\t" + data->getStrand();
+                        outLine += "\t.\tsense";
+                        intKey.push_back(dtp::IntPartner(data->getName(),"antisense"));
+                    } else {
+                        skip = 1; // skip if not unique
                     }
                 }
-            }
+            } else { skip = 1; } // skip if no match
         }
+        outLine += "\t" + std::to_string(filters.first); // complementarity
+        outLine += "\t" + aligns[0] + "\t" + aligns[1]; // alignment
+        outLine += "\t" + std::to_string(filters.second) + "\n"; // energy
+        if (skip == 0) {
+            outInts << outLine;
+            addToSuppReads(intKey[0], intKey[1]);
+            addToCompl(intKey[0], intKey[1], filters.first);
+            addToHybEnergies(intKey[0], intKey[1], filters.second);
+        } // write to file (if not skipped)
+        readcount++;
     }
+    outInts.close();
+}
 
+void Analysis::writeInteractionsHeader(std::ofstream& fout) {
+    fout << "qname\tfst_seg_strd\tfst_seg_strt\tfst_seg_end\tfst_seg_ref\tfst_seg_name\tfirst_seg_bt\t";
+    fout << "fst_seg_anno_strd\tfst_seg_prod\tfst_seg_ori\tsec_seg_strd\tsec_seg_strt\tsec_seg_end\t";
+    fout << "sec_seg_ref\tsec_seg_name\tsec_seg_bt\tsec_seg_anno_strd\tsec_seg_prod\tsec_seg_ori\t";
+    fout << "cmpl\tfst_seg_compl_aln\tsec_seg_cmpl_aln\tmfe\n";
+}
 
-    // write back to file
-    std::string outfile = params["outdir"].as<std::string>();
-    fs::path outPath = fs::path(outfile) / "allints.txt";
-
-    std::ofstream outFileHandle;
-    outFileHandle.open(outPath.string());
+void Analysis::writeAllIntsHeader(std::ofstream& fout) { fout << "fst_rna\tsec_rna\tfst_rna_ori\tsec_rna_ori";}
+void Analysis::addToAllIntsHeader(std::ofstream &fout, std::string key) { fout << "\t" << key; }
 
-    outFileHandle << "RNA1\tRNA2\tRNA1orientation\tRNA2orientation\t";
+void Analysis::normalize() {
+    // sum up all frequencies
+    double sum = 0.0;
+    for(auto& f : this->freq) {
+        sum += f.second;
+    }
 
-    for(unsigned i=0;i<interPaths.size();i++) {
-        outFileHandle << fs::path(interPaths[i]).stem().string() << "_counts\t";
-        outFileHandle << fs::path(interPaths[i]).stem().string() << "_ges\t";
-        outFileHandle << fs::path(interPaths[i]).stem().string() << "_gcs\t";
+    // normalize
+    for(auto& f : this->freq) {
+        f.second = f.second/sum;
     }
-    outFileHandle << "\n";
 
-    for(auto it=counts.begin(); it!=counts.end(); ++it) {
-        outFileHandle << std::get<0>(it->first) << "\t";
-        outFileHandle << std::get<2>(it->first) << "\t";
-        outFileHandle << std::get<1>(it->first) << "\t";
-        outFileHandle << std::get<3>(it->first) << "\t";
+    // print
+    /*
+    for(auto& f : freq) {
+        std::cout << f.first.first << " " << f.first.second << " " << f.second << std::endl;
+    }*/
+}
 
-        //
-        for(unsigned z=0;z<it->second.size();++z) {
-            outFileHandle << std::get<0>(it->second[z]) << "\t";
+void Analysis::addToFreqMap(std::pair<std::string, std::string> key, double value) {
+if(freq.count(key) == 0) {
+        freq.insert(std::make_pair(key, value));
+    } else {
+        freq[key] += value;
+    }
+}
 
-            std::vector<float> vecNrg = std::get<1>(it->second[z]);
-            std::vector<float> vecCpl = std::get<2>(it->second[z]);
+bool operator<(const dtp::IntKey& lhs, const dtp::IntKey& rhs) {
+    return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
 
-            // determine ges
-            float ges = 0.0;
-            if(vecNrg.size() == 1) {
-                ges = vecNrg[0];
+void Analysis::addToSuppReads(dtp::IntPartner p1, dtp::IntPartner p2) {
+    dtp::IntKey key;
+    if(p1 < p2) { key = {p1, p2}; } else { key = {p2, p1}; }
+    // check if key in map
+    if(suppreads.find(key) == suppreads.end()) {
+        std::vector<double> suppReadsRepl = {}; // new list for all replicates (up to the current one)
+        for(int i=0;i<repcount;++i) { suppReadsRepl.push_back(0.0); }
+        suppReadsRepl.push_back(1.0); // add 1.0 for the last (current) replicate
+        suppreads.insert({key, suppReadsRepl});
+    } else { // key found in map
+        std::vector<double>& suppReadsRepl = this->suppreads[key];
+        if(suppReadsRepl.size() < repcount) { // not yet at the current replicate (probably skipped a few)
+            for(int i=suppReadsRepl.size(); i<repcount; ++i) {
+                suppReadsRepl.push_back(0.0);
+            }
+            suppReadsRepl.push_back(1.0);
+        } else {
+            if(suppReadsRepl.size() > repcount) {
+                suppReadsRepl[repcount] += 1.0;
             } else {
-                if(vecNrg.size() > 1) {
-                    auto m = vecNrg.begin() + vecNrg.size()/2;
-                    std::nth_element(vecNrg.begin(), m, vecNrg.end());
-                    double min = *min_element(vecNrg.begin(), vecNrg.end());
-                    ges = vecNrg[vecNrg.size()/2];
-                }
+                suppReadsRepl.push_back(1.0);
             }
-            outFileHandle << ges << "\t";
+        }
+    }
+}
 
-            // determine gcs
-            float gcs = 0.0;
-            if(vecCpl.size() == 1) {
-                gcs = vecCpl[0];
+void Analysis::addToCompl(dtp::IntPartner p1, dtp::IntPartner p2, double complementarity) {
+    dtp::IntKey key;
+    if(p1 < p2) { key = {p1, p2}; } else { key = {p2, p1}; }
+    // check if key in map
+    if(complementarities.find(key) == complementarities.end()) {
+        std::vector<std::vector<double>> CmplsRepl = {}; // new list for all replicates (up to the current one)
+        for(int i=0;i<repcount;++i) { CmplsRepl.push_back({}); }
+        CmplsRepl.push_back({complementarity}); // add 1.0 for the last (current) replicate
+        complementarities.insert({key, CmplsRepl});
+    } else {
+        std::vector<std::vector<double>>& cmplsRepl = this->complementarities[key];
+        if(cmplsRepl.size() < repcount) { // not yet at the current replicate (probably skipped a few)
+            for(int i=cmplsRepl.size(); i<repcount; ++i) { cmplsRepl.push_back({}); }
+            cmplsRepl.push_back({complementarity});
+        } else {
+            if(cmplsRepl.size() > repcount) {
+                cmplsRepl[repcount].push_back(complementarity);
             } else {
-                if(vecCpl.size() > 1) {
-                    auto m = vecCpl.begin() + vecCpl.size()/2;
-                    std::nth_element(vecCpl.begin(), m, vecCpl.end());
-                    double min = *min_element(vecCpl.begin(), vecCpl.end());
-                    gcs = vecCpl[vecCpl.size()/2];
-                }
+                cmplsRepl.push_back({complementarity});
             }
-            outFileHandle << gcs << "\t";
         }
-        outFileHandle << "\n";
     }
-    outFileHandle.close();
 }
 
-
-
-//
-std::string Analysis::retrieveTagValue(std::string tags, std::string tagName, std::string oldValue) {
-    std::size_t start_position = tags.find(tagName+"=");
-    // gene name
-    if(start_position != std::string::npos) {
-        std::string sub = tags.substr(start_position+tagName.size()+1,tags.length());
-        std::size_t end_position = sub.find(";");
-        oldValue = sub.substr(0,end_position);
+void Analysis::addToHybEnergies(dtp::IntPartner p1, dtp::IntPartner p2, double hybenergy) {
+    dtp::IntKey key;
+    if(p1 < p2) { key = {p1, p2}; } else { key = {p2, p1}; }
+    // check if key in map
+    if(hybenergies.find(key) == hybenergies.end()) {
+        std::vector<std::vector<double>> hybRepl = {}; // new list for all replicates (up to the current one)
+        for(int i=0;i<repcount;++i) { hybRepl.push_back({}); }
+        hybRepl.push_back({hybenergy}); // add 1.0 for the last (current) replicate
+        hybenergies.insert({key, hybRepl});
+    } else {
+        std::vector<std::vector<double>>& hybRepl = this->hybenergies[key];
+        if(hybRepl.size() < repcount) { // not yet at the current replicate (probably skipped a few)
+            for(int i=hybRepl.size(); i<repcount; ++i) { hybRepl.push_back({}); }
+            hybRepl.push_back({hybenergy});
+        } else {
+            if(hybRepl.size() > repcount) {
+                hybRepl[repcount].push_back(hybenergy);
+            } else {
+                hybRepl.push_back({hybenergy});
+            }
+        }
     }
-    return oldValue;
 }
 
+double Analysis::calcGCS(std::vector<double>& complementarities) {
+    if(complementarities.size() == 0) { return 0.0; }
+    double median = stats::median(complementarities);
+    double maximum = *std::max_element(complementarities.begin(), complementarities.end());
+    return median * maximum;
+}
 
-float Analysis::calc_pvalue(int x, int n, float p) {
-    // simulate draw from binomial distribution
-    std::binomial_distribution<int> binomial(n, p);
-    // assign p value
-    float pval = 1.0 - cdf(binomial, x);
-    return pval;
-}*/
-
-
-    /*
-    // retrieve input and output files
-    pt::ptree input = sample.get_child("input");
-    std::string splits = input.get<std::string>("splits");
-    pt::ptree output = sample.get_child("output");
-    std::string interactions = output.get<std::string>("interactions");
-
-
-    interPaths.push_back(interactions);
-
-    // input .sam record
-    seqan3::alignment_file_input fin{splits,
-                                     seqan3::fields<seqan3::field::id,
-                                     seqan3::field::flag,
-                                     seqan3::field::ref_id,
-                                     seqan3::field::ref_offset,
-                                     seqan3::field::seq,
-                                     seqan3::field::tags>{}};
-
-    std::vector<seqan3::sam_flag> flags;
-    std::vector<std::string> refIDs;
-    std::vector<std::optional<int32_t>> refOffsets;
-
-    std::vector<size_t> ref_lengths{};
-    for(auto &info : fin.header().ref_id_info) {
-        ref_lengths.push_back(std::get<0>(info));
+double Analysis::calcGHS(std::vector<double>& hybenergies) {
+    if(hybenergies.size() == 0) { return 1000.0; }
+    double median = stats::median(hybenergies);
+    double minimum = *std::min_element(hybenergies.begin(), hybenergies.end());
+    if(median < 0 && minimum < 0) {
+        return -sqrt(median * minimum);
+    } else {
+        if(median > 0 && minimum > 0) {
+            return sqrt(median * minimum);
+        } else {
+            return sqrt(abs(median * minimum));
+        }
     }
-    std::deque<std::string> ref_ids = fin.header().ref_ids();
-
-//    uint32_t flag, start, end;
-
-    // open file
-    std::ofstream outInts;
-    outInts.open(interactions);
-
-    std::string entry; // stores output to write to file
-
-    // variables for interactions file
-    std::string qNAME, flag;
-    uint32_t start, end;
-    std::string geneID, geneName, product, annoStrand;
-    float hybnrg, cmpl;
+}
 
-    seqan3::sam_tag_dictionary tags;
+double Analysis::calcStat(dtp::IntKey key, int x) {
+    std::pair<std::string, std::string> p1 = {key[0].orientation, key[0].partner};
+    std::pair<std::string, std::string> p2 = {key[1].orientation, key[1].partner};
+    double p;
+    if(freq.find(p1) == freq.end() || freq.find(p2) == freq.end()) {
+        p = 0.0;
+    } else {
+        if(p1 == p2) {
+            p = freq[p1]*freq[p2];
+        } else {
+            p = 2*freq[p1]*freq[p2];
+        }
+    }
+    ma::binomial_distribution<double> binomial(this->readcount, p);
+    double pval = 1.0 - ma::cdf(binomial, x);
+    return pval;
+}
 
-    outInts << "#QNAME\tSegment1Strand\tSegment1Start\tSegment1End\tSegment1RefName"
-    outInts << "\tSegment1Name\tSegment1AnnoStrand\tSegment1Product\tSegment1Orientation"
-    outInts << "\tSegment2Strand\tSegment2Start\tSegment2End\tSegment2RefName\tSegment2Name"
-    outInts << "\tSegment2AnnoStrand\tSegment2Product\tSegment1Orientation\tenergy\tcomplementarity\n";
-
-    int segCnt = 0;
-    int segCntMatch = 0;
-    int segFound = 0;
-
-    for(auto && rec : fin | seqan3::views::chunk(2)) {
-        entry = "";
-        hybnrg = 0.0;
-        cmpl = 0.0;
-        for(auto & split : rec) {
-            //           seqan3::debug_stream << split << std::endl;
-
-            qNAME = seqan3::get<seqan3::field::id>(split);
-            flag = "+";
-            if(static_cast<bool>(seqan3::get<seqan3::field::flag>(split)
-                                 & seqan3::sam_flag::on_reverse_strand)) {
-                flag = "-";
+void Analysis::writeAllInts() {
+    std::string outDirPath = params["outdir"].as<std::string>();
+    fs::path outDirDetectPath = fs::path(outDirPath) / fs::path("analysis");
+    fs::path allIntsPath = fs::path(outDirDetectPath) / fs::path("allints.txt");
+    std::ofstream fout(allIntsPath.string(), std::ios::out);
+
+    if(fout.is_open()) {
+        for(auto& entry : suppreads) {
+            fout << entry.first[0].partner << "\t" << entry.first[1].partner;
+            fout << "\t" << entry.first[0].orientation << "\t" << entry.first[1].orientation;
+            for(int i=0;i<entry.second.size();++i) {
+                fout << "\t" << entry.second[i]; // counts
+                double gcs = calcGCS(complementarities[entry.first][i]); // complementarity score (GCS)
+                if(gcs != 0.0) { fout << "\t" << gcs; } else { fout << "\t."; }
+                double ghs = calcGHS(hybenergies[entry.first][i]); // Hybridization score (GHS)
+                if(ghs != 1000.0) { fout << "\t" << ghs; } else { fout << "\t."; }
+                // statistical
+                // call with key(RNA1/2 and orient) and counts
+                double stat = calcStat(entry.first, entry.second[i]);
+                fout << "\t" << stat;
             }
-
-            // start & end
-            start = seqan3::get<seqan3::field::ref_offset>(split).value();
-            end = start + seqan3::get<seqan3::field::seq>(split).size()-1;
-
-            // refID
-            std::optional<int32_t> refIDidx = seqan3::get<seqan3::field::ref_id>(split);
-            std::string refID = ref_ids[refIDidx.value()];
-
-            tags = seqan3::get<seqan3::field::tags>(split);
-            auto nrg = tags["XE"_tag];
-            auto cpl = tags["XC"_tag];
-            hybnrg = std::get<float>(nrg);
-            cmpl = std::get<float>(cpl);
-
-            std::vector<std::pair<std::pair<int,int>,std::string>> feat;
-            if(features.count(refID) > 0) {
-                feat = features[refID];
-
-                std::pair<std::string, std::string> geneNames;
-                geneID = ".";
-                geneName = ".";
-                product = ".";
-                annoStrand = ".";
-                for(unsigned i=0;i<feat.size();++i) {
-                    if((start >= feat[i].first.first && start <= feat[i].first.second) ||
-                       (end >= feat[i].first.first && end <= feat[i].first.second)) {
-
-                        // make sure that overlap is in exon
-                        if(params["splicing"].as<std::bitset<1>() && feat[i].second.find("exon") == std::string::npos) {
-                            continue;
-                        }
-                        geneID = retrieveTagValue(feat[i].second, "ID", geneName);
-                        geneName = retrieveTagValue(feat[i].second, "gene", geneName);
-                        product = retrieveTagValue(feat[i].second, "product", product);
-                        annoStrand = retrieveTagValue(feat[i].second, "strand", annoStrand);
-
-                        segFound = 1; // found a match (in annotation) for segment
-                    }
-                }
-
-                // search for geneNames in frequency map
-                geneNames = std::make_pair(retrieveTagValue(feat[0].second, "gene", geneName), retrieveTagValue(feat[1].second, "gene", geneName));
-
-                float value;
-                // check if geneNames first in frequency map
-                if(frequency.count(geneNames.first) > 0 && frequency.count(geneNames.second) > 0) {
-                    if (geneNames.first == geneNames.second) {
-                        value = frequency[geneNames.first] * frequency[geneNames.second];
-                    } else {
-                        value = 2 * (frequency[geneNames.first] * frequency[geneNames.second]);
-                    }
-                } else{
-                    value = 0;
-                }
-
-                // calculate p-value
-                int draws = frequency[geneNames.first] + frequency[geneNames.second];
-                float pval = calc_pvalue(segCntMatch, draws, value);
-
-
-                if(segFound == 1) {
-                    if(segCnt == 0) { entry += qNAME + "\t";}
-                    entry += flag + "\t";
-                    entry += std::to_string(start) + "\t";
-                    entry += std::to_string(end) + "\t";
-                    entry += refID + "\t";
-                    entry += geneName + "\t";
-                    entry += annoStrand + "\t";
-                    entry += "\"" + product  + "\"\t";
-
-                    if(flag == annoStrand) {
-                        entry += "sense\t";
-                    } else {
-                        entry += "antisense\t";
-                    }
-
-                    entry += pval + "\t";
-
-                    segCntMatch++;
-                    segFound = 0;
+            if(entry.second.size() < repcount) {
+                for(int i=entry.second.size();i<repcount;++i) {
+                    fout << "\t0\t.\t.\t.";
                 }
             }
-            segCnt++; // on to the next segment
-        }
-
-        if(segCntMatch == 2) {
-            outInts << entry << hybnrg << "\t" << cmpl << "\n";
+            fout << "\n";
         }
-
-        segCnt = 0; // reset segment
-        segCntMatch = 0;
+        fout.close();
+        std::cout << helper::getTime() << "Interactions written to file: " << allIntsPath.string() << "\n";
     }
-
-    outInts.close();*/
-
+}
 
diff --git a/src/Clustering.cpp b/src/Clustering.cpp
index 084e9ead..320dcaf9 100644
--- a/src/Clustering.cpp
+++ b/src/Clustering.cpp
@@ -18,7 +18,7 @@ void Clustering::iterate(std::string splits) {
         for(auto& split : rec) { // push
             //seqan3::debug_stream << split << std::endl;
             std::optional<int32_t> refId = split.reference_id();
-            uint32_t flag{0}; // SAMFLAG
+            seqan3::sam_flag flag = split.flag(); // SAMFLAG
             uint32_t start = split.reference_position().value();
             uint32_t end = start + split.sequence().size()-1;
             Segment seg(ref_ids[refId.value()], flag, start, end);
@@ -77,7 +77,7 @@ void Clustering::sumup() {
         if(outputFile.is_open()) {
             outputFile << "cluster" << clustID++ << "\t";
             outputFile << result[i].elements[0].refid << "\t";
-            if(result[i].elements[0].flag == 0) {
+            if(!static_cast<bool>(result[i].elements[0].flag & seqan3::sam_flag::on_reverse_strand)) {
                 outputFile << "+" << "\t";
             } else {
                 outputFile << "-" << "\t";
@@ -86,7 +86,7 @@ void Clustering::sumup() {
             outputFile << result[i].elements[0].end << "\t";
 
             outputFile << result[i].elements[1].refid << "\t";
-            if(result[i].elements[1].flag == 0) {
+            if(!static_cast<bool>(result[i].elements[0].flag & seqan3::sam_flag::on_reverse_strand)) {
                 outputFile << "+" << "\t";
             } else {
                 outputFile << "-" << "\t";
diff --git a/src/Data.cpp b/src/Data.cpp
index cbfb701f..d882847f 100644
--- a/src/Data.cpp
+++ b/src/Data.cpp
@@ -409,6 +409,7 @@ void Data::clustering() {
 void Data::analysis() {
     std::cout << helper::getTime() << "Start the Analysis\n";
     Analysis ana(params);
-    callInAndOut(std::bind(&Analysis::start, ana, std::placeholders::_1));
+    callInAndOut(std::bind(&Analysis::start, &ana, std::placeholders::_1, std::placeholders::_2));
+    ana.writeAllInts();
 }
 
diff --git a/src/IBPTree.cpp b/src/IBPTree.cpp
index 3e9a0aa4..f551ee82 100644
--- a/src/IBPTree.cpp
+++ b/src/IBPTree.cpp
@@ -19,10 +19,7 @@ IBPTree::~IBPTree() {}
 // constructs the IBPTree (either from annotations/clusters or both)
 void IBPTree::construct() {
     std::string subcall = params["subcall"].as<std::string>();
-    // fill tree with annotation
-    if(subcall == "detect") { // in the case of detect only annotation is needed
-        iterateFeatures(params["features"].as<std::string>());
-    }
+    iterateFeatures(params["features"].as<std::string>());
 }
 
 void IBPTree::iterateFeatures(std::string featuresFile) {
@@ -77,7 +74,7 @@ void IBPTree::iterateFeatures(std::string featuresFile) {
                 intvl = nullptr;
             }
             intvl = new IntervalData(fields.seqid, fields.strand, id, name, biotype,
-                                     std::make_pair(fields.start, fields.end));
+                                     std::make_pair(fields.start, fields.end), nullptr);
         } else {
             // only if the feature is an exon (needed for detect step)
             if(params["subcall"].as<std::string>() == "detect") {
@@ -102,12 +99,16 @@ void IBPTree::iterateFeatures(std::string featuresFile) {
 }
 
 void IBPTree::iterateClusters(std::string clusterFile) {
-    // iterate over clusters
     std::ifstream clusters(clusterFile);
     if(!clusters) {
         std::cout << helper::getTime() << " Cluster file " << clusterFile << " could not be opened!\n";
         EXIT_FAILURE;
     }
+
+    IntervalData* firstSegment = nullptr;
+    IntervalData* secondSegment = nullptr;
+
+    // variables to buffer the current cluster
     std::string name = "";
     std::pair<std::string, std::string> chroms = std::make_pair("", "");
     std::pair<int, int> start = std::make_pair(0,0);
@@ -115,6 +116,10 @@ void IBPTree::iterateClusters(std::string clusterFile) {
     std::pair<char, char> strand = std::make_pair(' ', ' ');
     std::pair<int, int> counts = std::make_pair(0,0);
     std::pair<int, int> size = std::make_pair(0,0);
+    int nosplits = 0;
+
+    IntervalData* intvl1 = nullptr;
+    IntervalData* intvl2 = nullptr;
 
     std::string line;
     if(getline(clusters, line)) {} // ignore the first line (header)
@@ -130,13 +135,62 @@ void IBPTree::iterateClusters(std::string clusterFile) {
         start = std::make_pair(std::stoi(tokens[3]), std::stoi(tokens[7]));
         end = std::make_pair(std::stoi(tokens[4]), std::stoi(tokens[8]));
         strand = std::make_pair(tokens[2][0], tokens[6][0]);
+        nosplits = std::stoi(tokens[9]);
+
+        if(nosplits < 2) { continue; } // ignore clusters with less than 2 splits reads
+        std::vector<std::pair<Node*, IntervalData*>> fstOvlps = search(chroms.first,
+                                                                       {start.first, end.first});
+
+        if(fstOvlps.size() > 0) {
+            for(auto& res : fstOvlps) {
+                dtp::Interval tmpIntvl = res.second->getInterval();
+                if(start.first < tmpIntvl.first && strand.first == res.second->getStrand()) {
+                    res.second->setId(name + "/" + res.second->getId());
+                    res.second->setName(name + "/" + res.second->getName());
+                    res.second->setInterval({start.first, res.second->getInterval().second});
+                    update(res.first->parent);
+                }
+                if(end.first > tmpIntvl.second && strand.first == res.second->getStrand()) {
+                    res.second->setId(res.second->getId() + "/" + name);
+                    res.second->setName(res.second->getName() + "/" + name);
+                    res.second->setInterval({res.second->getInterval().first, end.first});
+                    update(res.first->parent);
+                }
+                intvl1 = res.second;
+            }
+        } else {
+            intvl1 = new IntervalData(chroms.second, strand.second, name, name, "cluster",
+                                                   std::make_pair(start.second, end.second), nullptr);
+            insert(*intvl1);
+        }
+
+        std::vector<std::pair<Node*, IntervalData*>> secOvlps = search(chroms.second,
+                                                                       {start.second, end.second});
+        if(secOvlps.size() > 0) {
+            for(auto& res : secOvlps) {
+                dtp::Interval tmpIntvl = res.second->getInterval();
+                if(start.second < tmpIntvl.first && strand.second == res.second->getStrand()) {
+                    res.second->setId(name + "/" + res.second->getId());
+                    res.second->setName(name + "/" + res.second->getName());
+                    res.second->setInterval({start.second, res.second->getInterval().second});
+                    update(res.first->parent);
+                }
+                if(end.second > tmpIntvl.second && strand.second == res.second->getStrand()) {
+                    res.second->setId(res.second->getId() + "/" + name);
+                    res.second->setName(res.second->getName() + "/" + name);
+                    res.second->setInterval({res.second->getInterval().first, end.second});
+                    update(res.first->parent);
+                }
+                intvl2 = res.second;
+            }
+        } else {
+            intvl2 = new IntervalData(chroms.second, strand.second, name, name, "cluster",
+                                      std::make_pair(start.second, end.second), nullptr);
+            insert(*intvl2);
+        }
+        intvl1->setSplit(intvl2);
+        intvl2->setSplit(intvl1);
 
-        IntervalData* intvl1 = new IntervalData(chroms.first, strand.first, name, "", "",
-                                                std::make_pair(start.first, end.first));
-        IntervalData* intvl2 = new IntervalData(chroms.second, strand.second, name, "", "",
-                                                std::make_pair(start.second, end.second));
-        insert(*intvl1);
-        insert(*intvl2);
     }
 }
 
@@ -204,9 +258,9 @@ void IBPTree::splitNode(Node* parent, int index) {
     }
 }
 
-std::vector<IntervalData*> IBPTree::search(std::string chrom, dtp::Interval interval) {
+std::vector<std::pair<Node*, IntervalData*>> IBPTree::search(std::string chrom, dtp::Interval interval) {
     Node* root = this->rootnodes[chrom]; // search for the root node
-    std::vector<IntervalData*> result;
+    std::vector<std::pair<Node*, IntervalData*>> result;
     searchIter(root, interval, result);
 
     std::sort(result.begin(), result.end());
@@ -215,11 +269,12 @@ std::vector<IntervalData*> IBPTree::search(std::string chrom, dtp::Interval inte
     return result;
 }
 
-void IBPTree::searchIter(Node* node, const dtp::Interval& interval, std::vector<IntervalData*>& results) {
+void IBPTree::searchIter(Node* node, const dtp::Interval& interval,
+                         std::vector<std::pair<Node*, IntervalData*>>& results) {
     if(node->isLeaf) {
         for(int i=0; i<node->keys.size();++i) {
             if(isOverlapping(interval, node->keys[i].first)) {
-                results.push_back(node->keys[i].second);
+                results.push_back(std::make_pair(node, node->keys[i].second));
             }
         }
 
@@ -241,6 +296,28 @@ void IBPTree::searchIter(Node* node, const dtp::Interval& interval, std::vector<
     }
 }
 
+void IBPTree::update(Node* node) {
+    // update parent of node (until root is reached)
+    if(node == nullptr) {
+        return;
+    } else {
+        for(int i=0; i<node->keys.size(); ++i) {
+            dtp::Interval intvl = node->keys[i].first;
+            std::cout << node->children[i]->keys.size() << "\n";
+            for(int j=0; j<node->children[i]->keys.size(); ++j) {
+                if(node->children[i]->keys[j].first < intvl) {
+                    intvl.first = node->children[i]->keys[j].first.first;
+                }
+                if(node->children[i]->keys[j].first.second > intvl.second) {
+                    intvl.second = node->children[i]->keys[j].first.second;
+                }
+            }
+            node->keys[i].first = intvl;
+        }
+        update(node->parent);
+    }
+}
+
 bool IBPTree::isOverlapping(dtp::Interval intvl1, dtp::Interval intvl2) {
     dtp::Interval intvl = {std::max(intvl1.first, intvl2.first),
                            std::min(intvl1.second, intvl2.second)};
diff --git a/src/Node.cpp b/src/Node.cpp
index 1ab6e95f..247e6694 100644
--- a/src/Node.cpp
+++ b/src/Node.cpp
@@ -1,9 +1,8 @@
 #include "Node.hpp"
 
 IntervalData::IntervalData(std::string chrom, char strand, std::string id, std::string name,
-                   std::string biotype, dtp::Interval interval) :
-    chrom(chrom), strand(strand), id(id), name(name), biotype(biotype), interval(interval) {
-}
+                   std::string biotype, dtp::Interval interval, IntervalData* split) :
+    chrom(chrom), strand(strand), id(id), name(name), biotype(biotype), interval(interval), split{split} {}
 IntervalData::~IntervalData() {}
 bool IntervalData::operator>(const dtp::Interval& other) const { return this->interval.first > other.first; }
 bool IntervalData::operator<(const dtp::Interval& other) const { return this->interval.first < other.first; }
@@ -13,6 +12,8 @@ std::string IntervalData::getChrom() const { return chrom; }
 void IntervalData::setChrom(std::string chrom) { this->chrom = chrom; }
 char IntervalData::getStrand() const { return strand; }
 void IntervalData::setStrand(char strand) { this->strand = strand; }
+std::string IntervalData::getId() const { return id; }
+void IntervalData::setId(std::string id) { this->id = id; }
 std::string IntervalData::getName() const { return name; }
 void IntervalData::setName(std::string name) { this->name = name; }
 std::string IntervalData::getBiotype() const { return biotype; }
@@ -21,6 +22,8 @@ dtp::Interval IntervalData::getInterval() { return interval; }
 void IntervalData::setInterval(dtp::Interval interval) { this->interval = interval; }
 dtp::SpliceJunctions IntervalData::getJunctions() const { return junctions; }
 void IntervalData::setJunctions(dtp::SpliceJunctions junctions) { this->junctions = junctions; }
+IntervalData* IntervalData::getSplit() const { return split; }
+void IntervalData::setSplit(IntervalData* split) { this->split = split; }
 
 // operations
 void IntervalData::addJunction(std::string name, std::pair<size_t,size_t> junction) {
@@ -44,9 +47,8 @@ bool IntervalData::isSubset(int start, int end) {
     return (start >= this->interval.first && end <= this->interval.second);
 }
 
-
 // create new node
-Node::Node(int k) : order{k}, keys{}, children{}, next{nullptr}, parent{nullptr}, splits{}, isLeaf{false} {}
+Node::Node(int k) : order{k}, keys{}, children{}, next{nullptr}, parent{nullptr}, isLeaf{false} {}
 
 // operations
 void Node::addInterval(IntervalData& data) {
diff --git a/src/SplitReadCalling.cpp b/src/SplitReadCalling.cpp
index fec3dc47..7a8855fe 100644
--- a/src/SplitReadCalling.cpp
+++ b/src/SplitReadCalling.cpp
@@ -334,9 +334,9 @@ bool SplitReadCalling::filter(auto& sequence, uint32_t cigarmatch) {
 
 bool SplitReadCalling::matchSpliceSites(dtp::Interval& spliceSites, std::optional<uint32_t> refId) {
     if(params["splicing"].as<std::bitset<1>>() == std::bitset<1>("1")) {
-        std::vector<IntervalData*> ovlps = features.search(this->refIds[refId.value()], spliceSites);
+        std::vector<std::pair<Node*,IntervalData*>> ovlps = features.search(this->refIds[refId.value()], spliceSites);
         for(int i=0;i<ovlps.size();++i) { // iterate over all overlapping intervals
-            dtp::SpliceJunctions junctions = ovlps[i]->getJunctions();
+            dtp::SpliceJunctions junctions = ovlps[i].second->getJunctions();
             for(auto& [name, junc] : junctions) {
                 for(int j=1;j<junc.size()-1;++j) {
                     if(helper::withinRange(spliceSites.first, junc[j-1].second, 5) &&
diff --git a/src/Utility.cpp b/src/Utility.cpp
index 863ab48b..a1a7b494 100644
--- a/src/Utility.cpp
+++ b/src/Utility.cpp
@@ -189,6 +189,18 @@ std::string helper::getTime() {
     return time_string;
 }
 
+
+double stats::median(std::vector<double>& values){
+    std::sort(values.begin(), values.end());
+    size_t size = values.size();
+    if(size % 2 == 0) {
+        return (values[size / 2 - 1] + values[size / 2]) / 2;
+    } else {
+        return values[size / 2];
+    }
+}
+
+
 // ############ SEQIO ############
 bool seqIO::filterReads(auto& qual, int quality, int minlen) {
     // determine the length