Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix windows #19

Merged
merged 6 commits into from
Oct 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ language: python
python:
- "3.6"
install:
- pip install cython
- pip install cython pytest
- python setup.py install

# unit tests in tests/test_ncls.py run here; functionality is also tested in the pyranges package
script: cd ~/; python -c 'import ncls; print(ncls.__version__)'
script: py.test tests/test_ncls.py && cd ~/ && python -c 'import ncls; print(ncls.__version__)'
6 changes: 5 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# 0.0.44 (unreleased)
# 0.0.45 (07.10.19)
- try to fix another windows error

# 0.0.44 (10.09.19)
- remove sys.time dep. perhaps it now works on windows?
- add k_find_both
- fix bug when large ints were used

# 0.0.43 (23.07.19)
- add coverage-method which finds bp overlap of intervals
Expand Down
54 changes: 49 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Nested containment list

[![Build Status](https://travis-ci.org/hunt-genes/ncls.svg?branch=master)](https://travis-ci.org/hunt-genes/ncls) [![PyPI version](https://badge.fury.io/py/ncls.svg)](https://badge.fury.io/py/ncls)
[![Build Status](https://travis-ci.org/biocore-ntnu/ncls.svg?branch=master)](https://travis-ci.org/biocore-ntnu/ncls) [![PyPI version](https://badge.fury.io/py/ncls.svg)](https://badge.fury.io/py/ncls)

The Nested Containment List is a datastructure for interval overlap queries,
like the interval tree. It is usually an order of magnitude faster than the
Expand All @@ -14,7 +14,8 @@ gains.
It was implemented to be the cornerstone of the PyRanges project, but I have made
it available to the Python community as a stand-alone library. Enjoy.

Paper: https://academic.oup.com/bioinformatics/article/23/11/1386/199545
Original Paper: https://academic.oup.com/bioinformatics/article/23/11/1386/199545

Cite: http://dx.doi.org/10.1093/bioinformatics/btz615

## Install

Expand All @@ -25,7 +26,6 @@ pip install ncls
## Usage

```python
# see the examples/ folder for more examples
from ncls import NCLS

import pandas as pd
Expand All @@ -34,6 +34,16 @@ starts = pd.Series(range(0, 5))
ends = starts + 100
ids = starts

subject_df = pd.DataFrame({"Start": starts, "End": ends}, index=ids)

print(subject_df)
# Start End
# 0 0 100
# 1 1 101
# 2 2 102
# 3 3 103
# 4 4 104

ncls = NCLS(starts.values, ends.values, ids.values)

# python API, slower
Expand All @@ -47,11 +57,45 @@ starts_query = pd.Series([1, 3])
ends_query = pd.Series([52, 14])
indexes_query = pd.Series([10000, 100])

query_df = pd.DataFrame({"Start": starts_query.values, "End": ends_query.values}, index=indexes_query.values)

query_df
# Start End
# 10000 1 52
# 100 3 14


# everything done in C/Cython; faster
ncls.all_overlaps_both(starts_query.values, ends_query.values, indexes_query.values)
l_idxs, r_idxs = ncls.all_overlaps_both(starts_query.values, ends_query.values, indexes_query.values)
l_idxs, r_idxs
# (array([10000, 10000, 10000, 10000, 10000, 100, 100, 100, 100,
# 100]), array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]))

print(query_df.loc[l_idxs])
# Start End
# 10000 1 52
# 10000 1 52
# 10000 1 52
# 10000 1 52
# 10000 1 52
# 100 3 14
# 100 3 14
# 100 3 14
# 100 3 14
# 100 3 14
print(subject_df.loc[r_idxs])
# Start End
# 0 0 100
# 1 1 101
# 2 2 102
# 3 3 103
# 4 4 104
# 0 0 100
# 1 1 101
# 2 2 102
# 3 3 103
# 4 4 104

# return intervals in python (slow/mem-consuming)
intervals = ncls.intervals()
intervals
Expand All @@ -74,7 +118,7 @@ usage is one fifth and one ninth.

## Cite

https://www.biorxiv.org/content/10.1101/609396v1
http://dx.doi.org/10.1093/bioinformatics/btz615

## Original paper

Expand Down
11 changes: 6 additions & 5 deletions ncls/src/cncls.pxd
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from libc.stdint cimport int64_t

# cdef extern from "string.h":
# ctypedef int size_t
Expand Down Expand Up @@ -39,8 +40,8 @@ cdef extern from "stdio.h":

cdef extern from "ncls/src/intervaldb.h":
ctypedef struct IntervalMap:
int start
int end
int64_t start
int64_t end
int target_id
int sublist

Expand All @@ -51,7 +52,7 @@ cdef extern from "ncls/src/intervaldb.h":
int start
int len

int find_overlap_start(int start, int end, IntervalMap im[], int n)
int find_overlap_start(int64_t start, int64_t end, IntervalMap im[], int n)
int imstart_qsort_cmp(void *void_a,void *void_b)
# int target_qsort_cmp(void *void_a,void *void_b)
IntervalMap *read_intervals(int n,FILE *ifile)
Expand All @@ -68,8 +69,8 @@ cdef extern from "ncls/src/intervaldb.h":
int *nfound)

int find_intervals(IntervalIterator *it0,
int start,
int end,
int64_t start,
int64_t end,
IntervalMap im[],
int n,
SublistHeader subheader[],
Expand Down
2 changes: 1 addition & 1 deletion ncls/src/cncls32.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from libc.stdint cimport int32_t
from libc.stdint cimport int32_t, int64_t

cdef extern from "stdlib.h":
void free(void *)
Expand Down
9 changes: 5 additions & 4 deletions ncls/src/intervaldb.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@


#include <stdint.h>
#include "intervaldb.h"

int C_int_max=INT_MAX; /* KLUDGE TO LET PYREX CODE ACCESS VALUE OF INT_MAX MACRO */
Expand Down Expand Up @@ -331,9 +332,9 @@ IntervalMap *interval_map_alloc(int n)



inline int find_overlap_start(int start,int end,IntervalMap im[],int n)
inline int64_t find_overlap_start(int64_t start,int64_t end,IntervalMap im[],int n)
{
int l=0,mid,r;
int64_t l=0,mid,r;

r=n-1;
while (l<r) {
Expand Down Expand Up @@ -426,14 +427,14 @@ void reorient_intervals(int n,IntervalMap im[],int ori_sign)
}


int find_intervals(IntervalIterator *it0, int start, int end,
int64_t find_intervals(IntervalIterator *it0, int64_t start, int64_t end,
IntervalMap im[],int n,
SublistHeader subheader[], int nlists,
IntervalMap buf[], int nbuf,
int *p_nreturn, IntervalIterator **it_return)
{
IntervalIterator *it=NULL,*it2=NULL;
int ibuf=0,j,k,ori_sign=1;
int64_t ibuf=0,j,k,ori_sign=1;
if (!it0) { /* ALLOCATE AN ITERATOR IF NOT SUPPLIED*/
CALLOC(it,1,IntervalIterator);
}
Expand Down
22 changes: 10 additions & 12 deletions ncls/src/intervaldb.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,23 @@
#include "default.h"
#include <limits.h>

#include <stdint.h>

extern int C_int_max;

typedef struct {
int start;
int end;
int target_id;
int64_t start;
int64_t end;
int64_t target_id;
/* int target_start; */
/* int target_end; */
int sublist;
int64_t sublist;
} IntervalMap;


typedef struct {
int start;
int end;
int64_t start;
int64_t end;
} IntervalIndex;

typedef struct {
Expand Down Expand Up @@ -75,7 +77,7 @@ typedef struct {

extern int *alloc_array(int n);

extern int find_overlap_start(int start,int end,IntervalMap im[],int n);
extern int64_t find_overlap_start(int64_t start,int64_t end,IntervalMap im[],int n);
extern int imstart_qsort_cmp(const void *void_a,const void *void_b);
extern int target_qsort_cmp(const void *void_a,const void *void_b);
extern IntervalMap *read_intervals(int n,FILE *ifile);
Expand All @@ -88,7 +90,7 @@ extern IntervalDB *build_interval_db(IntervalMap im[],int n);
extern IntervalIterator *interval_iterator_alloc(void);
extern int free_interval_iterator(IntervalIterator *it);
extern IntervalIterator *reset_interval_iterator(IntervalIterator *it);
extern int find_intervals(IntervalIterator *it0,int start,int end,IntervalMap im[],int n,SublistHeader subheader[],int nlists,IntervalMap buf[],int nbuf,int *p_nreturn,IntervalIterator **it_return);
extern int64_t find_intervals(IntervalIterator *it0,int64_t start,int64_t end,IntervalMap im[],int n,SublistHeader subheader[],int nlists,IntervalMap buf[],int nbuf,int *p_nreturn,IntervalIterator **it_return);
extern int read_imdiv(FILE *ifile,IntervalMap imdiv[],int div,int i_div,int ntop);
extern IntervalMap *read_sublist(FILE *ifile,SublistHeader *subheader,IntervalMap *im);
extern int find_file_intervals(IntervalIterator *it0,int start,int end,
Expand All @@ -110,10 +112,6 @@ extern int save_text_file(char filestem[],char err_msg[],
extern int text_file_to_binaries(FILE *infile,char buildpath[],char err_msg[]);
extern void reorient_intervals(int n,IntervalMap im[],int ori_sign);

extern int find_intervals_stack(int start_stack[], int end_stack[], int sp, int start,
int end, IntervalMap im[], int n,
SublistHeader subheader[], IntervalMap buf[],
int *nfound);
/* extern int find_k_next(int start, int end, */
/* IntervalMap im[], int n, */
/* SublistHeader subheader[], int nlists, */
Expand Down
Loading