Skip to content

Commit

Permalink
Add sequence, slice, and read schema
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Jun 28, 2016
1 parent c1c56d7 commit bf0cb94
Showing 1 changed file with 164 additions and 53 deletions.
217 changes: 164 additions & 53 deletions src/main/resources/avro/bdg.avdl
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ protocol BDG {
/**
Record for describing a reference assembly. Not used for storing the contents
of said assembly.
@see NucleotideContigFragment
*/
record Contig {
/**
Expand Down Expand Up @@ -328,55 +326,6 @@ enum Base {
D // not C
}

/**
Stores a contig of nucleotides; this may be a reference chromosome, an assembly,
or a BAC. Very long contigs (>1Mbp) need to be split into fragments, as they
appear to be too long to load in a single go. For best performance, 10kbp seems
to be a good point at which to start splitting contigs into fragments.

Every field is a nullable union that defaults to null.
*/
record NucleotideContigFragment {
/**
The contig identification descriptor for the contig this fragment belongs to.
*/
union { null, Contig } contig = null;
/**
A description for this contig. When importing from FASTA, the FASTA header
description line should be stored here.
*/
union { null, string } description = null;
/**
The sequence of bases in this fragment.
*/
union { null, string } fragmentSequence = null;
/**
In a fragmented contig, the position of this fragment in the set of fragments.
Can be null if the contig is not fragmented.
*/
union { null, int } fragmentNumber = null;
/**
The position of the first base of this fragment in the overall contig. E.g.,
if all fragments are 10kbp and this is the third fragment in the contig,
the start position would be 20000L (i.e., positions are counted from zero).
*/
union { null, long } fragmentStartPosition = null;
/**
The position of the last base of this fragment in the overall contig. E.g.,
if all fragments are 10kbp and this is the third fragment in the contig,
the end position would be 29999L (i.e., the end position is inclusive).
*/
union { null, long } fragmentEndPosition = null;
/**
The length of this fragment.
*/
union { null, long } fragmentLength = null;
/**
The total count of fragments that this contig has been broken into. Can be
null if the contig is not fragmented.
*/
union { null, int } numberOfFragmentsInContig = null; // total number of fragments in contig
}

/**
Descriptors for the type of a structural variant. The most specific descriptor
should be used, if possible. E.g., duplication should be used instead of
Expand Down Expand Up @@ -1183,7 +1132,7 @@ record Feature {

/**
Sample.
*/
*/
record Sample {

/**
Expand All @@ -1208,4 +1157,166 @@ record Sample {
*/
map<string> attributes = {};
}
}

/**
Alphabet of a sequence; one of DNA, RNA, or protein. Used as the type of the
alphabet field on the Sequence, Slice, and Read records.
*/
enum Alphabet {

/**
DNA alphabet. This is the default alphabet on Sequence, Slice, and Read.
*/
DNA,

/**
RNA alphabet.
*/
RNA,

/**
Protein alphabet.
*/
PROTEIN
}

/**
Sequence: a named, optionally described string of symbols together with the
alphabet it is written in and its length.
*/
record Sequence {

/**
Name of this sequence. Null if not set.
*/
union { null, string } name = null;

/**
Description for this sequence. Null if not set.
*/
union { null, string } description = null;

/**
Alphabet for this sequence, defaults to Alphabet.DNA. Alphabet is listed before
null in the union because an Avro union default must match the first branch.
*/
union { Alphabet, null } alphabet = "DNA";

/**
The sequence itself, stored as a string. Null if not set.
*/
union { null, string } sequence = null;

/**
Length of this sequence. Null if not set.
*/
union { null, long } length = null;
}

/**
View on a contiguous region of a sequence. Carries the name, description, and
alphabet of the sequence being viewed, plus the start, end, and strand that
locate the region on that sequence.
*/
record Slice { // extends Sequence: Avro records cannot inherit, so the Sequence fields are repeated here

/**
Name of the sequence this slice views. Null if not set.
*/
union { null, string } name = null;

/**
Description for the sequence this slice views. Null if not set.
*/
union { null, string } description = null;

/**
Alphabet for the sequence this slice views, defaults to Alphabet.DNA. Alphabet
is listed before null in the union because an Avro union default must match the
first branch.
*/
union { Alphabet, null } alphabet = "DNA";

/**
Sequence for this slice. Null if not set.
*/
union { null, string } sequence = null;

/**
Start position for this slice on the sequence this slice views, in 0-based coordinate
system with closed-open intervals (the start position is included in the slice).
*/
union { null, long } start = null;

/**
End position for this slice on the sequence this slice views, in 0-based coordinate
system with closed-open intervals (the end position is excluded from the slice).
*/
union { null, long } end = null;

/**
Strand for this slice, if any. Defaults to Strand.Independent. Strand is listed
before null in the union because an Avro union default must match the first branch.
*/
union { Strand, null } strand = "Independent";

/**
Length of this slice. Null if not set.
*/
union { null, long } length = null;
}

/**
FASTQ sequence format variant. Each variant is named after the platform or
pipeline versions it corresponds to. Used as the type of the fastqVariant field
on the Read record.
*/
enum FastqVariant {

/**
Sanger and Illumina version &gt;= 1.8 FASTQ sequence format variant. This is the
default variant on Read.
*/
SANGER,

/**
Solexa and Illumina version 1.0 FASTQ sequence format variant.
*/
SOLEXA,

/**
Illumina version &gt;= 1.3 and &lt; 1.8 FASTQ sequence format variant.
*/
ILLUMINA
}

/**
Sequence with quality scores. Carries the same fields as Sequence, plus the
FASTQ sequence format variant and the quality scores for the read.
*/
record Read { // extends Sequence: Avro records cannot inherit, so the Sequence fields are repeated here

/**
Name of this read. Null if not set.
*/
union { null, string } name = null;

/**
Description for this read. Null if not set.
*/
union { null, string } description = null;

/**
Alphabet for this read, defaults to Alphabet.DNA. Alphabet is listed before null
in the union because an Avro union default must match the first branch.
*/
union { Alphabet, null } alphabet = "DNA";

/**
Sequence for this read. Null if not set.
*/
union { null, string } sequence = null;

/**
Length of this read. Null if not set.
*/
union { null, long } length = null;

/**
FASTQ sequence format variant for this read, defaults to FastqVariant.SANGER.
FastqVariant is listed before null in the union because an Avro union default
must match the first branch.
*/
union { FastqVariant, null } fastqVariant = "SANGER";

/**
Quality scores for this read, stored as a string. Null if not set.
NOTE(review): presumably encoded according to fastqVariant; confirm against
the code that reads and writes this field.
*/
union { null, string } qualityScores = null;
}
}

0 comments on commit bf0cb94

Please sign in to comment.