forked from chenlianfu/geta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconf_for_big_genome.txt
76 lines (59 loc) · 4.41 KB
/
conf_for_big_genome.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
## Set the detailed parameters of the modules in the GETA pipeline through this config file.
# The parameters set below will override the default thresholds.
# The template conf.txt in the GETA software holds the default values.
# Lines starting with # are comments and will be ignored. Blank lines will be ignored as well.
[RepeatMasker]
-e ncbi -gff
# During the step of RepeatMasker: -species was passed by --RM_species; -pa was passed by --CPU.
[trimmomatic]
TruSeq3-PE-2.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:50 TOPHRED33
# note: must start with the adapter file name.
[hisat2-build]
-p 1
# note: the multi threads parameter -p was set to 1 by default, which was not passed by --CPU as usual.
[hisat2]
--min-intronlen 20 --max-intronlen 20000 --dta --score-min L,0.0,-0.4
# During the step of hisat2: -x was set to "genome"; -p was passed by --CPU;
[sam2transfrag]
--fraction 0.05 --min_expressed_base_depth 2 --max_expressed_base_depth 50 --min_junction_depth 2 --max_junction_depth 50 --min_fragment_count_per_transfrags 10 --min_intron_length 20
# During the step of sam2transfrag: --no_strand_specific was passed reversely by --strand_specific;
# If the RNA-Seq reads contain single-end data, --no_PE_sequencing should be added. Otherwise, the alignments of single-end reads will be ignored.
# The default values mean: the dynamic coverage threshold of each mapping region (coming from the SAM file) is determined by the maximum base depth of that region * 0.05, and this threshold is kept between 2 and 50.
[TransDecoder.LongOrfs]
-m 100 -G universal
# note: the strand-specific parameter -S will be set automatically if the transcripts have introns from which the direction can be determined.
[TransDecoder.Predict]
--retain_long_orfs_mode dynamic
# note: when predicting the ORF of transfrag with single exon, the parameter --train will be set automatically to the result of transfrag with multi exons.
[homolog_genewise]
--coverage_ratio 0.4 --evalue 1e-9
# During the step of homolog_genewise: --cpu was passed by --CPU; --max_gene_length, --segmentSize and --overlapSize were automatically calculated.
[homolog_genewiseGFF2GFF3]
--min_score 15 --gene_prefix genewise --filterMiddleStopCodon
# During the step of homolog_genewiseGFF2GFF3: --input_genewise_start_info was fixed to "genewise.start_info.txt"; --output_start_and_stop_hints_of_augustus was fixed to "genewise.start_stop_hints.gff".
[geneModels2AugusutsTrainingInput]
--min_evalue 1e-9 --min_identity 0.8 --min_coverage_ratio 0.8 --min_cds_num 2 --min_cds_length 600 --min_cds_exon_ratio 0.60
# During the step of geneModels2AugusutsTrainingInput: --cpu was passed by --CPU; --out_prefix was set to ati.
# If the number of gene models in ati.filter2.gff3 is too small, it is recommended to decrease --min_cds_exon_ratio and increase --min_coverage_ratio.
[BGM2AT]
--min_gene_number_for_augustus_training 500 --gene_number_for_accuracy_detection 200 --min_gene_number_of_optimize_augustus_chunk 50 --max_gene_number_of_optimize_augustus_chunk 200
# During the step of BGM2AT: --flanking_length was automatically calculated; --stopAfterFirstEtraining was set or not set at different steps; --onlytrain_GFF3 was set to an intermediate file.
[prepareAugusutusHints]
--margin 20
# note: to obtain the exonpart or cdspart hints, the margin 20bp will be removed.
[paraAugusutusWithHints]
--gene_prefix augustus --min_intron_len 30 --alternatives_from_evidence
# During the step of paraAugusutusWithHints: --species was passed by --augustus_species; --cpu was passed by --CPU; --segmentSize and --overlapSize were automatically calculated.
[paraCombineGeneModels]
--overlap 30 --min_augustus_transcriptSupport_percentage 50.0 --min_augustus_intronSupport_number 1 --min_augustus_intronSupport_ratio 0.5
# During the step of paraCombineGeneModels: --cpu was passed by --CPU.
[PfamValidateABinitio]
--CDS_length 1200 --CDS_num 4 --evalue 1e-9 --coverage 0.4
# During the step of PfamValidateABinitio: --pfam_db was passed by --pfam_db; --cpu was passed by --CPU; --out_prefix was fixed to "combine2".
[remove_genes_in_repeats]
--ratio 0.2
# Gene models overlapping with repeats are removed if the overlap ratio of their CDS region is >= the --ratio value (0.2 in this file). If this value is set > 1.0, the gene models overlapping with repeats will not be removed.
# During the step of remove_genes_in_repeats: --filtered_gene_models was fixed to "genome.completed.genes_in_repeats.gff3".
[remove_short_genes]
--cds_length 450
# Finally, the short gene models are filtered by PfamValidateABinitio.