% qg2021.bib -- BibTeX bibliography database
@article{BrowningBrowning2009,
abstract = {We present methods for imputing data for ungenotyped markers and for inferring haplotype phase in large data sets of unrelated individuals and parent-offspring trios. Our methods make use of known haplotype phase when it is available, and our methods are computationally efficient so that the full information in large reference panels with thousands of individuals is utilized. We demonstrate that substantial gains in imputation accuracy accrue with increasingly large reference panel sizes, particularly when imputing low-frequency variants, and that unphased reference panels can provide highly accurate genotype imputation. We place our methodology in a unified framework that enables the simultaneous use of unphased and phased data from trios and unrelated individuals in a single analysis. For unrelated individuals, our imputation methods produce well-calibrated posterior genotype probabilities and highly accurate allele-frequency estimates. For trios, our haplotype-inference method is four orders of magnitude faster than the gold-standard PHASE program and has excellent accuracy. Our methods enable genotype imputation to be performed with unphased trio or unrelated reference panels, thus accounting for haplotype-phase uncertainty in the reference panel. We present a useful measure of imputation accuracy, allelic R(2), and show that this measure can be estimated accurately from posterior genotype probabilities. Our methods are implemented in version 3.0 of the BEAGLE software package.},
author = {Browning, Brian L. and Browning, Sharon R.},
doi = {10.1016/j.ajhg.2009.01.005},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/American Journal of Human Genetics/Browning, Browning - 2009 - A unified approach to genotype imputation and haplotype-phase inference for large data sets of trios and unr.pdf:pdf},
issn = {1537-6605},
journal = {Am. J. Hum. Genet.},
keywords = {Beagle,Computer Simulation,Female,Gene Frequency,Gene Frequency: genetics,Genetic,Genotype,Haplotypes,Haplotypes: genetics,Humans,Male,Markov Chains,Models,Nuclear Family,Reproducibility of Results},
mendeley-tags = {Beagle},
month = feb,
number = {2},
pages = {210--223},
pmid = {19200528},
title = {A unified approach to genotype imputation and haplotype-phase inference for large data sets of trios and unrelated individuals},
url = {http://www.sciencedirect.com/science/article/pii/S0002929709000123},
volume = {84},
year = {2009}
}
@article{BrowningYu2009,
abstract = {We present a novel method for simultaneous genotype calling and haplotype-phase inference. Our method employs the computationally efficient BEAGLE haplotype-frequency model, which can be applied to large-scale studies with millions of markers and thousands of samples. We compare genotype calls made with our method to genotype calls made with the BIRDSEED, CHIAMO, GenCall, and ILLUMINUS genotype-calling methods, using genotype data from the Illumina 550K and Affymetrix 500K arrays. We show that our method has higher genotype-call accuracy and yields fewer uncalled genotypes than competing methods. We perform single-marker analysis of data from the Wellcome Trust Case Control Consortium bipolar disorder and type 2 diabetes studies. For bipolar disorder, the genotype calls in the original study yield 25 markers with apparent false-positive association with bipolar disorder at a p < 10(-7) significance level, whereas genotype calls made with our method yield no associated markers at this significance threshold. Conversely, for markers with replicated association with type 2 diabetes, there is good concordance between genotype calls used in the original study and calls made by our method. Results from single-marker and haplotypic analysis of our method's genotype calls for the bipolar disorder study indicate that our method is highly effective at eliminating genotyping artifacts that cause false-positive associations in genome-wide association studies. Our new genotype-calling methods are implemented in the BEAGLE and BEAGLECALL software packages.},
author = {Browning, Brian L. and Yu, Zhaoxia},
doi = {10.1016/j.ajhg.2009.11.004},
issn = {1537-6605},
journal = {Am. J. Hum. Genet.},
keywords = {Algorithms,Alleles,Beagle,Data Interpretation,False Positive Reactions,Genetic Markers,Genome-Wide Association Study,Genotype,Haplotypes,Humans,Models,Oligonucleotide Array Sequence Analysis,Probability,Reproducibility of Results,Software,Statistical},
mendeley-groups = {Software,DGRP genomic features},
mendeley-tags = {Beagle},
month = dec,
number = {6},
pages = {847--861},
pmid = {19931040},
title = {Simultaneous genotype calling and haplotype phasing improves genotype accuracy and reduces false-positive associations for genome-wide association studies},
url = {http://www.sciencedirect.com/science/article/pii/S0002929709005199},
volume = {85},
year = {2009}
}
@article{DelosCampos2010,
abstract = {Although genome-wide association studies have identified markers that are associated with various human traits and diseases, our ability to predict such phenotypes remains limited. A perhaps overlooked explanation lies in the limitations of the genetic models and statistical techniques commonly used in association studies. We propose that alternative approaches, which are largely borrowed from animal breeding, provide potential for advances. We review selected methods and discuss the challenges and opportunities ahead.},
author = {de los Campos, Gustavo and Gianola, Daniel and Allison, David B.},
doi = {10.1038/nrg2898},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Nature Reviews Genetics/de los Campos, Gianola, Allison - 2010 - Predicting genetic predisposition in humans the promise of whole-genome markers.pdf:pdf},
issn = {1471-0064},
journal = {Nat. Rev. Genet.},
keywords = {Disease,Disease: genetics,Genetic Markers,Genetic Predisposition to Disease,Genome-Wide Association Study,Humans},
month = dec,
number = {12},
pages = {880--886},
pmid = {21045869},
publisher = {Nature Publishing Group},
title = {Predicting genetic predisposition in humans: the promise of whole-genome markers},
url = {http://www.ncbi.nlm.nih.gov/pubmed/21045869},
volume = {11},
year = {2010}
}
@article{Johnson1995,
author = {Johnson, D. L. and Thompson, Robin},
doi = {10.3168/jds.S0022-0302(95)76654-1},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Journal of Dairy Science/Johnson, Thompson - 1995 - Restricted Maximum Likelihood Estimation of Variance Components for Univariate Animal Models Using Sparse Mat.pdf:pdf},
issn = {0022-0302},
journal = {J. Dairy Sci.},
keywords = {AI-REML,Per's references.},
mendeley-tags = {Per's references.},
month = feb,
number = {2},
pages = {449--456},
publisher = {Elsevier},
title = {Restricted Maximum Likelihood Estimation of Variance Components for Univariate Animal Models Using Sparse Matrix Techniques and Average Information},
url = {http://www.journalofdairyscience.org/article/S0022-0302(95)76654-1/abstract},
volume = {78},
year = {1995}
}
@article{langoallen2010,
abstract = {Most common human traits and diseases have a polygenic pattern of inheritance: DNA sequence variants at many genetic loci influence the phenotype. Genome-wide association (GWA) studies have identified more than 600 variants associated with human traits, but these typically explain small fractions of phenotypic variation, raising questions about the use of further studies. Here, using 183,727 individuals, we show that hundreds of genetic variants, in at least 180 loci, influence adult height, a highly heritable and classic polygenic trait. The large number of loci reveals patterns with important implications for genetic studies of common human diseases and traits. First, the 180 loci are not random, but instead are enriched for genes that are connected in biological pathways (P = 0.016) and that underlie skeletal growth defects (P < 0.001). Second, the likely causal gene is often located near the most strongly associated variant: in 13 of 21 loci containing a known skeletal growth gene, that gene was closest to the associated variant. Third, at least 19 loci have multiple independently associated variants, suggesting that allelic heterogeneity is a frequent feature of polygenic traits, that comprehensive explorations of already-discovered loci should discover additional variants and that an appreciable fraction of associated loci may have been identified. Fourth, associated variants are enriched for likely functional effects on genes, being over-represented among variants that alter amino-acid structure of proteins and expression levels of nearby genes. Our data explain approximately 10\% of the phenotypic variation in height, and we estimate that unidentified common variants of similar effect sizes would increase this figure to approximately 16\% of phenotypic variation (approximately 20\% of heritable variation).
Although additional approaches are needed to dissect the genetic architecture of polygenic human traits fully, our findings indicate that GWA studies can identify large numbers of loci that implicate biologically relevant genes and pathways.},
author = {{Lango Allen}, Hana and Estrada, Karol and Lettre, Guillaume and Berndt, Sonja I. and Weedon, Michael N. and Rivadeneira, Fernando and Willer, Cristen J. and Jackson, Anne U. and Vedantam, Sailaja and Raychaudhuri, Soumya and Ferreira, Teresa and Wood, Andrew R. and Weyant, Robert J. and Segr{\`e}, Ayellet V. and Speliotes, Elizabeth K. and Wheeler, Eleanor and Soranzo, Nicole and Park, Ju-Hyun and Yang, Jian and Gudbjartsson, Daniel and Heard-Costa, Nancy L. and Randall, Joshua C. and Qi, Lu and {Vernon Smith}, Albert and M{\"a}gi, Reedik and Pastinen, Tomi and Liang, Liming and Heid, Iris M. and Luan, Jian'an and Thorleifsson, Gudmar and others},
doi = {10.1038/nature09410},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Nature/Lango Allen et al. - 2010 - Hundreds of variants clustered in genomic loci and biological pathways affect human height.pdf:pdf},
issn = {1476-4687},
journal = {Nature},
keywords = {Body Height,Body Height: genetics,Chromosomes,Genetic Loci,Genetic Loci: genetics,Genetic Predisposition to Disease,Genetic Predisposition to Disease: genetics,Genome,Genome-Wide Association Study,Human,Human: genetics,Humans,Metabolic Networks and Pathways,Metabolic Networks and Pathways: genetics,Multifactorial Inheritance,Multifactorial Inheritance: genetics,Pair 3,Pair 3: genetics,Pathways,Phenotype,Polymorphism,Single Nucleotide,Single Nucleotide: genetics},
mendeley-tags = {Pathways},
month = oct,
number = {7317},
pages = {832--838},
pmid = {20881960},
publisher = {Nature Publishing Group},
title = {Hundreds of variants clustered in genomic loci and biological pathways affect human height},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2955183&tool=pmcentrez&rendertype=abstract},
volume = {467},
year = {2010}
}
@article{Mackay2012,
abstract = {A major challenge of biology is understanding the relationship between molecular genetic variation and variation in quantitative traits, including fitness. This relationship determines our ability to predict phenotypes from genotypes and to understand how evolutionary forces shape variation within and between species. Previous efforts to dissect the genotype-phenotype map were based on incomplete genotypic information. Here, we describe the Drosophila melanogaster Genetic Reference Panel (DGRP), a community resource for analysis of population genomics and quantitative traits. The DGRP consists of fully sequenced inbred lines derived from a natural population. Population genomic analyses reveal reduced polymorphism in centromeric autosomal regions and the X chromosome, evidence for positive and negative selection, and rapid evolution of the X chromosome. Many variants in novel genes, most at low frequency, are associated with quantitative traits and explain a large fraction of the phenotypic variance. The DGRP facilitates genotype-phenotype mapping using the power of Drosophila genetics.},
author = {Mackay, Trudy F. C. and Richards, Stephen and Stone, Eric A. and Barbadilla, Antonio and Ayroles, Julien F. and Zhu, Dianhui and Casillas, S{\`o}nia and Han, Yi and Magwire, Michael M. and Cridland, Julie M. and Richardson, Mark F. and Anholt, Robert R. H. and Barr{\'o}n, Maite and Bess, Crystal and Blankenburg, Kerstin Petra and Carbone, Mary Anna and Castellano, David and Chaboub, Lesley and Duncan, Laura and Harris, Zeke and Javaid, Mehwish and Jayaseelan, Joy Christina and Jhangiani, Shalini N. and Jordan, Katherine W. and Lara, Fremiet and Lawrence, Faye and Lee, Sandra L. and Librado, Pablo and Linheiro, Raquel S. and Lyman, Richard F. and Mackey, Aaron J and Munidasa, Mala and Muzny, Donna Marie and Nazareth, Lynne and Newsham, Irene and Perales, Lora and Pu, Ling-Ling and Qu, Carson and R{\`a}mia, Miquel and Reid, Jeffrey G. and Rollmann, Stephanie M. and Rozas, Julio and Saada, Nehad and Turlapati, Lavanya and Worley, Kim C. and Wu, Yuan-Qing and Yamamoto, Akihiko and Zhu, Yiming and Bergman, Casey M. and Thornton, Kevin R. and Mittelman, David and Gibbs, Richard A.},
doi = {10.1038/nature10811},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Nature/Mackay et al. - 2012 - The Drosophila melanogaster Genetic Reference Panel.pdf:pdf},
issn = {1476-4687},
journal = {Nature},
keywords = {Alleles,Animals,Centromere,Centromere: genetics,Chromosomes,DGRP,Drosophila melanogaster,Drosophila melanogaster: genetics,Genetic,Genetic: genetics,Genome-Wide Association Study,Genomics,Genotype,Insect,Insect: genetics,Phenotype,Polymorphism,Quantitative Trait Loci,Quantitative Trait Loci: genetics,Selection,Single Nucleotide,Single Nucleotide: genetics,Starvation,Starvation: genetics,Telomere,Telomere: genetics,X Chromosome,X Chromosome: genetics},
mendeley-tags = {DGRP},
month = feb,
number = {7384},
pages = {173--178},
pmid = {22318601},
publisher = {Nature Publishing Group},
title = {The \textit{Drosophila melanogaster} {Genetic Reference Panel}},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3683990&tool=pmcentrez&rendertype=abstract},
volume = {482},
year = {2012}
}
@inproceedings{Madsen1994,
address = {Guelph, Canada},
author = {Madsen, Per and Jensen, Just and Thompson, Robin},
booktitle = {5th WCGALP},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/5th WCGALP/Madsen, Jensen, Thompson - 1994 - Estimation of (co)variance components by REML in multivariate mixed linear models using average of obs.pdf:pdf},
keywords = {Per's references,REML},
mendeley-tags = {Per's references},
pages = {455--462},
title = {Estimation of (co)variance components by {REML} in multivariate mixed linear models using average of observed and expected information},
year = {1994}
}
@article{Makowsky2011,
abstract = {Despite rapid advances in genomic technology, our ability to account for phenotypic variation using genetic information remains limited for many traits. This has unfortunately resulted in limited application of genetic data towards preventive and personalized medicine, one of the primary impetuses of genome-wide association studies. Recently, a large proportion of the "missing heritability" for human height was statistically explained by modeling thousands of single nucleotide polymorphisms concurrently. However, it is currently unclear how gains in explained genetic variance will translate to the prediction of yet-to-be observed phenotypes. Using data from the Framingham Heart Study, we explore the genomic prediction of human height in training and validation samples while varying the statistical approach used, the number of SNPs included in the model, the validation scheme, and the number of subjects used to train the model. In our training datasets, we are able to explain a large proportion of the variation in height (h(2) up to 0.83, R(2) up to 0.96). However, the proportion of variance accounted for in validation samples is much smaller (ranging from 0.15 to 0.36 depending on the degree of familial information used in the training dataset). While such R(2) values vastly exceed what has been previously reported using a reduced number of pre-selected markers (<0.10), given the heritability of the trait ($\sim$0.80), substantial room for improvement remains.},
author = {Makowsky, Robert and Pajewski, Nicholas M. and Klimentidis, Yann C. and Vazquez, Ana I. and Duarte, Christine W. and Allison, David B. and de los Campos, Gustavo},
doi = {10.1371/journal.pgen.1002051},
editor = {Gibson, Greg},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/PLoS Genetics/Makowsky et al. - 2011 - Beyond missing heritability prediction of complex traits.pdf:pdf;:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/PLoS Genetics/Makowsky et al. - 2011 - Beyond missing heritability prediction of complex traits(2).pdf:pdf},
issn = {1553-7404},
journal = {PLoS Genet.},
keywords = {Bayes Theorem,Body Height,Body Height: genetics,Genome,Genome-Wide Association Study,Genotype,Heritable,Human,Humans,Peters intro refs,Phenotype,Polymorphism,Quantitative Trait,Single Nucleotide},
mendeley-tags = {Peters intro refs},
month = apr,
number = {4},
pages = {e1002051},
pmid = {21552331},
publisher = {Public Library of Science},
title = {Beyond missing heritability: prediction of complex traits},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3084207&tool=pmcentrez&rendertype=abstract},
volume = {7},
year = {2011}
}
@article{Manolio2009,
abstract = {Genome-wide association studies have identified hundreds of genetic variants associated with complex human diseases and traits, and have provided valuable insights into their genetic architecture. Most variants identified so far confer relatively small increments in risk, and explain only a small proportion of familial clustering, leading many to question how the remaining, 'missing' heritability can be explained. Here we examine potential sources of missing heritability and propose research strategies, including and extending beyond current genome-wide association approaches, to illuminate the genetics of complex diseases and enhance its potential to enable effective disease prevention or treatment.},
author = {Manolio, Teri A. and Collins, Francis S. and Cox, Nancy J. and Goldstein, David B. and Hindorff, Lucia A. and Hunter, David J. and McCarthy, Mark I. and Ramos, Erin M. and Cardon, Lon R. and Chakravarti, Aravinda and Cho, Judy H. and Guttmacher, Alan E. and Kong, Augustine and Kruglyak, Leonid and Mardis, Elaine and Rotimi, Charles N. and Slatkin, Montgomery and Valle, David and Whittemore, Alice S. and Boehnke, Michael and Clark, Andrew G. and Eichler, Evan E. and Gibson, Greg and Haines, Jonathan L. and Mackay, Trudy F. C. and McCarroll, Steven A. and Visscher, Peter M.},
doi = {10.1038/nature08494},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Nature/Manolio et al. - 2009 - Finding the missing heritability of complex diseases.pdf:pdf},
issn = {1476-4687},
journal = {Nature},
keywords = {Genetic Diseases,Genetic Predisposition to Disease,Genetic Predisposition to Disease: genetics,Genetics,Genome-Wide Association Study,Genome-Wide Association Study: methods,Genome-Wide Association Study: trends,Humans,Inborn,Inborn: genetics,Inheritance Patterns,Inheritance Patterns: genetics,Medical,Medical: methods,Medical: trends,Pedigree,Peters intro refs},
mendeley-tags = {Peters intro refs},
month = oct,
number = {7265},
pages = {747--753},
pmid = {19812666},
publisher = {Nature Publishing Group},
title = {Finding the missing heritability of complex diseases},
url = {http://dx.doi.org/10.1038/nature08494},
volume = {461},
year = {2009}
}
@article{Penagaricano2012,
abstract = {Whole-genome association studies typically focus on genetic markers with the strongest evidence of association. However, single markers often explain only a small component of the genetic variance and hence offer a limited understanding of the trait under study. As such, the objective of this study was to perform a pathway-based association analysis in Holstein dairy cattle in order to identify relevant pathways involved in bull fertility. The results of a single-marker association analysis, using 1,755 bulls with sire conception rate data and genotypes for 38,650 single nucleotide polymorphisms (SNPs), were used in this study. A total of 16,819 annotated genes, including 2,767 significantly associated with bull fertility, were used to interrogate a total of 662 Gene Ontology (GO) terms and 248 InterPro (IP) entries using a test of proportions based on the cumulative hypergeometric distribution. After multiple-testing correction, 20 GO categories and one IP entry showed significant overrepresentation of genes statistically associated with bull fertility. Several of these functional categories such as small GTPases mediated signal transduction, neurogenesis, calcium ion binding, and cytoskeleton are known to be involved in biological processes closely related to male fertility. These results could provide insight into the genetic architecture of this complex trait in dairy cattle. In addition, this study shows that quantitative trait pathways inferred from single-marker analyses could enhance our interpretations of the results of genome-wide association studies.},
author = {Pe{\~n}agaricano, Francisco and Weigel, Kent A and Rosa, Guilherme J M and Khatib, Hasan},
doi = {10.3389/fgene.2012.00307},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Frontiers in Genetics/Pe\~{n}agaricano et al. - 2012 - Inferring quantitative trait pathways associated with bull fertility from a genome-wide association study.pdf:pdf},
issn = {1664-8021},
journal = {Front. Genet.},
keywords = {Peters ref},
mendeley-tags = {Peters ref},
month = jan,
pages = {307},
pmid = {23335935},
title = {Inferring quantitative trait pathways associated with bull fertility from a genome-wide association study},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3542705&tool=pmcentrez&rendertype=abstract},
volume = {3},
year = {2012}
}
@article{Su2012,
abstract = {This study investigated genomic prediction using medium-density ($\sim$54,000; 54K) and high-density marker panels ($\sim$777,000; 777K), based on data from Nordic Holstein and Red Dairy Cattle (RDC). The Holstein data comprised 4,539 progeny-tested bulls, and the RDC data 4,403 progeny-tested bulls. The data were divided into reference data and test data using October 1, 2001, as a cut-off date (birth date of the bulls). This resulted in about 25\% genotyped bulls in the Holstein test data and 20\% in the RDC test data. For each breed, 3 data sets of markers were used to predict breeding values: (1) 54K data set with missing genotypes, (2) 54K data set where missing genotypes were imputed, and (3) imputed high-density (HD) marker data set created by imputing the 54K data to the HD data based on 557 bulls genotyped using a 777K single nucleotide polymorphism chip in Holstein, and 706 bulls in RDC. Based on the 3 marker data sets, direct genomic breeding values (DGV) for protein, fertility, and udder health were predicted using a genomic BLUP model (GBLUP) and a Bayesian mixture model with 2 normal distributions. Reliability of DGV was measured as squared correlations between deregressed proofs (DRP) and DGV corrected for reliability of DRP. Unbiasedness was assessed by regression of DRP on DGV, based on the bulls in the test data sets. Averaged over the 3 traits, reliability of DGV based on the HD markers was 0.5\% higher than that based on the 54K data in Holstein, and 1.0\% higher than that in RDC. In addition, the HD markers led to an improvement of unbiasedness of DGV. The Bayesian mixture model led to 0.5\% higher reliability than the GBLUP model in Holstein, but not in RDC. Imputing missing genotypes in the 54K marker data did not improve genomic predictions for most of the traits.},
annote = {Replace tildes with \$\backslash sim 54,000\$.},
author = {Su, G. and Br{\o}ndum, Rasmus F and Ma, Peipei and Guldbrandtsen, Bernt and Aamand, G. P. and Lund, Mogens Sand{\o}},
doi = {10.3168/jds.2012-5379},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Journal of Dairy Science/Su et al. - 2012 - Comparison of genomic predictions using medium-density (∼54,000) and high-density (∼777,000) single nucleotide polymo.pdf:pdf},
issn = {1525-3198},
journal = {J. Dairy Sci.},
keywords = {777k,Animals,Bayes Theorem,Cattle,Cattle: genetics,Female,Genetic,Genetic Markers,Genome,Genotype,Heritable,Holstein,Imputation,Male,Models,Polymorphism,Quantitative Trait,Reproducibility of Results,SNP50,Selection,Single Nucleotide},
mendeley-tags = {777k,Holstein,Imputation,SNP50},
month = aug,
number = {8},
pages = {4657--4665},
pmid = {22818480},
publisher = {Elsevier},
title = {Comparison of genomic predictions using medium-density ({$\sim$}54,000) and high-density ({$\sim$}777,000) single nucleotide polymorphism marker panels in {Nordic Holstein} and {Red Dairy Cattle} populations},
url = {http://www.journalofdairyscience.org/article/S0022-0302(12)00455-9/abstract},
volume = {95},
year = {2012}
}
@article{Zimin2009,
abstract = {The genome of the domestic cow, Bos taurus, was sequenced using a mixture of hierarchical and whole-genome shotgun sequencing methods.},
author = {Zimin, Aleksey V. and Delcher, Arthur L. and Florea, Liliana and Kelley, David R. and Schatz, Michael C. and Puiu, Daniela and Hanrahan, Finnian and Pertea, Geo and {Van Tassell}, Curtis P. and Sonstegard, Tad S. and Mar{\c{c}}ais, Guillaume and Roberts, Michael and Subramanian, Poorani and Yorke, James A. and Salzberg, Steven L},
doi = {10.1186/gb-2009-10-4-r42},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Genome Biology/Zimin et al. - 2009 - A whole-genome assembly of the domestic cow, Bos taurus.pdf:pdf},
issn = {1465-6914},
journal = {Genome Biol.},
keywords = {Animals,Cattle,Cattle: genetics,Chromosome Mapping,DNA,DNA: methods,DNA: statistics \& numerical data,Female,Genome,Genome: genetics,Genomics,Human,Human: genetics,Humans,Male,Sequence Analysis,Synteny,Y Chromosome,Y Chromosome: genetics},
month = jan,
number = {4},
pages = {R42},
pmid = {19393038},
title = {A whole-genome assembly of the domestic cow, \textit{Bos taurus}},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2688933&tool=pmcentrez&rendertype=abstract},
volume = {10},
year = {2009}
}
@article{Zuk2012,
abstract = {Human genetics has been haunted by the mystery of "missing heritability" of common traits. Although studies have discovered >1,200 variants associated with common diseases and traits, these variants typically appear to explain only a minority of the heritability. The proportion of heritability explained by a set of variants is the ratio of (i) the heritability due to these variants (numerator), estimated directly from their observed effects, to (ii) the total heritability (denominator), inferred indirectly from population data. The prevailing view has been that the explanation for missing heritability lies in the numerator-that is, in as-yet undiscovered variants. While many variants surely remain to be found, we show here that a substantial portion of missing heritability could arise from overestimation of the denominator, creating "phantom heritability." Specifically, (i) estimates of total heritability implicitly assume the trait involves no genetic interactions (epistasis) among loci; (ii) this assumption is not justified, because models with interactions are also consistent with observable data; and (iii) under such models, the total heritability may be much smaller and thus the proportion of heritability explained much larger. For example, 80\% of the currently missing heritability for Crohn's disease could be due to genetic interactions, if the disease involves interaction among three pathways. In short, missing heritability need not directly correspond to missing variants, because current estimates of total heritability may be significantly inflated by genetic interactions. Finally, we describe a method for estimating heritability from isolated populations that is not inflated by genetic interactions.},
author = {Zuk, Or and Hechter, Eliana and Sunyaev, Shamil R. and Lander, Eric S.},
doi = {10.1073/pnas.1119675109},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Proceedings of the National Academy of Sciences of the United States of America/Zuk et al. - 2012 - The mystery of missing heritability Genetic interactions create phantom heritability.pdf:pdf},
issn = {1091-6490},
journal = {Proc. Natl. Acad. Sci. U. S. A.},
keywords = {Peters intro refs},
mendeley-tags = {Peters intro refs},
month = jan,
number = {4},
pages = {1193--1198},
pmid = {22223662},
title = {The mystery of missing heritability: Genetic interactions create phantom heritability},
url = {http://www.pnas.org/cgi/content/abstract/109/4/1193},
volume = {109},
year = {2012}
}
@article{Hu2013,
abstract = {The Animal QTL database (QTLdb; http://www.animalgenome.org/QTLdb) is designed to house all publicly available QTL and single-nucleotide polymorphism/gene association data on livestock animal species. An earlier version was published in the Nucleic Acids Research Database issue in 2007. Since then, we have continued our efforts to develop new and improved database tools to allow more data types, parameters and functions. Our efforts have transformed the Animal QTLdb into a tool that actively serves the research community as a quality data repository and more importantly, a provider of easily accessible tools and functions to disseminate QTL and gene association information. The QTLdb has been heavily used by the livestock genomics community since its first public release in 2004. To date, there are 5920 cattle, 3442 chicken, 7451 pigs, 753 sheep and 88 rainbow trout data points in the database, and at least 290 publications that cite use of the database. The rapid advancement in genomic studies of cattle, chicken, pigs, sheep and other livestock animals has presented us with challenges, as well as opportunities for the QTLdb to meet the evolving needs of the research community. Here, we report our progress over the recent years and highlight new functions and services available to the general public.},
author = {Hu, Zhi-Liang and Park, Carissa A. and Wu, Xiao-Lin and Reecy, James M.},
doi = {10.1093/nar/gks1150},
issn = {1362-4962},
journal = {Nucleic Acids Res.},
keywords = {Animals,Cattle,Chromosome Banding,Chromosome Mapping,Databases,Genome-Wide Association Study,Genomics,Internet,Livestock,Livestock: genetics,Nucleic Acid,Peters references,Quantitative Trait Loci,Software},
mendeley-tags = {Peters references},
month = jan,
number = {D1},
pages = {D871--D879},
pmid = {23180796},
title = {Animal {QTLdb}: an improved database tool for livestock animal {QTL}/association data dissemination in the post-genome era},
url = {http://nar.oxfordjournals.org/content/41/D1/D871.abstract},
volume = {41},
year = {2013}
}
@article{Lage2012,
abstract = {Congenital heart disease (CHD) occurs in $\sim$1\% of newborns. CHD arises from many distinct etiologies, ranging from genetic or genomic variation to exposure to teratogens, which elicit diverse cell and molecular responses during cardiac development. To systematically explore the relationships between CHD risk factors and responses, we compiled and integrated comprehensive datasets from studies of CHD in humans and model organisms. We examined two alternative models of potential functional relationships between genes in these datasets: direct convergence, in which CHD risk factors significantly and directly impact the same genes and molecules and functional convergence, in which risk factors significantly impact different molecules that participate in a discrete heart development network. We observed no evidence for direct convergence. In contrast, we show that CHD risk factors functionally converge in protein networks driving the development of specific anatomical structures (e.g., outflow tract, ventricular septum, and atrial septum) that are malformed by CHD. This integrative analysis of CHD risk factors and responses suggests a complex pattern of functional interactions between genomic variation and environmental exposures that modulate critical biological systems during heart development.},
author = {Lage, Kasper and Greenway, Steven C. and Rosenfeld, Jill A. and Wakimoto, Hiroko and Gorham, Joshua M. and Segr{\`e}, Ayellet V. and Roberts, Amy E. and Smoot, Leslie B. and Pu, William T. and Pereira, Alexandre C. and Mesquita, Sonia M. and Tommerup, Niels and Brunak, S{\o}ren and Ballif, Blake C. and Shaffer, Lisa G. and Donahoe, Patricia K. and Daly, Mark J. and Seidman, Jonathan G. and Seidman, Christine E. and Larsen, Lars A.},
doi = {10.1073/pnas.1210730109},
issn = {1091-6490},
journal = {Proc. Natl. Acad. Sci. U. S. A.},
keywords = {Congenital,Congenital: epidemiology,Congenital: genetics,Databases,Environment,Genetic,Genetic Predisposition to Disease,Genetic Predisposition to Disease: epidemiology,Hand Deformities,Heart,Heart: embryology,Humans,Infant,Newborn,Nonparametric,Peters references,Protein Interaction Maps,Protein Interaction Maps: genetics,Risk Factors,Statistics,Transcriptome},
mendeley-tags = {Peters references},
month = aug,
number = {35},
pages = {14035--14040},
pmid = {22904188},
title = {Genetic and environmental risk factors in congenital heart disease functionally converge in protein networks driving heart development},
url = {http://www.pnas.org/content/109/35/14035},
volume = {109},
year = {2012}
}
@article{Maurano2012,
abstract = {Genome-wide association studies have identified many noncoding variants associated with common diseases and traits. We show that these variants are concentrated in regulatory DNA marked by deoxyribonuclease I (DNase I) hypersensitive sites (DHSs). Eighty-eight percent of such DHSs are active during fetal development and are enriched in variants associated with gestational exposure--related phenotypes. We identified distant gene targets for hundreds of variant-containing DHSs that may explain phenotype associations. Disease-associated variants systematically perturb transcription factor recognition sequences, frequently alter allelic chromatin states, and form regulatory networks. We also demonstrated tissue-selective enrichment of more weakly disease-associated variants within DHSs and the de novo identification of pathogenic cell types for Crohn's disease, multiple sclerosis, and an electrocardiogram trait, without prior knowledge of physiological mechanisms. Our results suggest pervasive involvement of regulatory DNA variation in common human disease and provide pathogenic insights into diverse disorders.},
author = {Maurano, Matthew T. and Humbert, Richard and Rynes, Eric and Thurman, Robert E. and Haugen, Eric and Wang, Hao and Reynolds, Alex P. and Sandstrom, Richard and Qu, Hongzhu and Brody, Jennifer and Shafer, Anthony and Neri, Fidencio and Lee, Kristen and Kutyavin, Tanya and Stehling-Sun, Sandra and Johnson, Audra K. and Canfield, Theresa K. and Giste, Erika and Diegel, Morgan and Bates, Daniel and Hansen, R. Scott and Neph, Shane and Sabo, Peter J. and Heimfeld, Shelly and Raubitschek, Antony and Ziegler, Steven and Cotsapas, Chris and Sotoodehnia, Nona and Glass, Ian and Sunyaev, Shamil R. and Kaul, Rajinder and Stamatoyannopoulos, John A.},
doi = {10.1126/science.1222794},
journal = {Science},
keywords = {Peters references},
mendeley-tags = {Peters references},
month = sep,
number = {6099},
pages = {1190--1195},
title = {Systematic Localization of Common Disease-Associated Variation in Regulatory {DNA}},
url = {http://www.sciencemag.org/content/337/6099/1190.abstract},
volume = {337},
year = {2012}
}
@article{ORoak2012,
author = {O'Roak, Brian J. and Vives, Laura and Girirajan, Santhosh and Karakoc, Emre and Krumm, Niklas and Coe, Bradley P. and Levy, Roie and Ko, Arthur and Lee, Choli and Smith, Joshua D. and Turner, Emily H. and Stanaway, Ian B. and Vernot, Benjamin and Malig, Maika and Baker, Carl and Reilly, Beau and Akey, Joshua M. and Borenstein, Elhanan and Rieder, Mark J. and Nickerson, Deborah A. and Bernier, Raphael and Shendure, Jay and Eichler, Evan E.},
doi = {10.1038/nature10989},
issn = {0028-0836},
journal = {Nature},
keywords = {Peters references},
mendeley-tags = {Peters references},
month = may,
number = {7397},
pages = {246--250},
publisher = {Nature Publishing Group},
title = {Sporadic autism exomes reveal a highly interconnected protein network of de novo mutations},
url = {http://www.nature.com/nature/journal/v485/n7397/abs/nature10989.html},
volume = {485},
year = {2012}
}
@article{Yang2011,
abstract = {We estimate and partition genetic variation for height, body mass index (BMI), von Willebrand factor and QT interval (QTi) using 586,898 SNPs genotyped on 11,586 unrelated individuals. We estimate that $\sim$45\%, $\sim$17\%, $\sim$25\% and $\sim$21\% of the variance in height, BMI, von Willebrand factor and QTi, respectively, can be explained by all autosomal SNPs and a further $\sim$0.5--1\% can be explained by X chromosome SNPs. We show that the variance explained by each chromosome is proportional to its length, and that SNPs in or near genes explain more variation than SNPs between genes. We propose a new approach to estimate variation due to cryptic relatedness and population stratification. Our results provide further evidence that a substantial proportion of heritability is captured by common SNPs, that height, BMI and QTi are highly polygenic traits, and that the additive variation explained by a part of the genome is approximately proportional to the total length of DNA contained within genes therein.},
author = {Yang, Jian and Manolio, Teri A. and Pasquale, Louis R. and Boerwinkle, Eric and Caporaso, Neil and Cunningham, Julie M. and de Andrade, Mariza and Feenstra, Bjarke and Feingold, Eleanor and Hayes, M. Geoffrey and Hill, William G. and Landi, Maria Teresa and Alonso, Alvaro and Lettre, Guillaume and Lin, Peng and Ling, Hua and Lowe, William and Mathias, Rasika A. and Melbye, Mads and Pugh, Elizabeth and Cornelis, Marilyn C. and Weir, Bruce S. and Goddard, Michael E. and Visscher, Peter M.},
doi = {10.1038/ng.823},
file = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Nature Genetics/Yang et al. - 2011 - Genome partitioning of genetic variation for complex traits using common SNPs.pdf:pdf},
issn = {1061-4036},
journal = {Nat. Genet.},
keywords = {Peters intro refs},
mendeley-groups = {Manuscript1,DGRP genomic features},
mendeley-tags = {Peters intro refs},
month = may,
number = {6},
pages = {519--525},
title = {Genome partitioning of genetic variation for complex traits using common {SNPs}},
url = {http://www.nature.com/doifinder/10.1038/ng.823},
volume = {43},
year = {2011}
}
@manual{RCoreTeam,
address = {Vienna, Austria},
author = {{R Core Team}},
isbn = {3-900051-07-0},
mendeley-groups = {Manuscript1,DGRP genomic features},
organization = {R Foundation for Statistical Computing},
title = {{R}: A Language and Environment for Statistical Computing},
url = {http://www.r-project.org/},
year = {2012}
}
@article{Wood2011,
author = {Wood, Simon N.},
doi = {10.1111/j.1467-9868.2010.00749.x},
issn = {1369-7412},
journal = {J. R. Stat. Soc. B.},
keywords = {GAM,R-package,mgcv},
mendeley-groups = {DGRP genomic features},
mendeley-tags = {GAM,mgcv,R-package},
month = jan,
number = {1},
pages = {3--36},
title = {Fast stable restricted maximum likelihood and marginal likelihood estimation of semiparametric generalized linear models},
url = {http://doi.wiley.com/10.1111/j.1467-9868.2010.00749.x},
volume = {73},
year = {2011}
}
@article{Wood2004,
abstract = {Representation of generalized additive models (GAM's) using penalized regression splines allows GAM's to be employed in a straightforward manner using penalized regression methods. Not only is inference facilitated by this approach, but it is also possible to integrate model selection in the form of smoothing parameter selection into model fitting in a computationally efficient manner using well founded criteria such as generalized cross-validation. The current fitting and smoothing parameter selection methods for such models are usually effective, but do not provide the level of numerical stability to which users of linear regression packages, for example, are accustomed. In particular the existing methods cannot deal adequately with numerical rank deficiency of the GAM fitting problem, and it is not straightforward to produce methods that can do so, given that the degree of rank deficiency can be smoothing parameter dependent. In addition, models with the potential flexibility of GAM's can also present practical fitting difficulties as a result of indeterminacy in the model likelihood: Data with many zeros fitted by a model with a log link are a good example. In this article it is proposed that GAM's with a ridge penalty provide a practical solution in such circumstances, and a multiple smoothing parameter selection method suitable for use in the presence of such a penalty is developed. The method is based on the pivoted QR decomposition and the singular value decomposition, so that with or without a ridge penalty it has good error propagation properties and is capable of detecting and coping elegantly with numerical rank deficiency. The method also allows mixtures of user specified and estimated smoothing parameters and the setting of lower bounds on smoothing parameters. In terms of computational efficiency, the method compares well with existing methods. A simulation study compares the method to existing methods, including treating GAM's as mixed models},
author = {Wood, Simon N.},
doi = {10.1198/016214504000000980},
journal = {J. Am. Stat. Assoc.},
keywords = {GAM,R-package,mgcv},
mendeley-groups = {DGRP genomic features},
mendeley-tags = {GAM,mgcv,R-package},
month = sep,
number = {467},
pages = {673--686},
title = {Stable and efficient multiple smoothing parameter estimation for generalized additive models},
url = {http://opus.bath.ac.uk/7196/1/magic.pdf},
volume = {99},
year = {2004}
}
@manual{org.Dm.eg.db,
author = {Carlson, Marc},
note = {R package version 2.10.1},
title = {{org.Dm.eg.db}: Genome wide annotation for {Fly}}
}
@article{Aguilar2011,
abstract = {Genomic evaluations can be calculated using a unified procedure that combines phenotypic, pedigree and genomic information. Implementation of such a procedure requires the inverse of the relationship matrix based on pedigree and genomic relationships. The objective of this study was to investigate efficient computing options to create relationship matrices based on genomic markers and pedigree information as well as their inverses. SNP marker information was simulated for a panel of 40 K SNPs, with the number of genotyped animals up to 30 000. Matrix multiplication in the computation of the genomic relationship was by a simple 'do' loop, by two optimized versions of the loop, and by a specific matrix multiplication subroutine. Inversion was by a generalized inverse algorithm and by a LAPACK subroutine. With the most efficient choices and parallel processing, creation of matrices for 30 000 animals would take a few hours. Matrices required to implement a unified approach can be computed efficiently. Optimizations can be either by modifications of existing code or by the use of efficient automatic optimizations provided by open source or third-party libraries.},
author = {Aguilar, I. and Misztal, I. and Legarra, A. and Tsuruta, S.},
doi = {10.1111/j.1439-0388.2010.00912.x},
file = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Aguilar et al. - 2011 - Efficient computation of the genomic relationship matrix and other matrices used in single-step evaluation.pdf:pdf},
issn = {1439-0388},
journal = {J. Anim. Breed. Genet.},
keywords = {computing methods; genomic selection; relationship matrix},
month = dec,
number = {6},
pages = {422--428},
pmid = {22059575},
title = {Efficient computation of the genomic relationship matrix and other matrices used in single-step evaluation},
url = {http://doi.wiley.com/10.1111/j.1439-0388.2010.00912.x},
volume = {128},
year = {2011}
}
@article{Allison1999,
abstract = {Detection of linkage to genes for quantitative traits remains a challenging task. Recently, variance components (VC) techniques have emerged as among the more powerful of available methods. As often implemented, such techniques require assumptions about the phenotypic distribution. Usually, multivariate normality is assumed. However, several factors may lead to markedly nonnormal phenotypic data, including (a) the presence of a major gene (not necessarily linked to the markers under study), (b) some types of gene x environment interaction, (c) use of a dichotomous phenotype (i.e., affected vs. unaffected), (d) nonnormality of the population within-genotype (residual) distribution, and (e) selective (extreme) sampling. Using simulation, we have investigated, for sib-pair studies, the robustness of the likelihood-ratio test for a VC quantitative-trait locus-detection procedure to violations of normality that are due to these factors. Results showed (a) that some types of nonnormality, such as leptokurtosis, produced type I error rates in excess of the nominal, or alpha, levels whereas others did not; and (b) that the degree of type I error-rate inflation appears to be directly related to the residual sibling correlation. Potential solutions to this problem are discussed. Investigators contemplating use of this VC procedure are encouraged to provide evidence that their trait data are normally distributed, to employ a procedure that allows for nonnormal data, or to consider implementation of permutation tests.},
author = {Allison, D. B. and Neale, M. C. and Zannolli, R. and Schork, N. J. and Amos, C. I. and Blangero, J.},
file = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Allison et al. - 1999 - Testing the robustness of the likelihood-ratio test in a variance-component quantitative-trait loci-mapping procedure.pdf:pdf},
issn = {0002-9297},
journal = {Am. J. Hum. Genet.},
keywords = {Analysis of Variance; Chromosome Mapping; Computer Simulation; Genetic Linkage; Humans; Likelihood Functions; Likelihood tests; Matched-Pair Analysis; Nuclear Family; Phenotype; Quantitative Trait; Heritable; Reproducibility of Results; Sample Size; Software; Statistical Distributions},
mendeley-tags = {Likelihood tests},
month = aug,
number = {2},
pages = {531--544},
title = {Testing the robustness of the likelihood-ratio test in a variance-component quantitative-trait loci-mapping procedure},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1377951&tool=pmcentrez&rendertype=abstract},
volume = {65},
year = {1999}
}
@article{Balding2006,
author = {Balding, David J.},
doi = {10.1038/nrg1916},
file = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Balding - 2006 - A tutorial on statistical methods for population association studies.pdf:pdf},
issn = {1471-0056},
journal = {Nat. Rev. Genet.},
keywords = {GWAS; Microarray; Peters intro refs},
mendeley-tags = {GWAS,Microarray,Peters intro refs},
month = oct,
number = {10},
pages = {781--791},
title = {A tutorial on statistical methods for population association studies},
url = {http://dx.doi.org/10.1038/nrg1916},
volume = {7},
year = {2006}
}
@article{Ballantyne2012,
author = {Ballantyne, A. P. and Alden, C. B. and Miller, J. B. and Tans, P. P. and White, J. W. C.},
doi = {10.1038/nature11299},
issn = {0028-0836},
journal = {Nature},
month = aug,
number = {7409},
pages = {70--72},
publisher = {Nature Publishing Group},
title = {Increase in observed net carbon dioxide uptake by land and oceans during the past 50 years},
url = {http://dx.doi.org/10.1038/nature11299},
volume = {488},
year = {2012}
}
@article{Bar2007,
abstract = {Our objective was to estimate the milk losses associated with multiple occurrences of generic bovine clinical mastitis (CM) within and across lactations. We studied 10,380 lactations from 5 large, high-producing dairy herds that used automatic recording of daily milk yields. Mixed models, with a random herd effect and an autoregressive covariance structure to account for repeated measurements, were used to quantify the effect of CM and other control variables (parity, week of lactation, other diseases) on milk yield. Many cows that developed CM were higher producers than their non-mastitic herdmates before CM occurred. Milk yield began to drop after diagnosis; the greatest loss occurred in the first weeks (up to 126 kg) and then gradually tapered to a constant value approximately 2 mo after CM. Mastitic cows often never recovered their potential yield. First-lactation cows lost 164 kg of milk for the first episode and 198 kg for the second in the 2 mo after CM diagnosis, compared with their potential yield. Among older cows, this estimate was 253 kg for the first, 238 kg for the second, and 216 kg for the third CM case. A cow that had 1 or more CM episodes in her previous lactation produced 1.2 kg/d less milk over the whole current lactation (95\% confidence interval: 0.6, 1.7) than a cow without CM in her previous lactation. These findings provide dairy producers with information on the average milk loss associated with CM cases without considering the causative agent, and can be used for economic analysis.},
author = {Bar, D. and Gr{\"o}hn, Y. T. and Bennett, G. and Gonz{\'a}lez, R. N. and Hertl, J. A. and Schulte, H. F. and Tauer, L. W. and Welcome, F. L. and Schukken, Y. H.},
doi = {10.3168/jds.2007-0145},
file = {:C$\backslash$:/Users/STME/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Bar et al. - 2007 - Effect of repeated episodes of generic clinical mastitis on milk yield in dairy cows.pdf:pdf},
issn = {1525-3198},
journal = {J. Dairy Sci.},
keywords = {Animals,Bacteria,Bacteria: isolation \& purification,Bo's reference.,Bovine,Bovine: microbiology,Bovine: physiopathology,Cattle,Cattle Diseases,Cattle Diseases: microbiology,Cattle Diseases: physiopathology,Dairying,Female,Lactation,Lactation: physiology,Mastitis,Milk,Milk: secretion,Time Factors},
mendeley-tags = {Bo's reference.},
month = oct,
number = {10},
pages = {4643--4653},
pmid = {17881685},
publisher = {Elsevier},
title = {Effect of repeated episodes of generic clinical mastitis on milk yield in dairy cows},
url = {http://www.journalofdairyscience.org/article/S0022-0302(07)71928-8/abstract},
volume = {90},
year = {2007}
}
@article{Browning2009,
abstract = {We present methods for imputing data for ungenotyped markers and for inferring haplotype phase in large data sets of unrelated individuals and parent-offspring trios. Our methods make use of known haplotype phase when it is available, and our methods are computationally efficient so that the full information in large reference panels with thousands of individuals is utilized. We demonstrate that substantial gains in imputation accuracy accrue with increasingly large reference panel sizes, particularly when imputing low-frequency variants, and that unphased reference panels can provide highly accurate genotype imputation. We place our methodology in a unified framework that enables the simultaneous use of unphased and phased data from trios and unrelated individuals in a single analysis. For unrelated individuals, our imputation methods produce well-calibrated posterior genotype probabilities and highly accurate allele-frequency estimates. For trios, our haplotype-inference method is four orders of magnitude faster than the gold-standard PHASE program and has excellent accuracy. Our methods enable genotype imputation to be performed with unphased trio or unrelated reference panels, thus accounting for haplotype-phase uncertainty in the reference panel. We present a useful measure of imputation accuracy, allelic R(2), and show that this measure can be estimated accurately from posterior genotype probabilities. Our methods are implemented in version 3.0 of the BEAGLE software package.},
author = {Browning, Brian L. and Browning, Sharon R.},
doi = {10.1016/j.ajhg.2009.01.005},
file = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Browning, Browning - 2009 - A unified approach to genotype imputation and haplotype-phase inference for large data sets of trios and unrelated individuals.pdf:pdf},
issn = {1537-6605},
journal = {Am. J. Hum. Genet.},
keywords = {Computer Simulation; Female; Gene Frequency; Gene Frequency: genetics; Genotype; Haplotypes; Haplotypes: genetics; Humans; Male; Markov Chains; Models; Genetic; Nuclear Family; Reproducibility of Results},
month = feb,
number = {2},
pages = {210--223},
pmid = {19200528},
title = {A unified approach to genotype imputation and haplotype-phase inference for large data sets of trios and unrelated individuals},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2668004&tool=pmcentrez&rendertype=abstract},
volume = {84},
year = {2009}
}
@article{Buitenhuis2011,
abstract = {Bovine mastitis is one of the most costly and prevalent diseases affecting dairy cows worldwide. In order to develop new strategies to prevent Escherichia coli-induced mastitis, a detailed understanding of the molecular mechanisms underlying the host immune response to an E. coli infection is necessary. To this end, we performed a global gene-expression analysis of mammary gland tissue collected from dairy cows that had been exposed to a controlled E. coli infection. Biopsy samples of healthy and infected udder tissue were collected at T = 24 h post-infection (p.i.) and at T = 192 h p.i. to represent the acute phase response (APR) and chronic stage, respectively. Differentially expressed (DE) genes for each stage were analyzed and the DE genes detected at T = 24 h were also compared to data collected from two previous E. coli mastitis studies that were carried out on post mortem tissue.},
author = {Buitenhuis, Bart and R{\o}ntved, Christine M. and Edwards, Stefan McKinnon and Ingvartsen, Klaus L. and S{\o}rensen, Peter},
doi = {10.1186/1471-2164-12-130},
issn = {1471-2164},
journal = {BMC Genomics},
keywords = {Animal; Animal: metabolism; Animal: microbiology; Animals; Bovine; Bovine: genetics; Bovine: immunology; Bovine: microbiology; Cattle; Escherichia coli; Escherichia coli Infections; Escherichia coli Infections: genetics; Escherichia coli Infections: immunology; Escherichia coli Infections: veterinary; Female; Gene Expression Profiling; Lipid Metabolism; Lipid Metabolism: genetics; Mammary Glands; Mastitis; Milk; Milk: microbiology; Oligonucleotide Array Sequence Analysis},
month = jan,
number = {1},
pages = {130},
pmid = {21352611},
title = {In depth analysis of genes and pathways of the mammary gland involved in the pathogenesis of bovine \textit{Escherichia coli}-mastitis},
url = {http://www.biomedcentral.com/1471-2164/12/130},
volume = {12},
year = {2011}
}
@manual{orgBtdb2011,
author = {Carlson, Marc and Falcon, Seth and Pag{\`e}s, Herv{\'e} and Li, Nianhua},
note = {R package version 2.5.0},
title = {{org.Bt.eg.db}: Genome wide annotation for {Bovine}},
year = {2011}
}
@article{CorbeilSearle1976,
author = {Corbeil, R. R. and Searle, S. R.},
doi = {10.2307/1267913},
journal = {Technometrics},
keywords = {Mixed Model; Restricted Maximum Likelihood; Variance Components; W-transformation},
month = feb,
number = {1},
pages = {31--38},
title = {Restricted maximum likelihood ({REML}) estimation of variance components in the mixed model},
url = {http://www.jstor.org/stable/1267913},
volume = {18},
year = {1976}
}
@misc{AnnotationFuncs2011,
  author = {Edwards, Stefan McKinnon},
  title  = {{Annotation translation functions for Bioconductors annotation packages}},
  url    = {http://www.iysik.com/index.php?page=annotation-functions},
  month  = feb,
  year   = {2011}
}
% NOTE(review): urldate rewritten from 01-11-2011 to ISO 8601 assuming day-month-year order -- confirm.
@misc{txtPhenomeWWW,
author = {Edwards, Stefan McKinnon and Jiang, Li},
title = {{txtPhenome} -- using textual descriptions as phenotypes},
url = {https://djfextranet.agrsci.dk/sites/txtphenome/public/Pages/front.aspx},
urldate = {2011-11-01},
year = {2011}
}
@article{Fridley2011,
  author        = {Fridley, Brooke L and Biernacka, Joanna M},
  title         = {{Gene set analysis of SNP data: benefits, challenges, and future directions}},
  journal       = {Eur. J. Hum. Genet.},
  year          = {2011},
  month         = aug,
  volume        = {19},
  number        = {8},
  pages         = {837--843},
  doi           = {10.1038/ejhg.2011.57},
  issn          = {1476-5438},
  pmid          = {21487444},
  url           = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3172936&tool=pmcentrez&rendertype=abstract},
  keywords      = {Disease; Disease: genetics; Genetic Predisposition to Disease; Genome-Wide Association Study; Genome-Wide Association Study: methods; Humans; Linkage Disequilibrium; Metabolic Networks and Pathways; Models; Polymorphism; Recommended by Peter; Single Nucleotide; Statistical},
  mendeley-tags = {Recommended by Peter},
  file          = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Fridley, Biernacka - 2011 - Gene set analysis of SNP data benefits, challenges, and future directions.pdf:pdf},
  abstract      = {The last decade of human genetic research witnessed the completion of hundreds of genome-wide association studies (GWASs). However, the genetic variants discovered through these efforts account for only a small proportion of the heritability of complex traits. One explanation for the missing heritability is that the common analysis approach, assessing the effect of each single-nucleotide polymorphism (SNP) individually, is not well suited to the detection of small effects of multiple SNPs. Gene set analysis (GSA) is one of several approaches that may contribute to the discovery of additional genetic risk factors for complex traits. Complex phenotypes are thought to be controlled by networks of interacting biochemical and physiological pathways influenced by the products of sets of genes. By assessing the overall evidence of association of a phenotype with all measured variation in a set of genes, GSA may identify functionally relevant sets of genes corresponding to relevant biomolecular pathways,
which will enable more focused studies of genetic risk factors. This approach may thus contribute to the discovery of genetic variants responsible for some of the missing heritability. With the increased use of these approaches for the secondary analysis of data from GWAS, it is important to understand the different GSA methods and their strengths and weaknesses, and consider challenges inherent in these types of analyses. This paper provides an overview of GSA, highlighting the key challenges, potential solutions, and directions for ongoing research.},
}
@article{biobase2004,
  author   = {Gentleman, Robert and Carey, Vincent and Bates, Douglas and Bolstad, Ben and Dettling, Marcel and Dudoit, Sandrine and Ellis, Byron and Gautier, Laurent and Ge, Yongchao and Gentry, Jeff and Hornik, Kurt and Hothorn, Torsten and Huber, Wolfgang and Iacus, Stefano and Irizarry, Rafael and Leisch, Friedrich and Li, Cheng and Maechler, Martin and Rossini, Anthony and Sawitzki, Gunther and Smith, Colin and Smyth, Gordon K. and Tierney, Luke and Yang, Jean and Zhang, Jianhua},
  title    = {{Bioconductor: open software development for computational biology and bioinformatics}},
  journal  = {Genome Biol.},
  year     = {2004},
  volume   = {5},
  number   = {10},
  pages    = {R80},
  doi      = {10.1186/gb-2004-5-10-r80},
  issn     = {1465-6906},
  url      = {http://genomebiology.com/2004/5/10/R80},
  abstract = {The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples},
}
@article{Gilmour1995,
  author    = {Gilmour, Arthur R and Thompson, Robin and Cullis, Brian R},
  title     = {{Average Information REML: An Efficient Algorithm for Variance Parameter Estimation in Linear Mixed Models}},
  journal   = {Biometrics},
  year      = {1995},
  volume    = {51},
  number    = {4},
  pages     = {1440--1450},
  publisher = {International Biometric Society},
  doi       = {10.2307/2533274},
  issn      = {0006-341X},
  url       = {http://www.jstor.org/stable/2533274},
  file      = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Gilmour, Thompson, Cullis - 1995 - Average Information REML An Efficient Algorithm for Variance Parameter Estimation in Linear Mixed Models.pdf:pdf},
  abstract  = {A strategy of using an average information matrix is shown to be computationally convenient and efficient for estimating variance components by restricted maximum likelihood (REML) in the mixed linear model. Three applications are described. The motivation for the algorithm was the estimation of variance components in the analysis of wheat variety means from 1,071 experiments representing 10 years and 60 locations in New South Wales. We also apply the algorithm to the analysis of designed experiments by incomplete block analysis and spatial analysis of field experiments.},
}
@article{Goddard2008,
  author        = {Goddard, Mike},
  title         = {{Genomic selection: prediction of accuracy and maximisation of long term response}},
  shorttitle    = {Genomic selection},
  journal       = {Genetica},
  year          = {2008},
  month         = aug,
  volume        = {136},
  number        = {2},
  pages         = {245--257},
  doi           = {10.1007/s10709-008-9308-0},
  issn          = {0016-6707},
  url           = {http://www.springerlink.com/index/10.1007/s10709-008-9308-0},
  keywords      = {NOVA course},
  mendeley-tags = {NOVA course},
}
@article{Goeman2004,
  author   = {Goeman, J. J. and van de Geer, S. A. and de Kort, F. and van Houwelingen, H. C.},
  title    = {{A global test for groups of genes: testing association with a clinical outcome}},
  journal  = {Bioinformatics},
  year     = {2004},
  month    = jan,
  volume   = {20},
  number   = {1},
  pages    = {93--99},
  doi      = {10.1093/bioinformatics/btg382},
  issn     = {1367-4803},
  url      = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btg382},
  file     = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Goeman et al. - 2003 - A global test for groups of genes testing association with a clinical outcome.pdf:pdf},
  abstract = {Motivation: This paper presents a global test to be used for the analysis of microarray data. Using this test it can be determined whether the global expression pattern of a group of genes is significantly related to some clinical outcome of interest. Groups of genes may be any size from a single gene to all genes on the chip (e.g. known pathways, specific areas of the genome or clusters from a cluster analysis). Result: The test allows groups of genes of different size to be compared, because the test gives one p-value for the group, not a p-value for each gene. Researchers can use the test to investigate hypotheses based on theory or past research or to mine gene ontology databases for interesting pathways. Multiple testing problems do not occur unless many groups are tested. Special attention is given to visualizations of the test result, focussing on the associations between samples and showing the impact of individual genes on the test result.},
}
@article{Goeman2006,
  author        = {Goeman, Jelle J. and van de Geer, Sara A. and van Houwelingen, Hans C.},
  title         = {{Testing against a high dimensional alternative}},
  journal       = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  year          = {2006},
  month         = jun,
  volume        = {68},
  number        = {3},
  pages         = {477--493},
  doi           = {10.1111/j.1467-9868.2006.00551.x},
  issn          = {1369-7412},
  url           = {http://doi.wiley.com/10.1111/j.1467-9868.2006.00551.x},
  keywords      = {Gene set test,empirical bayes modelling,f -test,high dimensional data,hypothesis testing,locally most powerful test,power,score test},
  mendeley-tags = {Gene set test},
  file          = {:C$\backslash$:/Users/STME/Documents/Mendeley Desktop/Journal of the Royal Statistical Society Series B (Statistical Methodology)/Goeman, van de Geer, van Houwelingen - 2006 - Testing against a high dimensional alternative.pdf:pdf},
}
@article{Grisart2004,
  author        = {Grisart, Bernard and Farnir, Fr{\'e}d{\'e}ric and Karim, Latifa and Cambisano, Nadine and Kim, Jong-Joo and Kvasz, Alex and Mni, Myriam and Simon, Patricia and Fr{\`e}re, Jean-Marie and Coppieters, Wouter and Georges, Michel},
  title         = {{Genetic and functional confirmation of the causality of the DGAT1 K232A quantitative trait nucleotide in affecting milk yield and composition}},
  journal       = {P. Natl. Acad. Sci. USA},
  year          = {2004},
  month         = feb,
  volume        = {101},
  number        = {8},
  pages         = {2398--2403},
  doi           = {10.1073/pnas.0308518100},
  issn          = {0027-8424},
  pmid          = {14983021},
  url           = {http://www.pnas.org/cgi/content/abstract/101/8/2398},
  keywords      = {Acyltransferases; Acyltransferases: genetics; Acyltransferases: metabolism; Amino Acid Substitution; Animal; Animals; Base Sequence; Cattle; Cattle: genetics; Cell Line; Chromosome Mapping; DGAT1; DNA Primers; Diacylglycerol O-Acyltransferase; Female; Genetic Markers; Linkage Disequilibrium; Male; Mammary Glands; Milk; Milk: secretion; Mutagenesis; Quantitative Trait Loci; Recombinant Proteins; Recombinant Proteins: metabolism; Reverse Transcriptase Polymerase Chain Reaction; Site-Directed; Spodoptera},
  mendeley-tags = {DGAT1},
  file          = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Grisart et al. - 2004 - Genetic and functional confirmation of the causality of the DGAT1 K232A quantitative trait nucleotide in affecting milk yield and composition(2).pdf:pdf},
  abstract      = {We recently used a positional cloning approach to identify a nonconservative lysine to alanine substitution (K232A) in the bovine DGAT1 gene that was proposed to be the causative quantitative trait nucleotide underlying a quantitative trait locus (QTL) affecting milk fat composition, previously mapped to the centromeric end of bovine chromosome 14. We herein generate genetic and functional data that confirm the causality of the DGAT1 K232A mutation. We have constructed a high-density single-nucleotide polymorphism map of the 3.8-centimorgan BULGE30-BULGE9 interval containing the QTL and show that the association with milk fat percentage maximizes at the DGAT1 gene. We provide evidence that the K allele has undergone a selective sweep. By using a baculovirus expression system, we have expressed both DGAT1 alleles in Sf9 cells and show that the K allele, causing an increase in milk fat percentage in the live animal, is characterized by a higher Vmax in producing triglycerides than the A allele.},
}
@article{Jensen1997,
  author   = {Jensen, Just and M{\"a}ntysaari, Esa A. and Madsen, Per and Thompson, Robin},
  title    = {{Residual Maximum Likelihood Estimation of (Co) Variance Components in Multivariate Mixed Linear Models Using Average Information}},
  journal  = {J. Indian Soc. Agr. Stat.},
  year     = {1997},
  volume   = {49},
  pages    = {215--236},
  url      = {http://isas.org.in/jisas/jsp/abstract.jsp?title=Residual},
  abstract = {An algorithm for the REML estimation of (co) variance components in general multivariate mixed linear models is described. The algorithm is based on the use of Average Information (AI) as second differentials of the likelihood function. The AI is obtained by averaging the information matrices based on observed and expected information. It is manipulated to a form that is much easier to calculate than either of the two. This involves the setting up of dummy variables as functions of residuals and calculating sums of squares and cross-products associated with these. Procedures that are based on second differentials can lead to estimates outside the parameter space. By contrast, the EM-algorithm always ensures that estimates are in the parameter space. An alternative formulation of the EM-algorithm allows the possibility of constructing algorithms that are intermediate between AI and EM and can ensure estimates within the parameter space without the problem of slow convergence of the EM algorithm.
The new algorithm was compared to derivative-free (DF) and EM algorithms by analysing two sets of field data under several models. The AI algorithm converged in much fewer rounds than the other algorithms and was in general able to locate a higher maximum of the likelihood function.},
}
@article{Jiang2008,
  author        = {Jiang, Li and {S{\o}rensen}, Peter and {R{\o}ntved}, Christine and Vels, Lotte and Ingvartsen, Klaus L},
  title         = {{Gene expression profiling of liver from dairy cows treated intra-mammary with lipopolysaccharide}},
  journal       = {BMC Genomics},
  year          = {2008},
  month         = jan,
  volume        = {9},
  number        = {1},
  pages         = {443},
  doi           = {10.1186/1471-2164-9-443},
  issn          = {1471-2164},
  pmid          = {18816405},
  url           = {http://www.biomedcentral.com/1471-2164/9/443},
  keywords      = {Acute-Phase Proteins; Acute-Phase Proteins: genetics; Acute-Phase Proteins: metabolism; Acute-Phase Reaction; Acute-Phase Reaction: genetics; Acute-Phase Reaction: veterinary; Animals; Cattle; Dairying; Escherichia coli Infections; Escherichia coli Infections: genetics; Escherichia coli Infections: veterinary; Female; Gene Expression Profiling; Lipopolysaccharides; Lipopolysaccharides: metabolism; Lipopolysaccharides: pharmacology; Liver; Liver: metabolism; Mammary Glands; Animal; Mammary Glands; Animal: immunology; Mammary Glands; Animal: metabolism; Mastitis; Bovine; Mastitis; Bovine: genetics; Mastitis; Bovine: metabolism; txtPhenome},
  mendeley-tags = {txtPhenome},
  file          = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Jiang et al. - 2008 - Gene expression profiling of liver from dairy cows treated intra-mammary with lipopolysaccharide.pdf:pdf},
  abstract      = {Liver plays a profound role in the acute phase response (APR) observed in the early phase of acute bovine mastitis caused by Escherichia coli (E. coli). To gain an insight into the genes and pathways involved in hepatic APR of dairy cows we performed a global gene expression analysis of liver tissue sampled at different time points before and after intra-mammary (IM) exposure to E. coli lipopolysaccharide (LPS) treatment.},
}
@article{Jiang2012,
  author   = {Jiang, Li and {S{\o}rensen}, Peter and Thomsen, Bo and Edwards, Stefan McKinnon and Skarman, Axel and {R{\o}ntved}, Christine M. and Lund, Mogens Sand{\o} and Workman, Christopher T.},
  title    = {{Gene prioritization for livestock diseases by data integration}},
  journal  = {Physiological Genomics},
  year     = {2012},
  month    = mar,
  volume   = {44},
  number   = {5},
  pages    = {305--317},
  doi      = {10.1152/physiolgenomics.00047.2011},
  issn     = {1531-2267},
  pmid     = {22234994},
  url      = {http://physiolgenomics.physiology.org/cgi/content/abstract/44/5/305},
  abstract = {Identifying causal genes that underlie complex traits such as susceptibility to disease is a primary aim of genetic and biomedical studies. Genetic mapping of quantitative trait loci (QTL) and gene expression profiling based on high-throughput technologies are common first approaches toward identifying associations between genes and traits; however, it is often difficult to assess whether the biological function of a putative candidate gene is consistent with a particular phenotype. Here, we have implemented a network-based disease gene prioritization approach for ranking genes associated with quantitative traits and diseases in livestock species. The approach uses ortholog mapping and integrates information on disease or trait phenotypes, gene-associated phenotypes, and protein-protein interactions. It was used for ranking all known genes present in the cattle genome for their potential roles in bovine mastitis. Gene-associated phenome profile and transcriptome profile in response to
Escherichia coli infection in the mammary gland were integrated to make a global inference of bovine genes involved in mastitis. The top ranked genes were highly enriched for pathways and biological processes underlying inflammation and immune responses, which supports the validity of our approach for identifying genes that are relevant to animal health and disease. These gene-associated phenotypes were used for a local prioritization of candidate genes located in a QTL affecting the susceptibility to mastitis. Our study provides a general framework for prioritizing genes associated with various complex traits in different species. To our knowledge this is the first time that gene expression, ortholog mapping, protein interactions, and biomedical text data have been integrated systematically for ranking candidate genes in any livestock species.},
}
@article{Kanamori2004,
  author        = {Kanamori, Mutsumi and Konno, Hideaki and Osato, Naoki and Kawai, Jun and Hayashizaki, Yoshihide and Suzuki, Harukazu},
  title         = {{A genome-wide and nonredundant mouse transcription factor database}},
  journal       = {Biochemical and Biophysical Research Communications},
  year          = {2004},
  month         = sep,
  volume        = {322},
  number        = {3},
  pages         = {787--793},
  doi           = {10.1016/j.bbrc.2004.07.179},
  issn          = {0006-291X},
  pmid          = {15336533},
  url           = {http://dx.doi.org/10.1016/j.bbrc.2004.07.179},
  keywords      = {Animals; DNA-Binding Proteins; DNA-Binding Proteins: genetics; Databases; Nucleic Acid; Gene Expression Regulation; Gene Expression Regulation: genetics; Mice; Mice: genetics; TF; Transcription Factors; Transcription Factors: genetics; Transcription; Genetic; Transcription; Genetic: genetics},
  mendeley-tags = {TF},
  abstract      = {Here we describe the development of a genome-wide and nonredundant mouse transcription factor database and its viewer (http://genome.gsc.riken.gp/TFdb/). We systematically selected transcription factors with DNA-binding properties and their regulators on the basis of their LocusLink and Gene Ontology annotations. We also incorporated into our database information regarding the corresponding available cDNA clones and their structural properties. Because of these features, our database is unique and may provide useful information for systematic genome-wide studies of transcriptional regulation.},
}
@article{Kanehisa1997,
  author        = {Kanehisa, Minoru},
  title         = {{A database for post-genome analysis}},
  journal       = {Trends in Genetics},
  year          = {1997},
  volume        = {13},
  number        = {9},
  pages         = {375--376},
  doi           = {10.1016/S0168-9525(97)01223-7},
  keywords      = {KEGG},
  mendeley-tags = {KEGG},
  file          = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kanehisa - 1997 - A database for post-genome analysis.pdf:pdf},
}
@article{Kanehisa1996,
  author        = {Kanehisa, Minoru},
  title         = {{Toward pathway engineering: a new database of genetic and molecular pathways}},
  journal       = {Science \& Technology Japan},
  year          = {1996},
  volume        = {59},
  pages         = {34--38},
  url           = {http://www.genome.jp/kegg/docs/stj.pdf},
  keywords      = {KEGG},
  mendeley-tags = {KEGG},
  file          = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kanehisa - 1996 - Toward pathway engineering a new database of genetic and molecular pathways.pdf:pdf},
}
@article{Kanehisa2008,
  author        = {Kanehisa, Minoru and Araki, Michihiro and Goto, Susumu and Hattori, Masahiro and Hirakawa, Mika and Itoh, Masumi and Katayama, Toshiaki and Kawashima, Shuichi and Okuda, Shujiro and Tokimatsu, Toshiaki and Yamanishi, Yoshihiro},
  title         = {{KEGG for linking genomes to life and the environment}},
  journal       = {Nucleic Acids Research},
  year          = {2008},
  month         = jan,
  volume        = {36},
  number        = {Database issue},
  pages         = {D480--D484},
  doi           = {10.1093/nar/gkm882},
  issn          = {1362-4962},
  url           = {http://www.ncbi.nlm.nih.gov/pubmed/18077471},
  keywords      = {Databases- Factual; Disease; Genomics; Humans; Internet; Metabolic Networks and Pathways; Molecular Structure; Pharmaceutical Preparations; Systems Biology; Systems Integration; User-Computer Interface},
  mendeley-tags = {Databases- Factual,Disease,Genomics,Humans,Internet,Metabolic Networks and Pathways,Molecular Structure,Pharmaceutical Preparations,Systems Biology,Systems Integration,User-Computer Interface},
  abstract      = {KEGG (http://www.genome.jp/kegg/) is a database of biological systems that integrates genomic, chemical and systemic functional information. KEGG provides a reference knowledge base for linking genomes to life through the process of PATHWAY mapping, which is to map, for example, a genomic or transcriptomic content of genes to KEGG reference pathways to infer systemic behaviors of the cell or the organism. In addition, KEGG provides a reference knowledge base for linking genomes to the environment, such as for the analysis of drug-target relationships, through the process of BRITE mapping. KEGG BRITE is an ontology database representing functional hierarchies of various biological objects, including molecules, cells, organisms, diseases and drugs, as well as relationships among them. KEGG PATHWAY is now supplemented with a new global map of metabolic pathways, which is essentially a combined map of about 120 existing pathway maps. In addition, smaller pathway modules are defined and stored in
KEGG MODULE that also contains other functional units and complexes. The KEGG resource is being expanded to suit the needs for practical applications. KEGG DRUG contains all approved drugs in the US and Japan, and KEGG DISEASE is a new database linking disease genes, pathways, drugs and diagnostic markers.},
}
@article{Kanehisa2000,
  author        = {Kanehisa, Minoru and Goto, Susumu},
  title         = {{KEGG: Kyoto Encyclopedia of Genes and Genomes}},
  journal       = {Nucleic Acids Research},
  year          = {2000},
  volume        = {28},
  number        = {1},
  pages         = {27--30},
  doi           = {10.1093/nar/28.1.27},
  url           = {http://nar.oxfordjournals.org/content/28/1/27.abstract},
  keywords      = {KEGG},
  mendeley-tags = {KEGG},
  abstract      = {KEGG (Kyoto Encyclopedia of Genes and Genomes) is a knowledge base for systematic analysis of gene functions, linking genomic information with higher order functional information. The genomic information is stored in the GENES database, which is a collection of gene catalogs for all the completely sequenced genomes and some partial genomes with up-to-date annotation of gene functions. The higher order functional information is stored in the PATHWAY database, which contains graphical representations of cellular processes, such as metabolism, membrane transport, signal transduction and cell cycle. The PATHWAY database is supplemented by a set of ortholog group tables for the information about conserved subpathways (pathway motifs), which are often encoded by positionally coupled genes on the chromosome and which are especially useful in predicting gene functions. A third database in KEGG is LIGAND for the information about chemical compounds, enzyme molecules and enzymatic reactions. KEGG provides
Java graphics tools for browsing genome maps, comparing two genome maps and manipulating expression maps, as well as computational tools for sequence comparison, graph comparison and path computation. The KEGG databases are daily updated and made freely available (http://www.genome.ad.jp/kegg/ )},
}
@article{Kanehisa2006,
  author        = {Kanehisa, Minoru and Goto, Susumu and Hattori, Masahiro and Aoki-Kinoshita, Kiyoko F. and Itoh, Masumi and Kawashima, Shuichi and Katayama, Toshiaki and Araki, Michihiro and Hirakawa, Mika},
  title         = {{From genomics to chemical genomics: new developments in KEGG}},
  shorttitle    = {From genomics to chemical genomics},
  journal       = {Nucleic Acids Research},
  year          = {2006},
  month         = jan,
  volume        = {34},
  number        = {Database issue},
  pages         = {D354--D357},
  doi           = {10.1093/nar/gkj102},
  issn          = {1362-4962},
  pmid          = {16381885},
  url           = {http://nar.oxfordjournals.org/content/34/suppl_1/D354.abstract},
  keywords      = {Biotransformation; Chemical Phenomena; Chemistry; Databases- Factual; Databases- Genetic; Environment; Enzymes; Genomics; Humans; Internet; KEGG; Ligands; Pharmaceutical Preparations; Signal Transduction; Systems Integration; User-Computer Interface},
  mendeley-tags = {Biotransformation,Chemical Phenomena,Chemistry,Databases- Factual,Databases- Genetic,Environment,Enzymes,Genomics,Humans,Internet,KEGG,Ligands,Pharmaceutical Preparations,Signal Transduction,Systems Integration,User-Computer Interface},
  abstract      = {The increasing amount of genomic and molecular information is the basis for understanding higher-order biological systems, such as the cell and the organism, and their interactions with the environment, as well as for medical, industrial and other practical applications. The KEGG resource (http://www.genome.jp/kegg/) provides a reference knowledge base for linking genomes to biological systems, categorized as building blocks in the genomic space (KEGG GENES) and the chemical space (KEGG LIGAND), and wiring diagrams of interaction networks and reaction networks (KEGG PATHWAY). A fourth component, KEGG BRITE, has been formally added to the KEGG suite of databases. This reflects our attempt to computerize functional interpretations as part of the pathway reconstruction process based on the hierarchically structured knowledge about the genomic, chemical and network spaces. In accordance with the new chemical genomics initiatives, the scope of KEGG LIGAND has been significantly expanded to cover both
endogenous and exogenous molecules. Specifically, RPAIR contains curated chemical structure transformation patterns extracted from known enzymatic reactions, which would enable analysis of genome-environment interactions, such as the prediction of new reactions and new enzyme genes that would degrade new environmental compounds. Additionally, drug information is now stored separately and linked to new KEGG DRUG structure maps},
}
@article{Kanehisa2002,
  author   = {Kanehisa, Minoru and Goto, Susumu and Kawashima, Shuichi and Nakaya, Akihiro},
  title    = {{The KEGG databases at GenomeNet}},
  journal  = {Nucleic Acids Research},
  year     = {2002},
  month    = jan,
  volume   = {30},
  number   = {1},
  pages    = {42--46},
  issn     = {1362-4962},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/11752249},
  keywords = {Animals; Computational Biology; Computer Graphics; Databases- Genetic; Databases- Protein; Gene Expression Profiling; Genome; Humans; Information Storage and Retrieval; Internet; Japan; Macromolecular Substances; Metabolism; Multigene Family; Protein Conformation; Proteins; Sequence Homology},
  abstract = {The Kyoto Encyclopedia of Genes and Genomes (KEGG) is the primary database resource of the Japanese GenomeNet service (http://www.genome.ad.jp/) for understanding higher order functional meanings and utilities of the cell or the organism from its genome information. KEGG consists of the PATHWAY database for the computerized knowledge on molecular interaction networks such as pathways and complexes, the GENES database for the information about genes and proteins generated by genome sequencing projects, and the LIGAND database for the information about chemical compounds and chemical reactions that are relevant to cellular processes. In addition to these three main databases, limited amounts of experimental data for microarray gene expression profiles and yeast two-hybrid systems are stored in the EXPRESSION and BRITE databases, respectively. Furthermore, a new database, named SSDB, is available for exploring the universe of all protein coding genes in the complete genomes and for identifying
functional links and ortholog groups. The data objects in the KEGG databases are all represented as graphs and various computational methods are developed to detect graph features that can be related to biological functions. For example, the correlated clusters are graph similarities which can be used to predict a set of genes coding for a pathway or a complex, as summarized in the ortholog group tables, and the cliques in the SSDB graph are used to annotate genes. The KEGG databases are updated daily and made freely available (http://www.genome.ad.jp/kegg/).},
}
@article{Kanehisa2012,
  author        = {Kanehisa, Minoru and Goto, Susumu and Sato, Yoko and Furumichi, Miho and Tanabe, Mao},
  title         = {{KEGG for integration and interpretation of large-scale molecular data sets}},
  journal       = {Nucleic Acids Research},
  year          = {2012},
  month         = jan,
  volume        = {40},
  number        = {Database issue},
  pages         = {D109--D114},
  doi           = {10.1093/nar/gkr988},
  issn          = {1362-4962},
  pmid          = {22080510},
  url           = {http://nar.oxfordjournals.org/content/40/D1/D109.long},
  keywords      = {Computational Biology; Databases; Disease; Factual; Genomics; Humans; KEGG; Knowledge Bases; Molecular Sequence Annotation; Pharmacological Phenomena; Software; Systems Integration},
  mendeley-tags = {KEGG},
  file          = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kanehisa et al. - 2012 - KEGG for integration and interpretation of large-scale molecular data sets.pdf:pdf},
  abstract      = {Kyoto Encyclopedia of Genes and Genomes (KEGG, http://www.genome.jp/kegg/ or http://www.kegg.jp/) is a database resource that integrates genomic, chemical and systemic functional information. In particular, gene catalogs from completely sequenced genomes are linked to higher-level systemic functions of the cell, the organism and the ecosystem. Major efforts have been undertaken to manually create a knowledge base for such systemic functions by capturing and organizing experimental knowledge in computable forms; namely, in the forms of KEGG pathway maps, BRITE functional hierarchies and KEGG modules. Continuous efforts have also been made to develop and improve the cross-species annotation procedure for linking genomes to the molecular networks through the KEGG Orthology system. Here we report KEGG Mapper, a collection of tools for KEGG PATHWAY, BRITE and MODULE mapping, enabling integration and interpretation of large-scale data sets. We also report a variant of the KEGG mapping procedure to
extend the knowledge base, where different types of data and knowledge, such as disease genes and drug targets, are integrated as part of the KEGG molecular networks. Finally, we describe recent enhancements to the KEGG content, especially the incorporation of disease and drug information used in practice and in society, to support translational bioinformatics.},
}
@article{Kemper2012,
  author        = {Kemper, Kathryn E. and Visscher, Peter M. and Goddard, Michael E.},
  title         = {{Genetic architecture of body size in mammals}},
  journal       = {Genome Biol.},
  year          = {2012},
  volume        = {13},
  number        = {4},
  pages         = {244},
  doi           = {10.1186/gb-2012-13-4-244},
  url           = {http://genomebiology.com/2012/13/4/244},
  keywords      = {Human height; Recommended by Peter; mutation-selection balance; mutations effects},
  mendeley-tags = {Recommended by Peter},
  file          = {:home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kemper, Visscher, Goddard - 2012 - Genetic architecture of body size in mammals.pdf:pdf},
  abstract      = {Much of the heritability for human stature is caused by mutations of small-to-medium effect. This is because detrimental pleiotropy restricts large-effect mutations to very low frequency.},
}
@misc{quantreg,
  author        = {Koenker, Roger},
  title         = {{quantreg: Quantile Regression}},
  year          = {2012},
  note          = {R package version 4.81},
  url           = {http://cran.r-project.org/package=quantreg},
  keywords      = {R-package},
  mendeley-tags = {R-package},
}
@article{Koenker1978,
abstract = {A simple minimization problem yielding the ordinary sample quantiles in the location model is shown to generalize naturally to the linear model generating a new class of statistics we term "regression quantiles." The estimator which minimizes the sum of absolute residuals is an important special case. Some equivariance properties and the joint asymptotic distribution of regression quantiles are established. These results permit a natural generalization to the linear model of certain well-known robust estimators of location. Estimators are suggested, which have comparable efficiency to least squares for Gaussian linear models while substantially out-performing the least-squares estimator over a wide class of non-Gaussian error distributions.},
author = "Koenker, Roger and Bassett, Gilbert",
doi = "10.2307/1913643",
file = ":home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Koenker, Basset - 1978 - Regression Quantiles.pdf:pdf",
journal = "Econometrica",
keywords = "Regression Quantiles",
number = "1",
pages = "33--50",
title = "{Regression Quantiles}",
url = "http://www.jstor.org/stable/1913643",
volume = "46",
year = "1978"
}
@article{Koenker1994,
author = "Koenker, Roger and Ng, Pin and Portnoy, Stephen",
doi = "10.2307/2337070",
file = ":home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Koenker, Ng, Portnoy - 1994 - Quantile Smoothing Splines.pdf:pdf",
issn = "0006-3444",
journal = "Biometrika",
keywords = "Regression Quantiles",
month = dec,
number = "4",
pages = "673--680",
title = "{Quantile Smoothing Splines}",
url = "http://www.jstor.org/stable/2337070",
volume = "81",
year = "1994"
}
@article{Ku2010,
author = "Ku, Chee Seng and Loy, En Yun and Pawitan, Yudi and Chia, Kee Seng",
doi = "10.1038/jhg.2010.19",
issn = "1434-5161, 1435-232X",
journal = "Journal of Human Genetics",
month = mar,
number = "4",
pages = "195--206",
shorttitle = "The pursuit of genome-wide association studies",
title = "{The pursuit of genome-wide association studies: where are we now?}",
url = "http://www.nature.com/doifinder/10.1038/jhg.2010.19",
volume = "55",
year = "2010"
}
@article{Lee2006,
author = "Lee, Sang Hong and van der Werf, Julius H. J.",
doi = "10.1051/gse:2005025",
file = ":home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Lee, Werf - 2006 - An efficient variance component approach implementing an average information REML suitable for combined LD and linkage mapping with a general complex pedigree.pdf:pdf",
journal = "Genet. Sel. Evol.",
number = "1",
pages = "25--43",
title = "{An efficient variance component approach implementing an average information REML suitable for combined LD and linkage mapping with a general complex pedigree}",
volume = "38",
year = "2006"
}
@article{Lehne2009,
abstract = "Over the past few years, the number of known protein-protein interactions has increased substantially. To make this information more readily available, a number of publicly available databases have set out to collect and store protein-protein interaction data. Protein-protein interactions have been retrieved from six major databases, integrated and the results compared. The six databases (the Biological General Repository for Interaction Datasets [BioGRID], the Molecular INTeraction database [MINT], the Biomolecular Interaction Network Database [BIND], the Database of Interacting Proteins [DIP], the IntAct molecular interaction database [IntAct] and the Human Protein Reference Database [HPRD]) differ in scope and content; integration of all datasets is non-trivial owing to differences in data annotation. With respect to human protein-protein interaction data, HPRD seems to be the most comprehensive. To obtain a complete dataset, however, interactions from all six databases have to be combined.
To overcome this limitation, meta-databases such as the Agile Protein Interaction Database (APID) offer access to integrated protein-protein interaction datasets, although these also currently have certain restrictions.",
author = "Lehne, Benjamin and Schlitt, Thomas",
file = ":home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Lehne, Schlitt - 2009 - Protein-protein interaction databases keeping up with growing interactomes.pdf:pdf",
issn = "1479-7364",
journal = "Human Genomics",
keywords = "Animals; Databases; Humans; Protein; Protein Interaction Mapping; STRING; Software",
mendeley-tags = "STRING",
month = apr,
number = "3",
pages = "291--297",
pmid = "19403463",
title = "{Protein-protein interaction databases: keeping up with growing interactomes}",
url = "http://www.ncbi.nlm.nih.gov/pubmed/19403463",
volume = "3",
year = "2009"
}
@inproceedings{LidauerStranden1999,
address = "Tuusula, Finland",
author = "Lidauer, Martin and Strand{\'e}n, Ismo",
booktitle = "International workshop on high performance computing and new statistical methods in dairy cattle breeding",
number = "20",
pages = "20--25",
series = "INTERBULL Bulletin",
title = "{Fast and flexible program for genetic evaluation in dairy cattle}",
year = "1999"
}
@article{Lippert2011,
abstract = "We describe factored spectrally transformed linear mixed models (FaST-LMM), an algorithm for genome-wide association studies (GWAS) that scales linearly with cohort size in both run time and memory use. On Wellcome Trust data for 15,000 individuals, FaST-LMM ran an order of magnitude faster than current efficient algorithms. Our algorithm can analyze data for 120,000 individuals in just a few hours, whereas current algorithms fail on data for even 20,000 individuals (http://mscompbio.codeplex.com/).",
author = "Lippert, Christoph and Listgarten, Jennifer and Liu, Ying and Kadie, Carl M and Davidson, Robert I and Heckerman, David",
doi = "10.1038/nmeth.1681",
file = ":home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Lippert et al. - 2011 - FaST linear mixed models for genome-wide association studies.pdf:pdf",
issn = "1548-7105",
journal = "Nature Methods",
keywords = "Algorithms; Computer Simulation; Genetic; Genome-Wide Association Study; Models; Software",
month = oct,
number = "10",
pages = "833--835",
pmid = "21892150",
title = "{FaST linear mixed models for genome-wide association studies}",
url = "http://www.ncbi.nlm.nih.gov/pubmed/21892150",
volume = "8",
year = "2011"
}
@article{Lund2011,
abstract = "Size of the reference population and reliability of phenotypes are crucial factors influencing the reliability of genomic predictions. It is therefore useful to combine closely related populations. Increased accuracies of genomic predictions depend on the number of individuals added to the reference population, the reliability of their phenotypes, and the relatedness of the populations that are combined.",
author = "Lund, Mogens Sand{\o} and de Roos, Adrianus P W and de Vries, Alfred G and Druet, Tom and Ducrocq, Vincent and Fritz, S{\'e}bastien and Guillaume, Fran\c{c}ois and Guldbrandtsen, Bernt and Liu, Zenting and Reents, Reinhard and Schrooten, Chris and Seefried, Franz and Su, Guosheng",
doi = "10.1186/1297-9686-43-43",
file = ":home/stefan/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Lund et al. - 2011 - A common reference population from four European Holstein populations increases reliability of genomic predictions.pdf:pdf",
issn = "1297-9686",
journal = "Genet. Sel. Evol.",
month = dec,
number = "1",
pages = "43",
pmid = "22152008",
title = "{A common reference population from four European Holstein populations increases reliability of genomic predictions}",
url = "http://www.gsejournal.org/content/43/1/43",
volume = "43",
year = "2011"
}
@book{LynchWalsh1998,
  author    = {Lynch, Michael and Walsh, Bruce},
  title     = {{Genetics and Analysis of Quantitative Traits}},
  publisher = {Sinauer Associates, Inc.},
  address   = {Sunderland, USA},
  year      = {1998},
  isbn      = {0-87893-481-2}
}
@misc{DMU5.1,
address = "Tjele, Denmark",
author = "Madsen, Per and Jensen, Just",
pages = "32",