@article{Adelson1992,
author = {Adelson, E. H. and Wang, J. Y. A.},
doi = {10.1109/34.121783},
issn = {01628828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {2},
pages = {99--106},
title = {{Single lens stereo with a plenoptic camera}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=121783},
volume = {14},
year = {1992}
}
@article{journals/neco/AmitG97,
author = {Amit, Yali and Geman, Donald},
journal = {Neural Computation},
keywords = {dblp},
number = {7},
pages = {1545--1588},
title = {{Shape Quantization and Recognition with Randomized Trees}},
url = {http://dblp.uni-trier.de/db/journals/neco/neco9.html{\#}AmitG97},
volume = {9},
year = {1997}
}
@inproceedings{Ba2013dothey,
abstract = {Currently, deep neural networks are the state of the art on problems such as speech recognition and computer vision. In this extended abstract, we show that shallow feed-forward networks can learn the complex functions previously learned by deep nets and achieve accuracies previously only achievable with deep models. Moreover, in some cases the shallow neural nets can learn these deep functions using a total number of parameters similar to the original deep model. We evaluate our method on the TIMIT phoneme recognition task and are able to train shallow fully-connected nets that perform similarly to complex, well-engineered, deep convolutional architectures. Our success in training shallow neural nets to mimic deeper models suggests that there probably exist better algorithms for training shallow feed-forward nets than those currently available.},
archivePrefix = {arXiv},
arxivId = {arXiv:1312.6184v5},
author = {Ba, Lei Jimmy and Caruana, Rich},
booktitle = {arXiv preprint arXiv:1312.6184},
eprint = {arXiv:1312.6184v5},
pages = {1--6},
title = {{Do Deep Nets Really Need to be Deep?}},
url = {http://arxiv.org/abs/1312.6184},
volume = {2014},
year = {2013}
}
@inproceedings{conf/icml/2015,
editor = {Bach, Francis R and Blei, David M},
publisher = {JMLR.org},
series = {JMLR Workshop and Conference Proceedings},
title = {{Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, Lille, France, 6-11 July 2015}},
url = {http://jmlr.org/proceedings/papers/v37/},
volume = {37},
year = {2015}
}
@article{Barron2012,
abstract = {We address the problem of recovering shape, albedo, and illumination from a single grayscale image of an object, using shading as our primary cue. Because this problem is fundamentally underconstrained, we construct statistical models of albedo and shape, and define an optimization problem that searches for the most likely explanation of a single image. We present two priors on albedo which encourage local smoothness and global sparsity, and three priors on shape which encourage flatness, outward-facing orientation at the occluding contour, and local smoothness. We present an optimization technique for using these priors to recover shape, albedo, and a spherical harmonic model of illumination. Our model, which we call SAIFS (shape, albedo, and illumination from shading) produces reasonable results on arbitrary grayscale images taken in the real world, and outperforms all previous grayscale intrinsic image-style algorithms on the MIT Intrinsic Images dataset.},
author = {Barron, Jonathan T and Malik, Jitendra},
doi = {10.1109/CVPR.2012.6247693},
isbn = {9781467312288},
issn = {10636919},
journal = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {334--341},
publisher = {IEEE},
title = {{Shape, Albedo, and Illumination from a Single Image of an Unknown Object}},
url = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=6247693{\&}contentType=Conference+Publications},
year = {2012}
}
@inproceedings{bastani2016measuring,
author = {Bastani, Osbert and Ioannou, Yani and Lampropoulos, Leonidas and Vytiniotis, Dimitrios and Nori, Aditya and Criminisi, Antonio},
booktitle = {Neural Information Processing Systems (NIPS), 2016},
title = {{Measuring Neural Net Robustness with Constraints}},
year = {2016}
}
@misc{Beacco2003,
abstract = {The characterization of the photometric properties of a road surface is of prime importance in the design of lighting plants and when real viewing conditions should be determined by computer simulation. The measurement can be done in the laboratory, but in situ measurements are of particular interest because they permit testing several zones of the road and leave no mechanical scratches on the surface of the sample. This work describes an innovative portable system, based on a CCD luminance meter, able to obtain uncertainty comparable to traditional laboratory systems.},
author = {Beacco, D and Fiorentin, P and Rossi, G},
booktitle = {Proceedings of the 20th IEEE Instrumentation and Measurement Technology Conference (IMTC 2003)},
doi = {10.1109/IMTC.2003.1208001},
isbn = {0780377052},
issn = {10915281},
month = {may},
pages = {1508--1512},
publisher = {IEEE},
title = {{A system for in situ measurements of road reflection properties}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1208001},
volume = {2},
year = {2003}
}
@article{Bellia2002,
abstract = {Recent availability of video-cameras with CCD-type sensors (charge coupled device) has proved to be particularly stimulating for all those applications requiring photometric measurements, above all for the measurement of luminance values related to the physical and technical qualities of a built environment. This method allows the instantaneous capture of an image, thus enabling collection of luminance values relating to the points of measurement; this in turn leads to the evaluation of luminance distribution and lighting levels of the surfaces that make up the environment. Setting up this system requires the following basic configuration: a photopic filter V($\lambda$), an optic interface, a computer equipped with an appropriate card for the capture and digitalisation of the acquired image (the ``frame grabber'') and, finally, suitable software for the processing of collected data. In this article a detailed description of this acquisition system is reported, and subsequently a report on the procedure adopted for its calibration so as to enable the capture of relevant photometric values. Final analysis and validation of results are carried out by means of field test. A case study of CCD photometer application has been then performed using a basic software tool autonomously developed to evaluate indoor lighting level; the luminance map of a diffuse light source has been used as ``input'' data for the developed software, and the ``output'' data, i.e. illumination levels, have been then compared with measured values.},
author = {Bellia, L and Cesarano, A and Minichiello, F and Sibilio, S},
doi = {10.1016/S0360-1323(01)00093-2},
issn = {03601323},
journal = {Building and Environment},
keywords = {calculation,luminance,photometers,software code,video camera},
number = {11},
pages = {1099--1106},
publisher = {Elsevier},
title = {{Setting up a CCD photometer for lighting research and design}},
url = {http://linkinghub.elsevier.com/retrieve/pii/S0360132301000932},
volume = {37},
year = {2002}
}
@inproceedings{Bengio2010labeltree,
author = {Bengio, S and Weston, J and Grangier, D},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
title = {{Label Embedding Trees for Large Multi-Class Tasks}},
year = {2010}
}
@article{bengio:ieeenn94,
author = {Bengio, Yoshua and Simard, Patrick and Frasconi, Paolo},
journal = {IEEE Transactions on Neural Networks},
keywords = {nn},
number = {2},
pages = {157--166},
title = {{Learning Long-Term Dependencies With Gradient Descent Is Difficult}},
volume = {5},
year = {1994}
}
@book{Bishop1995,
address = {Oxford},
author = {Bishop, Christopher M},
keywords = {imported},
publisher = {Oxford University Press},
title = {{Neural Networks for Pattern Recognition}},
year = {1995}
}
@incollection{Bottou2012sgdtricks,
author = {Bottou, L{\'{e}}on},
booktitle = {Neural Networks: Tricks of the Trade (2nd ed.)},
editor = {Montavon, Gr{\'{e}}goire and Orr, Genevieve B and M{\"{u}}ller, Klaus-Robert},
isbn = {978-3-642-35288-1},
keywords = {dblp},
pages = {421--436},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{Stochastic Gradient Descent Tricks}},
volume = {7700},
year = {2012}
}
@article{breiman2001random,
author = {Breiman, Leo},
journal = {Machine Learning},
keywords = {forests random},
pages = {5--32},
title = {{Random Forests}},
volume = {45},
year = {2001}
}
@article{breiman1996bagging,
abstract = {Bagging predictors is a method for generating multiple versions of a predictor and using these to get an aggregated predictor. The aggregation averages over the versions when predicting a numerical outcome and does a plurality vote when predicting a class. The multiple versions are formed by making bootstrap replicates of the learning set and using these as new learning sets. Tests on real and simulated data sets using classification and regression trees and subset selection in linear regression show that bagging can give substantial gains in accuracy. The vital element is the instability of the prediction method. If perturbing the learning set can cause significant changes in the predictor constructed, then bagging can improve accuracy.},
author = {Breiman, Leo},
doi = {10.1007/BF00058655},
issn = {0885-6125},
journal = {Machine Learning},
keywords = {aggregation,averaging,bootstrap,combining},
number = {2},
pages = {123--140},
publisher = {Springer},
title = {{Bagging Predictors}},
volume = {24},
year = {1996}
}
@book{breiman84,
author = {Breiman, Leo and Friedman, Jerome H and Olshen, Richard A and Stone, Charles J},
isbn = {978-0534980535},
publisher = {CRC Press},
title = {{Classification and regression trees}},
year = {1984}
}
@inproceedings{Chang2012,
author = {Chang, Hyung Jin and Jeong, Hawook and Choi, Jin Young},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition},
title = {{Active Attentional Sampling for Speed-up of Background Subtraction}},
year = {2012}
}
@inproceedings{Chen2015,
abstract = {As deep nets are increasingly used in applications suited for mobile devices, a fundamental dilemma becomes apparent: the trend in deep learning is to grow models to absorb ever-increasing data set sizes; however mobile devices are designed with very little memory and cannot store such large models. We present a novel network architecture, HashedNets, that exploits inherent redundancy in neural networks to achieve drastic reductions in model sizes. HashedNets uses a low-cost hash function to randomly group connection weights into hash buckets, and all connections within the same hash bucket share a single parameter value. These parameters are tuned to adjust to the HashedNets weight sharing architecture with standard backprop during training. Our hashing procedure introduces no additional memory overhead, and we demonstrate on several benchmark data sets that HashedNets shrink the storage requirements of neural networks substantially while mostly preserving generalization performance.},
archivePrefix = {arXiv},
arxivId = {1504.04788},
author = {Chen, Wenlin and Wilson, James T. and Tyree, Stephen and Weinberger, Kilian Q. and Chen, Yixin},
booktitle = {Proceedings of The 32nd International Conference on Machine Learning},
editor = {Bach, Francis R and Blei, David M},
eprint = {1504.04788},
isbn = {9781510810587},
keywords = {dblp},
pages = {2285--2294},
publisher = {JMLR.org},
series = {JMLR Proceedings},
title = {{Compressing Neural Networks with the Hashing Trick}},
url = {http://arxiv.org/abs/1504.04788},
volume = {37},
year = {2015}
}
@inproceedings{Ciresan2012,
abstract = {Traditional methods of computer vision and machine learning cannot match human performance on tasks such as the recognition of handwritten digits or traffic signs. Our biologically plausible deep artificial neural network architectures can. Small (often minimal) receptive fields of convolutional winner-take-all neurons yield large network depth, resulting in roughly as many sparsely connected neural layers as found in mammals between retina and visual cortex. Only winner neurons are trained. Several deep neural columns become experts on inputs preprocessed in different ways; their predictions are averaged. Graphics cards allow for fast training. On the very competitive MNIST handwriting benchmark, our method is the first to achieve near-human performance. On a traffic sign recognition benchmark it outperforms humans by a factor of two. We also improve the state-of-the-art on a plethora of common image classification benchmarks.},
archivePrefix = {arXiv},
arxivId = {1202.2745},
author = {Ciresan, Dan and Meier, Ueli and Schmidhuber, J{\"{u}}rgen},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
eprint = {1202.2745},
isbn = {1467312266},
pages = {3642--3649},
title = {{Multi-column deep neural networks for image classification}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.299.4060{\&}rep=rep1{\&}type=pdf},
year = {2012}
}
@inproceedings{Cogswell2016,
author = {Cogswell, Michael and Ahmed, Faruk and Girshick, Ross B and Zitnick, Larry and Batra, Dhruv},
booktitle = {International Conference on Learning Representations},
title = {{Reducing Overfitting in Deep Networks by Decorrelating Representations}},
year = {2016}
}
@book{criminisi2013decision,
author = {Criminisi, Antonio and Shotton, Jamie},
publisher = {Springer Publishing Company, Incorporated},
title = {{Decision Forests for Computer Vision and Medical Image Analysis}},
year = {2013}
}
@article{Cucchiara2001,
author = {Cucchiara, R. and Grana, C. and Piccardi, M. and Prati, A. and Sirotti, S.},
doi = {10.1109/ITSC.2001.948679},
isbn = {0-7803-7194-1},
journal = {Proceedings of the 2001 IEEE Intelligent Transportation Systems Conference (ITSC 2001)},
pages = {334--339},
publisher = {IEEE},
title = {{Improving shadow suppression in moving object detection with HSV color information}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=948679},
year = {2001}
}
@article{Cucchiara2003,
author = {Cucchiara, Rita and Grana, Costantino and Piccardi, Massimo and Prati, Andrea},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {10},
pages = {1337--1342},
title = {{Detecting Moving Objects, Ghosts, and Shadows in Video Streams}},
volume = {25},
year = {2003}
}
@inproceedings{lecun1989optimal,
abstract = {We have used information-theoretic ideas to derive a class of practical and nearly optimal schemes for adapting the size of a neural network. By removing unimportant weights from a network, several improvements can be expected: better generalization, fewer training examples required, and improved speed of learning and/or classification. The basic idea is to use second-derivative information to make a tradeoff between network complexity and training set error. Experiments confirm the usefulness of the methods on a real-world application.},
author = {LeCun, Yann and Denker, John S and Solla, Sara A},
booktitle = {Advances in Neural Information Processing Systems},
isbn = {1558601007},
pages = {598--605},
title = {{Optimal Brain Damage}},
volume = {2},
year = {1990}
}
@article{journals/mcss/Cybenko92,
author = {Cybenko, G},
journal = {Mathematics of Control, Signals, and Systems},
keywords = {dblp},
number = {4},
pages = {303--314},
title = {{Approximation by superpositions of a sigmoidal function}},
url = {http://dblp.uni-trier.de/db/journals/mcss/mcss5.html{\#}Cybenko92},
volume = {2},
year = {1989}
}
@book{damelin2011,
abstract = {Arising from courses taught by the authors, this largely self-contained treatment is ideal for mathematicians who are interested in applications or for students from applied fields who want to understand the mathematics behind their subject. Early chapters cover Fourier analysis, functional analysis, probability and linear algebra, all of which have been chosen to prepare the reader for the applications to come. The book includes rigorous proofs of core results in compressive sensing and wavelet convergence. Fundamental is the treatment of the linear system y=$\Phi$x in both finite and infinite dimensions. There are three possibilities: the system is determined, overdetermined or underdetermined, each with different aspects. The authors assume only basic familiarity with advanced calculus, linear algebra and matrix theory and modest familiarity with signal processing, so the book is accessible to students from the advanced undergraduate level. Many exercises are also included.},
address = {Cambridge},
author = {Damelin, Steven B. and {Miller Jr}, Willard},
doi = {10.1017/CBO9781139003896},
isbn = {9781107601048},
pages = {462},
publisher = {Cambridge University Press},
title = {{The Mathematics of Signal Processing}},
year = {2012}
}
@article{Debevec2008,
author = {Debevec, PE and Malik, J},
journal = {ACM SIGGRAPH 2008 classes},
title = {{Recovering high dynamic range radiance maps from photographs}},
url = {http://dl.acm.org/citation.cfm?id=1401174},
year = {2008}
}
@misc{DeMenthon1990a,
abstract = {An exact method for computing the position of a triangle in space from its image is presented. Also presented is an approximate method based on orthoperspective, an approximation of perspective which produces lower errors for off-center triangle images than scaled orthographic projection. A comparison is made of exact and approximate solutions for the triangle pose. This comparison gives the relative combinations of image and triangle characteristics which are likely to generate the largest errors. Model-based pose estimation techniques which match image and model triangles require large numbers of matching operations in real-world applications. It is shown that the approximate model can be used to build lookup tables for each of the triangles of a model and that they speed up the estimation of an object pose},
author = {DeMenthon, D and Davis, L S},
booktitle = {Proceedings IEEE International Conference on Robotics and Automation},
doi = {10.1109/ROBOT.1990.125943},
isbn = {0818690615},
number = {11},
pages = {40--45},
publisher = {IEEE Comput. Soc. Press},
title = {{New exact and approximate solutions of the three-point perspective problem}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=125943},
volume = {14},
year = {1990}
}
@misc{DeMenthon1990,
abstract = {An exact method for computing the position of a triangle in space from its image is presented. Also presented is an approximate method based on orthoperspective, an approximation of perspective which produces lower errors for off-center triangle images than scaled orthographic projection. A comparison is made of exact and approximate solutions for the triangle pose. This comparison gives the relative combinations of image and triangle characteristics which are likely to generate the largest errors. Model-based pose estimation techniques which match image and model triangles require large numbers of matching operations in real-world applications. It is shown that the approximate model can be used to build lookup tables for each of the triangles of a model and that they speed up the estimation of an object pose},
author = {DeMenthon, D and Davis, L S},
booktitle = {Proceedings IEEE International Conference on Robotics and Automation},
doi = {10.1109/ROBOT.1990.125943},
isbn = {0818690615},
number = {11},
pages = {40--45},
publisher = {IEEE Comput. Soc. Press},
title = {{New exact and approximate solutions of the three-point perspective problem}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=125943},
volume = {14},
year = {1990}
}
@misc{DeMenthon1992,
abstract = {Model-based pose estimation techniques that match image and model triangles require large numbers of matching operations in real-world applications. The authors show that by using approximations to perspective, 2D lookup tables can be built for each of the triangles of the models. An approximation called `weak perspective' has been applied previously to this problem; the authors consider two other perspective approximations: paraperspective and orthoperspective. These approximations produce lower errors for off-center image features than weak perspective},
author = {DeMenthon, D and Davis, L S},
booktitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
doi = {10.1109/34.166625},
issn = {01628828},
number = {11},
pages = {1100--1105},
title = {{Exact and approximate solutions of the perspective-three-point problem}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=166625},
volume = {14},
year = {1992}
}
@inproceedings{Deng2011fastbalanced,
author = {Deng, J and Satheesh, S and Berg, A C and Li, F.-F.},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
title = {{Fast and Balanced: Efficient Label Tree Learning for Large Scale Object Recognition}},
year = {2011}
}
@inproceedings{Denil2013predicting,
abstract = {We demonstrate that there is significant redundancy in the parameterization of several deep learning models. Given only a few weight values for each feature it is possible to accurately predict the remaining values. Moreover, we show that not only can the parameter values be predicted, but many of them need not be learned at all. We train several different architectures by learning only a small number of weights and predicting the rest. In the best case we are able to predict more than 95{\%} of the weights of a network without any drop in accuracy.},
archivePrefix = {arXiv},
arxivId = {1306.0543},
author = {Denil, Misha and Shakibi, Babak and Dinh, Laurent and Ranzato, Marc'Aurelio and de Freitas, Nando},
booktitle = {Neural Information Processing Systems (NIPS)},
eprint = {1306.0543},
pages = {2148--2156},
title = {{Predicting Parameters in Deep Learning}},
url = {http://papers.nips.cc/paper/5025-predicting-parameters-in-deep-learning},
year = {2013}
}
@inproceedings{Denton2014efficient,
abstract = {We present techniques for speeding up the test-time evaluation of large convolutional networks, designed for object recognition tasks. These models deliver impressive accuracy but each image evaluation requires millions of floating point operations, making their deployment on smartphones and Internet-scale clusters problematic. The computation is dominated by the convolution operations in the lower layers of the model. We exploit the linear structure present within the convolutional filters to derive approximations that significantly reduce the required computation. Using large state-of-the-art models, we demonstrate speedups of convolutional layers on both CPU and GPU by a factor of 2x, while keeping the accuracy within 1{\%} of the original model.},
archivePrefix = {arXiv},
arxivId = {1404.0736},
author = {Denton, Emily and Zaremba, Wojciech and Bruna, Joan and LeCun, Yann and Fergus, Rob},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
eprint = {1404.0736},
issn = {10495258},
number = {1},
pages = {1--11},
title = {{Exploiting Linear Structure Within Convolutional Networks for Efficient Evaluation}},
url = {http://arxiv.org/abs/1404.0736},
year = {2014}
}
@article{Drew,
author = {Drew, Mark S},
keywords = {based vision,color,dichromatic model,lambertian,neutral interface,physics,reflectance,shape,shape representation},
number = {604},
pages = {369--380},
title = {{Photometric Stereo Without Multiple Images}},
volume = {3016}
}
@article{Edelman1998,
archivePrefix = {arXiv},
arxivId = {arXiv:physics/9806030v1},
author = {Edelman, A and Arias, TA},
eprint = {9806030v1},
journal = {arXiv preprint physics/9806030},
keywords = {15a18,49m07,49m15,51f20,53b20,65f15,81v55,ams subject classifications,conjugate gradient,eigenvalue optimization,eigenvalues and eigenvectors,electronic structures computation,grassmann manifold,invariant subspace,newton,orthogonality constraints,programming,rayleigh quotient iteration,reduced gradient method,s method,sequential quadratic,stiefel manifold,subspace tracking},
primaryClass = {arXiv:physics},
title = {{The geometry of algorithms with orthogonality constraints}},
url = {http://arxiv.org/abs/physics/9806030},
year = {1998}
}
@inproceedings{Fahlman1989,
author = {Fahlman, Scott E and Lebiere, Christian},
booktitle = {Advances in Neural Information Processing Systems 2},
editor = {Touretzky, David S},
isbn = {1558601007},
pages = {524--532},
publisher = {Morgan Kaufmann},
title = {{The Cascade-Correlation Learning Architecture}},
year = {1990}
}
@misc{Fleck1995,
abstract = {Perspective projection is generally accepted as the ideal model of image formation. Many recent algorithms, and many recent judgements about the relative merits of different algorithms, depend on this assumption. However, perspective projection represents only the front half of the viewing sphere and it distorts the shape and intensity of objects unless they lie near the optical axis. It is only one of several projections used in lens design and it does not accurately model the behavior of many real lenses. It works well only for narrow-angle images. This paper surveys the properties of several alternative models of image formation. A model based on stereographic projection of the viewing sphere is shown to be a better general-purpose imaging model than perspective projection. The new model can represent wider fields of view and more closely approximates real wide-angle lenses. It preserves a suitable range of shape properties, including local symmetries. It approximates narrow-angl...},
author = {Fleck, Margaret M},
booktitle = {Research report},
number = {95-01},
publisher = {University of Iowa},
title = {{Perspective projection: the wrong imaging model}},
url = {http://www.cs.illinois.edu/{~}mfleck/my-papers/stereographic-TR.pdf},
year = {1995}
}
@incollection{Hertzmann2005,
author = {Fleet, David and Hertzmann, Aaron},
pages = {76--91},
title = {{Radiometry and Reflection}},
year = {2005}
}
@misc{fodor2002survey,
abstract = {In this paper, we assume that we have $n$ observations, each being a realization of the $p$-dimensional random variable $x = (x_1, \ldots, x_p)$ with mean $E(x) = \mu = (\mu_1, \ldots, \mu_p)$ and covariance matrix $E(x-\mu)(x-\mu)^T = \Sigma_{p \times p}$. We denote such an observation matrix by $X = [x_{i,j}]$, $1 \le i \le p$, $1 \le j \le n$. If $\mu_i$ and $\sigma_i = \sqrt{\Sigma_{(i,i)}}$ denote the mean and the standard deviation of the $i$th random variable, respectively, then we will often standardize the observations $x_{i,j}$ by $(x_{i,j} - \mu_i)/\sigma_i$, where $\mu_i = \bar{x}_i = \frac{1}{n}\sum_{j=1}^{n} x_{i,j}$ and $\sigma_i = \sqrt{\frac{1}{n}\sum_{j=1}^{n} (x_{i,j} - \bar{x}_i)^2}$.},
author = {Fodor, I K},
booktitle = {Center for Applied Scientific Computing Lawrence Livermore National Laboratory},
doi = {10.2172/15002155},
pages = {1--18},
publisher = {Technical Report UCRL-ID-148494, Lawrence Livermore National Laboratory},
title = {{A survey of dimension reduction techniques}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.8.5098},
volume = {9},
year = {2002}
}
@article{Fuk80,
author = {Fukushima, K},
journal = {Biological Cybernetics},
keywords = {deep fukushima learning neocognitron networks neur},
pages = {193--202},
title = {{Neocognitron: A self-organizing neural network model for a mechanism of pattern recognition unaffected by shifts in position}},
volume = {36},
year = {1980}
}
@article{fukushima2013artificial,
abstract = {The neocognitron is a neural network model proposed by Fukushima (1980). Its architecture was suggested by neurophysiological findings on the visual systems of mammals. It is a hierarchical multi-layered network. It acquires the ability to robustly recognize visual patterns through learning. Although the neocognitron has a long history, modifications of the network to improve its performance are still going on. For example, a recent neocognitron uses a new learning rule, named add-if-silent, which makes the learning process much simpler and more stable. Nevertheless, a high recognition rate can be kept with a smaller scale of the network. Referring to the history of the neocognitron, this paper discusses recent advances in the neocognitron. We also show that various new functions can be realized by, for example, introducing top-down connections to the neocognitron: mechanism of selective attention, recognition and completion of partly occluded patterns, restoring occluded contours, and so on. {\textcopyright} 2012 Elsevier Ltd.},
author = {Fukushima, Kunihiko},
doi = {10.1016/j.neunet.2012.09.016},
isbn = {0893-6080},
issn = {08936080},
journal = {Neural Networks},
keywords = {Artificial vision,Bottom-up and top-down,Hierarchical network,Modeling neural networks,Neocognitron},
pages = {103--119},
pmid = {23098752},
publisher = {Elsevier},
title = {{Artificial vision by multi-layered neural networks: Neocognitron and its advances}},
volume = {37},
year = {2013}
}
@inproceedings{conf/icml/2010,
booktitle = {ICML},
editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten},
keywords = {dblp},
publisher = {Omnipress},
title = {{Proceedings of the 27th International Conference on Machine Learning (ICML-10), June 21-24, 2010, Haifa, Israel}},
url = {http://dblp.uni-trier.de/db/conf/icml/icml2010.html},
year = {2010}
}
@inproceedings{Gal2016Dropout,
author = {Gal, Yarin and Ghahramani, Zoubin},
booktitle = {Proceedings of the 33rd International Conference on Machine Learning (ICML-16)},
title = {{Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning}},
year = {2016}
}
@article{Geiger2012,
author = {Geiger, Andreas and Lenz, Philip and Urtasun, Raquel},
journal = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
title = {{Are We Ready for Autonomous Driving? The KITTI Vision Benchmark Suite}},
url = {http://h1997453.stratoserver.net/publications/cvpr12.pdf},
year = {2012}
}
@inproceedings{girshick2015deformable,
author = {Girshick, Ross and Iandola, Forrest and Darrell, Trevor and Malik, Jitendra},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages = {437--446},
title = {{Deformable Part Models are Convolutional Neural Networks}},
year = {2015}
}
@inproceedings{glorot2010understanding,
abstract = {Whereas before 2006 it appears that deep multilayer neural networks were not successfully trained, since then several algorithms have been shown to successfully train them, with experimental results showing the superiority of deeper vs less deep architectures. All these experimental results were obtained with new initialization or training mechanisms. Our objective here is to understand better why standard gradient descent from random initialization is doing so poorly with deep neural networks, to better understand these recent relative successes and help design better algorithms in the future. We first observe the influence of the non-linear activations functions. We find that the logistic sigmoid activation is unsuited for deep networks with random initialization because of its mean value, which can drive especially the top hidden layer into saturation. Surprisingly, we find that saturated units can move out of saturation by themselves, albeit slowly, and explaining the plateaus sometimes seen when training neural networks. We find that a new non-linearity that saturates less can often be beneficial. Finally, we study how activations and gradients vary across layers and during training, with the idea that training may be more difficult when the singular values of the Jacobian associated with each layer are far from 1. Based on these considerations, we propose a new initialization scheme that brings substantially faster convergence.},
author = {Glorot, Xavier and Bengio, Yoshua},
booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS)},
issn = {15324435},
pages = {249--256},
title = {{Understanding the difficulty of training deep feedforward neural networks}},
url = {http://machinelearning.wustl.edu/mlpapers/paper{\_}files/AISTATS2010{\_}GlorotB10.pdf},
volume = {9},
year = {2010}
}
@article{Golovinskiy2009,
abstract = {We present a min-cut based method of segmenting objects in point clouds. Given an object location, our method builds a k-nearest neighbors graph, assumes a background prior, adds hard foreground (and optionally background) constraints, and finds the min-cut to compute a foreground-background segmentation. Our method can be run fully automatically, or interactively with a user interface. We test our system on an outdoor urban scan, quantitatively evaluate our algorithm on a test set of about 1000 objects, and compare to several alternative approaches.},
author = {Golovinskiy, Aleksey and Funkhouser, Thomas},
doi = {10.1109/ICCVW.2009.5457721},
isbn = {9781424444427},
journal = {2009 IEEE 12th International Conference on Computer Vision Workshops (ICCV Workshops)},
pages = {39--46},
publisher = {IEEE},
title = {{Min-cut based segmentation of point clouds}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5457721},
volume = {150},
year = {2009}
}
@inproceedings{goodfellow2013maxout,
abstract = {We consider the problem of designing models to leverage a recently introduced approximate model averaging technique called dropout. We define a simple new model called maxout (so named because its output is the max of a set of inputs, and because it is a natural companion to dropout) designed to both facilitate optimization by dropout and improve the accuracy of dropout's fast approximate model averaging technique. We empirically verify that the model successfully accomplishes both of these tasks. We use maxout and dropout to demonstrate state of the art classification performance on four benchmark datasets: MNIST, CIFAR-10, CIFAR-100, and SVHN.},
archivePrefix = {arXiv},
arxivId = {1302.4389},
author = {Goodfellow, Ian J and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML)},
eprint = {1302.4389},
pages = {1319--1327},
title = {{Maxout Networks}},
volume = {28},
year = {2013}
}
@book{Goodfellow-et-al-2016-Book,
annote = {Book in preparation for MIT Press},
author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
title = {{Deep Learning}},
url = {http://www.deeplearningbook.org},
year = {2016}
}
@article{Gortler1996,
address = {New York, New York, USA},
author = {Gortler, Steven J and Grzeszczuk, Radek and Szeliski, Richard and Cohen, Michael F},
doi = {10.1145/237170.237200},
isbn = {0897917464},
journal = {Proceedings of the 23rd annual conference on Computer graphics and interactive techniques - SIGGRAPH '96},
pages = {43--54},
publisher = {ACM Press},
title = {{The lumigraph}},
url = {http://portal.acm.org/citation.cfm?doid=237170.237200},
year = {1996}
}
@misc{1502.02551v1,
abstract = {Training of large-scale deep neural networks is often constrained by the available computational resources. We study the effect of limited precision data representation and computation on neural network training. Within the context of low-precision fixed-point computations, we observe the rounding scheme to play a crucial role in determining the network's behavior during training. Our results show that deep networks can be trained using only 16-bit wide fixed-point number representation when using stochastic rounding, and incur little to no degradation in the classification accuracy. We also demonstrate an energy-efficient hardware accelerator that implements low-precision fixed-point arithmetic with stochastic rounding.},
archivePrefix = {arXiv},
arxivId = {1502.02551},
author = {Gupta, Suyog and Agrawal, Ankur and Gopalakrishnan, Kailash and Narayanan, Pritish},
booktitle = {Proceedings of the 32nd International Conference on Machine Learning (ICML-15)},
eprint = {1502.02551},
isbn = {9781510810587},
month = {feb},
pages = {1737--1746},
title = {{Deep Learning with Limited Numerical Precision}},
url = {http://jmlr.org/proceedings/papers/v37/gupta15.pdf},
year = {2015}
}
@article{journals/iandc/HancockJLT96,
abstract = {k-Decision lists and decision trees play important roles in learning theory as well as in practical learning systems. k-Decision lists generalize classes such as monomials, k-DNF, and k-CNF, and like these subclasses they are polynomially PAC-learnable [R. Rivest, Mach. Learning 2 (1987), 229--246]. This leaves open the question of whether k-decision lists can be learned as efficiently as k-DNF. We answer this question negatively in a certain sense, thus disproving a claim in a popular textbook [M. Anthony and N. Biggs, ``Computational Learning Theory,'' Cambridge Univ. Press, Cambridge, UK, 1992]. Decision trees, on the other hand, are not even known to be polynomially PAC-learnable, despite their widespread practical application. We will show that decision trees are not likely to be efficiently PAC-learnable. We summarize our specific results. The following problems cannot be approximated in polynomial time within a factor of $2^{\log^\delta n}$ for any $\delta < 1$, unless $NP \subset DTIME[2^{polylog\, n}]$: a generalized set cover, k-decision lists, k-decision lists by monotone decision lists, and decision trees. Decision lists cannot be approximated in polynomial time within a factor of $n^\delta$, for some constant $\delta > 0$, unless $NP = P$. Also, k-decision lists with $\ell$ 0--1 alternations cannot be approximated within a factor $\log^\ell n$ unless $NP \subset DTIME[n^{O(\log \log n)}]$ (providing an interesting comparison to the upper bound obtained by A. Dhagat and L. Hellerstein [in ``FOCS '94,'' pp. 64--74]).},
author = {Hancock, Thomas and Jiang, Tao and Li, Ming and Tromp, John},
doi = {10.1006/inco.1996.0040},
issn = {0890-5401},
journal = {Information and Computation},
number = {2},
pages = {114--122},
title = {{Lower Bounds on Learning Decision Lists and Trees}},
url = {http://www.sciencedirect.com/science/article/pii/S0890540196900401{\%}5Cnhttp://www.sciencedirect.com/science/article/pii/S0890540196900401/pdf?md5=59bdd8c077309262836d57b76a5a5577{\&}pid=1-s2.0-S0890540196900401-main.pdf},
volume = {126},
year = {1996}
}
@misc{Hanmandlu2000,
abstract = {A recursive estimation of depth from a sequence of images is proposed. Using the spherical projection, a simple equation is derived that relates image motion with the object motion. This equation is reformulated into a dynamical state space model for which Kalman filter can be easily applied to yield the estimate of depth. Point correspondences have been used to obtain feature points and the motion parameters are assumed to be known. The results are illustrated on a real object},
author = {Hanmandlu, M and Shantaram, V and Sudheer, K},
booktitle = {Proceedings of International Conference on Robotics and Automation},
doi = {10.1109/ITCC.2000.844211},
isbn = {0769505406},
month = {apr},
pages = {2264--2269},
publisher = {IEEE},
title = {{Depth estimation from a sequence of images using spherical projection}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=619298},
volume = {3},
year = {2000}
}
@article{Happel1994,
author = {Happel, Bart L M and Murre, Jacob M J},
journal = {Neural Networks},
number = {6--7},
pages = {985--1004},
title = {{Design and evolution of modular neural network architectures}},
volume = {7},
year = {1994}
}
@article{Haralick1989,
author = {Haralick, R M},
doi = {10.1109/CVPR.1989.37874},
isbn = {081861918X},
journal = {Proceedings CVPR 89 IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {370--378},
publisher = {IEEE Comput. Soc. Press},
title = {{Monocular vision using inverse perspective projection geometry: analytic relations}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=37874},
volume = {10},
year = {1989}
}
@inproceedings{Hardt2015,
abstract = {We show that any model trained by a stochastic gradient method with few iterations has vanishing generalization error. We prove this by showing the method is algorithmically stable in the sense of Bousquet and Elisseeff. Our analysis only employs elementary tools from convex and continuous optimization. Our results apply to both convex and non-convex optimization under standard Lipschitz and smoothness assumptions. Applying our results to the convex case, we provide new explanations for why multiple epochs of stochastic gradient descent generalize well in practice. In the nonconvex case, we provide a new interpretation of common practices in neural networks, and provide a formal rationale for stability-promoting mechanisms in training large, deep models. Conceptually, our findings underscore the importance of reducing training time beyond its obvious benefit.},
address = {New York, New York, USA},
archivePrefix = {arXiv},
arxivId = {1509.01240},
author = {Hardt, Moritz and Recht, Benjamin and Singer, Yoram},
booktitle = {Proceedings of the 33rd International Conference on Machine Learning (ICML 2016)},
eprint = {1509.01240},
isbn = {9781510829008},
pages = {1--24},
title = {{Train faster, generalize better: Stability of stochastic gradient descent}},
url = {http://arxiv.org/abs/1509.01240},
year = {2015}
}
@article{Hasinoff2010,
author = {Hasinoff, Samuel W. and Durand, Fredo and Freeman, William T.},
doi = {10.1109/CVPR.2010.5540167},
isbn = {978-1-4244-6984-0},
journal = {2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
month = {jun},
pages = {553--560},
publisher = {IEEE},
title = {{Noise-optimal capture for high dynamic range photography}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5540167},
year = {2010}
}
@inproceedings{He2012,
author = {He, Jun and Balzano, Laura and Szlam, Arthur},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
isbn = {9781467312288},
pages = {1568--1575},
title = {{Incremental Gradient on the Grassmannian for Online Foreground and Background Separation in Subsampled Video}},
year = {2012}
}
@article{ieee7005506,
abstract = {Existing deep convolutional neural networks (CNNs) require a fixed-size (e.g., 224$\times$224) input image. This requirement is ``artificial'' and may reduce the recognition accuracy for the images or sub-images of an arbitrary size/scale. In this work, we equip the networks with another pooling strategy, ``spatial pyramid pooling'', to eliminate the above requirement. The new network structure, called SPP-net, can generate a fixed-length representation regardless of image size/scale. Pyramid pooling is also robust to object deformations. With these advantages, SPP-net should in general improve all CNN-based image classification methods. On the ImageNet 2012 dataset, we demonstrate that SPP-net boosts the accuracy of a variety of CNN architectures despite their different designs. On the Pascal VOC 2007 and Caltech101 datasets, SPP-net achieves state-of-the-art classification results using a single full-image representation and no fine-tuning. The power of SPP-net is also significant in object detection. Using SPP-net, we compute the feature maps from the entire image only once, and then pool features in arbitrary regions (sub-images) to generate fixed-length representations for training the detectors. This method avoids repeatedly computing the convolutional features. In processing test images, our method is 24--102$\times$ faster than the R-CNN method, while achieving better or comparable accuracy on Pascal VOC 2007. In ImageNet Large Scale Visual Recognition Challenge (ILSVRC) 2014, our methods rank {\#}2 in object detection and {\#}3 in image classification among all 38 teams. This manuscript also introduces the improvement made for this competition.},
author = {He, K and Zhang, X and Ren, S and Sun, J},
doi = {10.1109/TPAMI.2015.2389824},
issn = {0162-8828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {99},
pages = {1},
title = {{Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition}},
volume = {PP},
year = {2015}
}
@inproceedings{he2015convolutional,
author = {He, Kaiming and Sun, Jian},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages = {5353--5360},
title = {{Convolutional Neural Networks at Constrained Time Cost}},
year = {2015}
}
@article{He2015,
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers, 8$\times$ deeper than VGG nets [41] but still having lower complexity. An ensemble of these residual nets achieves 3.57{\%} error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28{\%} relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC {\&} COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.},
archivePrefix = {arXiv},
arxivId = {1512.03385},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
eprint = {1512.03385},
journal = {arXiv preprint arXiv:1512.03385},
title = {{Deep Residual Learning for Image Recognition}},
url = {http://arxiv.org/pdf/1512.03385v1.pdf},
year = {2015}
}
@inproceedings{He2015b,
abstract = {Rectified activation units (rectifiers) are essential for state-of-the-art neural networks. In this work, we study rectifier neural networks for image classification from two aspects. First, we propose a Parametric Rectified Linear Unit (PReLU) that generalizes the traditional rectified unit. PReLU improves model fitting with nearly zero extra computational cost and little overfitting risk. Second, we derive a robust initialization method that particularly considers the rectifier nonlinearities. This method enables us to train extremely deep rectified models directly from scratch and to investigate deeper or wider network architectures. Based on the learnable activation and advanced initialization, we achieve 4.94{\%} top-5 test error on the ImageNet 2012 classification dataset. This is a 26{\%} relative improvement over the ILSVRC 2014 winner (GoogLeNet, 6.66{\%} [33]). To our knowledge, our result is the first to surpass the reported human-level performance (5.1{\%}, [26]) on this dataset.},
archivePrefix = {arXiv},
arxivId = {1502.01852},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle = {IEEE International Conference on Computer Vision (ICCV)},
doi = {10.1109/ICCV.2015.123},
eprint = {1502.01852},
isbn = {978-1-4673-8391-2},
issn = {15505499},
keywords = {dblp},
pages = {1026--1034},
publisher = {IEEE},
title = {{Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification}},
url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7407725},
year = {2015}
}
@article{He2016,
abstract = {Deep residual networks have emerged as a family of extremely deep architectures showing compelling accuracy and nice convergence behaviors. In this paper, we analyze the propagation formulations behind the residual building blocks, which suggest that the forward and backward signals can be directly propagated from one block to any other block, when using identity mappings as the skip connections and after-addition activation. A series of ablation experiments support the importance of these identity mappings. This motivates us to propose a new residual unit, which further makes training easy and improves generalization. We report improved results using a 1001-layer ResNet on CIFAR-10/100, and a 200-layer ResNet on ImageNet.},
archivePrefix = {arXiv},
arxivId = {1603.05027},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
eprint = {1603.05027},
journal = {arXiv preprint},
pages = {1--15},
title = {{Identity Mappings in Deep Residual Networks}},
url = {http://arxiv.org/abs/1603.05027},
volume = {abs/1603.05027},
year = {2016}
}
@article{helearning,
author = {He, X and Mnih, V and Ioannou, Y and Zemel, R S},
title = {{Learning Visual Features for Outdoor Localization}}
}
@article{Healey1994,
author = {Healey, Glenn E and Kondepudy, Raghava},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Healey, Kondepudy, Member - 1994 - Radiometric CCD camera calibration and noise estimation.pdf:pdf},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {3},
title = {{Radiometric CCD camera calibration and noise estimation}},
url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=276126},
volume = {16},
year = {1994}
}
@misc{dropoutsurprising,
archivePrefix = {arXiv},
arxivId = {cs.LG/1602.04484},
author = {Helmbold, David P and Long, Philip M},
eprint = {1602.04484},
month = {nov},
primaryClass = {cs.LG},
title = {{Surprising properties of dropout in deep networks}},
url = {http://arxiv.org/abs/1602.04484},
year = {2016}
}
@article{Himmelsbach2008,
author = {Himmelsbach, Michael and M{\"{u}}ller, Andre and L{\"{u}}ttel, Thorsten and W{\"{u}}nsche, Hans-Joachim},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Himmelsbach - 2008 - LIDAR-based 3D object perception.pdf:pdf},
journal = {Proceedings of the 1st International Workshop on Cognition for Technical Systems},
title = {{LIDAR-based 3D object perception}},
url = {http://www.cs.princeton.edu/courses/archive/spring11/cos598A/pdfs/Himmelsbach08.pdf},
year = {2008}
}
@article{hinton2006reducing,
abstract = {High-dimensional data can be converted to low-dimensional codes by training a multilayer neural network with a small central layer to reconstruct high-dimensional input vectors. Gradient descent can be used for fine-tuning the weights in such ``autoencoder'' networks, but this works well only if the initial weights are close to a good solution. We describe an effective way of initializing the weights that allows deep autoencoder networks to learn low-dimensional codes that work much better than principal components analysis as a tool to reduce the dimensionality of data.},
author = {Hinton, Geoffrey E and Salakhutdinov, Ruslan R},
doi = {10.1126/science.1127647},
issn = {1095-9203},
journal = {Science},
number = {5786},
pages = {504--507},
pmid = {16873662},
publisher = {American Association for the Advancement of Science},
title = {{Reducing the Dimensionality of Data with Neural Networks}},
volume = {313},
year = {2006}
}
@misc{Hinton2012,
abstract = {When a large feedforward neural network is trained on a small training set, it typically performs poorly on held-out test data. This "overfitting" is greatly reduced by randomly omitting half of the feature detectors on each training case. This prevents complex co-adaptations in which a feature detector is only helpful in the context of several other specific feature detectors. Instead, each neuron learns to detect a feature that is generally helpful for producing the correct answer given the combinatorially large variety of internal contexts in which it must operate. Random "dropout" gives big improvements on many benchmark tasks and sets new records for speech and object recognition.},
archivePrefix = {arXiv},
arxivId = {1207.0580},
author = {Hinton, Geoffrey E. and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R.},
booktitle = {ArXiv e-prints},
eprint = {1207.0580},
month = {jul},
pages = {1--18},
title = {{Improving neural networks by preventing co-adaptation of feature detectors}},
url = {http://arxiv.org/abs/1207.0580},
year = {2012}
}
@phdthesis{hochreiter1991untersuchungen,
author = {Hochreiter, Sepp},
booktitle = {Diploma, Technische Universit{\"{a}}t M{\"{u}}nchen},
pages = {91},
school = {Technische Universit{\"{a}}t M{\"{u}}nchen},
title = {{Untersuchungen zu dynamischen neuronalen Netzen}},
year = {1991}
}
@misc{Hochreiter01gradientflow,
abstract = {Introduction Recurrent networks (crossreference Chapter 12) can, in principle, use their feedback connections to store representations of recent input events in the form of activations. The most widely used algorithms for learning what to put in short-term memory, however, take too much time to be feasible or do not work well at all, especially when minimal time lags between inputs and corresponding teacher signals are long. Although theoretically fascinating, they do not provide clear practical advantages over, say, backprop in feedforward networks with limited time windows (see crossreference Chapters 11 and 12). With conventional ``algorithms based on the computation of the complete gradient'', such as ``Back-Propagation Through Time'' (BPTT, e.g., 22, 27, 26) or ``Real-Time Recurrent Learning'' (RTRL, e.g., 21) error signals ``flowing backwards in time'' tend to either (1) blow up or (2) vanish: the temporal evolution of the backpropagated error ex},
author = {Hochreiter, Sepp and Bengio, Y and Frasconi, Paolo and Schmidhuber, J},
booktitle = {A Field Guide to Dynamical Recurrent Networks},
doi = {10.1109/9780470544037.ch14},
isbn = {978-0-7803-5369-5},
pages = {237--243},
title = {{Gradient flow in recurrent nets: the difficulty of learning long-term dependencies}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.24.7321{\&}rep=rep1{\&}type=pdf},
year = {2001}
}
@article{Horn1990,
abstract = {The method described here for recovering the shape of a surface from a shaded image can deal with complex, wrinkled surfaces. Integrability can be enforced easily because both surface height and gradient are represented. (A gradient field is integrable if it is the gradient of some surface height function.) The robustness of the method stems in part from linearization of the reflectance map about the current estimate of the surface orientation at each picture cell. (The reflectance map gives the dependence of scene radiance on surface orientation.) The new scheme can find an exact solution of a given shape-from-shading problem even though a regularizing term is included. The reason is that the penalty term is needed only to stabilize the iterative scheme when it is far from the correct solution; it can be turned off as the solution is approached. This is a reflection of the fact that shape-from-shading problems are not ill posed when boundary conditions are available, or when the image contains singular points.},
author = {Horn, B K P},
doi = {10.1007/bf00056771},
issn = {09205691},
journal = {International Journal of Computer Vision},
number = {1},
pages = {37--75},
publisher = {Springer Netherlands},
title = {{Height and Gradient from Shading}},
url = {http://www.springerlink.com/index/L90617LKGL701386.pdf},
volume = {5},
year = {1990}
}
@article{Horn1974,
author = {Horn, B K P},
journal = {Computer Graphics and Image Processing},
title = {{Determining lightness from an image}},
url = {http://www.sciencedirect.com/science/article/pii/0146664X74900227},
year = {1974}
}
@article{Horn1979,
abstract = {It appears that the development of machine vision may benefit from a detailed understanding of the imaging process. The reflectance map, showing scene radiance as a function of surface gradient, has proved to be helpful in this endeavor. The reflectance map depends both on the nature of the surface layers of the objects being imaged and the distribution of light sources. Recently, a unified approach to the specification of surface reflectance in terms of both incident and reflected beam geometry has been proposed. The reflecting properties of a surface are specified in terms of the bidirectional reflectance-distribution function (BRDF). Here we derive the reflectance map in terms of the BRDF and the distribution of source radiance. A number of special cases of practical importance are developed in detail. The significance of this approach to the understanding of image formation is briefly indicated.},
author = {Horn, B K and Sjoberg, R W},
doi = {10.1364/AO.18.001770},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Horn, Sjoberg - 1979 - Calculating the reflectance map.pdf:pdf},
journal = {Applied Optics},
keywords = {image brightness,scene brightness,surface reflec},
number = {11},
pages = {1770--1779},
pmid = {20212547},
publisher = {OSA},
title = {{Calculating the Reflectance Map}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/20212547},
volume = {18},
year = {1979}
}
@article{Horn1977,
abstract = {Traditionally, image intensities have been processed to segment an image into regions or to find edge-fragments. Image intensities carry a great deal more information about three-dimensional shape, however. To exploit this information, it is necessary to understand how images are formed and what determines the observed intensity in the image. The gradient space, popularized by Huffman and Mackworth in a slightly different context, is a helpful tool in the development of new methods.},
author = {Horn, Berthold K P},
doi = {10.1016/0004-3702(77)90020-0},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Horn - 1977 - Understanding Image Intensities.pdf:pdf},
issn = {00043702},
journal = {Artificial Intelligence},
number = {2},
pages = {201--231},
title = {{Understanding Image Intensities}},
url = {http://linkinghub.elsevier.com/retrieve/pii/0004370277900200},
volume = {8},
year = {1977}
}
@misc{Horn1970,
abstract = {A method will be described for finding the shape of a smooth opaque object from a monocular image, given a knowledge of the surface photometry, the position of the light source and certain auxiliary information to resolve ambiguities. This method is complementary to the use of stereoscopy, which relies on matching up sharp detail and will fail on smooth objects. Until now the image processing of single views has been restricted to objects which can meaningfully be considered two-dimensional or bounded by plane surfaces. It is possible to derive a first-order non-linear partial differential equation in two unknowns relating the intensity at the image points to the shape of the objects. This equation can be solved by means of an equivalent set of five ordinary differential equations. A curve traced out by solving this set of equations for one set of starting values is called a characteristic strip. Starting one of these strips from each point on some initial curve will produce the whole solution surface. The initial curves can usually be constructed around so-called singular points. A number of applications of this method will be discussed, including one to lunar topography and one to the scanning electron microscope. In both of these cases great simplifications occur in the equations. A note on polyhedra follows and a quantitative theory of facial make-up is touched upon. An implementation of some of these ideas on the PDP-6 computer with its attached image-dissector camera at the Artificial Intelligence Laboratory will be described, and also a nose-recognition program.},
author = {Horn, Berthold K P},
institution = {MIT Artificial Intelligence Laboratory},
number = {232},
pages = {196},
title = {{Shape from Shading: A Method for Obtaining the Shape of a Smooth Opaque Object from One View}},
url = {http://dspace.mit.edu/handle/1721.1/6885},
year = {1970}
}
@article{hornik89a,
author = {Hornik, K and Stinchcombe, M and White, H},
journal = {Neural Networks},
pages = {359--366},
title = {{Multilayer feedforward networks are universal approximators}},
volume = {2},
year = {1989}
}
@article{Horprasert1999,
author = {Horprasert, Thanarat and Harwood, David and Davis, Larry S},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Horprasert, Harwood - 1999 - A statistical approach for real-time robust background subtraction and shadow detection.pdf:pdf},
journal = {IEEE ICCV Frame-Rate Workshop},
pages = {1--19},
title = {{A statistical approach for real-time robust background subtraction and shadow detection}},
url = {http://vast.uccs.edu/{~}tboult/frame/Horprasert/},
year = {1999}
}
@article{Humenberger2012,
author = {Humenberger, Martin and Schraml, Stephan and Sulzbachner, Christoph and Belbachir, Ahmed Nabil},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Humenberger et al. - 2012 - Embedded Fall Detection with a Neural Network and Bio-Inspired Stereo Vision.pdf:pdf},
isbn = {9781467316125},
pages = {60--67},
title = {{Embedded Fall Detection with a Neural Network and Bio-Inspired Stereo Vision}},
year = {2012}
}
@article{Hwang2011,
abstract = {By the laws of quantum physics, pixel intensity does not have a true value, but should be a random variable. Contrary to the conventional assumptions, the distribution of intensity may not be an additive Gaussian. We propose to directly model the intensity difference, and show its validity by an experimental comparison to the conventional additive model. As a model of the intensity difference, we present a Skellam distribution derived from the Poisson photon noise model. This modeling induces a linear relationship between intensity and Skellam parameters, while conventional variance computation methods do not yield any significant relationship between these parameters under natural illumination. The intensity-Skellam line is invariant to scene, illumination and even most of camera parameters. We also propose practical methods to obtain the line using a color pattern and an arbitrary image under a natural illumination. Because the Skellam parameters that can be obtained from this linearity determine a noise distribution for each intensity value, we can statistically determine whether any intensity difference is caused by an underlying signal difference or by noise. We demonstrate the effectiveness of this new noise model by applying it to practical applications of background subtraction and edge detection.},
author = {Hwang, Youngbae and Kim, Jun-Sik and Kweon, In So},
doi = {10.1109/TPAMI.2011.224},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Hwang, Kim, Kweon - 2011 - Difference-based Image Noise Modeling Using Skellam Distribution.pdf:pdf},
issn = {1939-3539},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
month = {nov},
number = {7},
pages = {1329--1341},
pmid = {22144520},
title = {{Difference-based Image Noise Modeling Using Skellam Distribution}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/22144520},
volume = {34},
year = {2011}
}
@article{Hyvarinen2000,
abstract = {A fundamental problem in neural network research, as well as in many other disciplines, is finding a suitable representation of multivariate data, i.e. random vectors. For reasons of computational and conceptual simplicity, the representation is often sought as a linear transformation of the original data. In other words, each component of the representation is a linear combination of the original variables. Well-known linear transformation methods include principal component analysis, factor analysis, and projection pursuit. Independent component analysis (ICA) is a recently developed method in which the goal is to find a linear representation of non-Gaussian data so that the components are statistically independent, or as independent as possible. Such a representation seems to capture the essential structure of the data in many applications, including feature extraction and signal separation. In this paper, we present the basic theory and applications of ICA, and our recent work on the subject.},
author = {Hyv{\"{a}}rinen, A and Oja, E},
institution = {Neural Networks Research Centre, Helsinki University of Technology, Finland. aapo.hyvarinen@hut.fi},
journal = {Neural Networks},
keywords = {algorithms,artifacts,brain,brain physiology,humans,magnetoencephalography,neural networks (computer),normal distribution},
number = {4--5},
pages = {411--430},
pmid = {10946390},
publisher = {Elsevier},
title = {{Independent component analysis: algorithms and applications}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/10946390},
volume = {13},
year = {2000}
}
@article{I-lealey,
author = {Healey, Glenn and Kondepudy, Raghava},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/I-lealey, Kondepudy - Unknown - CCD Camera Calibration and Noise Estimation.pdf:pdf},
isbn = {0818628553},
number = {5},
pages = {90--95},
title = {{CCD Camera Calibration and Noise Estimation}},
}
@article{ioannou2012difference,
author = {Ioannou, Y and Taati, B and Harrap, R and Greenspan, M},
journal = {arXiv preprint arXiv:1209.1759},
title = {{Difference of Normals as a Multi-Scale Operator in Unorganized Point Clouds}},
year = {2012}
}
@inproceedings{Ioannou2009a,
abstract = {Potential Well Space Embedding (PWSE) has been shown to be an effective global method to recognize segmented objects in range data. Here Local PWSE is proposed as an extension of PWSE. LPWSE features are generated by iterating ICP to the local minima of a multiscale registration model at each point. The locations of the local minima are then used to generate feature vectors, which can be matched against a preprocessed database of such features to determine correspondences between images and models. The method has been implemented and tested on real data, and has been found to be effective at recognizing sparse segmented (self-)occluded range images. A classification accuracy of 92{\%} is achieved with 3750 points, dropping to 78{\%} at 500 points, on 50 randomly sub-sampled sparse views of 5 objects.},
author = {Ioannou, Y. and Shang, L. and Harrap, R. and Greenspan, M.},
booktitle = {2009 IEEE 12th International Conference on Computer Vision Workshops, ICCV Workshops 2009},
doi = {10.1109/ICCVW.2009.5457491},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou, Shang - 2009 - Local PotentialWell Space Embedding.pdf:pdf},
isbn = {9781424444427},
pages = {1726--1732},
title = {{Local Potential Well Space Embedding}},
year = {2009}
}
@article{Ioannou2010,
abstract = {Recent advances in Light Detection and Ranging (LIDAR) technology and integration have resulted in vehicle-borne platforms for urban LIDAR scanning, such as Terrapoint Inc.'s TITAN system. Such technology has led to an explosion in ground LIDAR data. The large size of such mobile urban LIDAR data sets, and the ease at which they may now be collected, has shifted the bottleneck of creating abstract urban models for Geographical Information Systems (GIS) from data collection to data processing. While turning such data into useful models has traditionally relied on human analysis, this is no longer practical. This thesis outlines a methodology for automatically recovering the necessary information to create abstract urban models from mobile urban LIDAR data using computer vision methods. As an integral part of the methodology, a novel scale-based interest operator is introduced (Difference of Normals) that is efficient enough to process large datasets, while accurately isolating objects of interest in the scene according to real-world parameters. Finally a novel localized object recognition algorithm is introduced (Local Potential Well Space Embedding), derived from a proven global method for object recognition (Potential Well Space Embedding). The object recognition phase of our methodology is discussed with these two algorithms as a focus.},
author = {Ioannou, Yani Andrew},
journal = {Thesis (Master, Computing), Queen's University},
keywords = {LIDAR, point clouds, GIS, computer vision, urban},
mendeley-tags = {LIDAR, point clouds, GIS, computer vision, urban},
number = {February},
title = {{Automatic urban modelling using mobile urban lidar data}},
url = {http://qspace.library.queensu.ca/handle/1974/5443},
year = {2010}
}
@inproceedings{Ioannou2016,
author = {Ioannou, Yani and Robertson, Duncan P and Shotton, Jamie and Cipolla, Roberto and Criminisi, Antonio},
booktitle = {International Conference on Learning Representations},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou et al. - 2016 - Training CNNs with Low-Rank Filters for Efficient Image Classification.pdf:pdf},
title = {{Training CNNs with Low-Rank Filters for Efficient Image Classification}},
year = {2016}
}
@article{Ioannou2016e,
abstract = {We propose a new method for training computationally efficient and compact convolutional neural networks (CNNs) using a novel sparse connection structure that resembles a tree root. Our sparse connection structure facilitates a significant reduction in computational cost and number of parameters of state-of-the-art deep CNNs without compromising accuracy. We validate our approach by using it to train more efficient variants of state-of-the-art CNN architectures, evaluated on the CIFAR10 and ILSVRC datasets. Our results show similar or higher accuracy than the baseline architectures with much less compute, as measured by CPU and GPU timings. For example, for ResNet 50, our model has 40{\%} fewer parameters, 45{\%} fewer floating point operations, and is 31{\%} (12{\%}) faster on a CPU (GPU). For the deeper ResNet 200 our model has 25{\%} fewer floating point operations and 44{\%} fewer parameters, while maintaining state-of-the-art accuracy. For GoogLeNet, our model has 7{\%} fewer parameters and is 21{\%} (16{\%}) faster on a CPU (GPU).},
archivePrefix = {arXiv},
arxivId = {1605.06489},
author = {Ioannou, Yani and Robertson, Duncan and Cipolla, Roberto and Criminisi, Antonio},
eprint = {1605.06489},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou et al. - 2016 - Deep Roots Improving CNN Efficiency with Hierarchical Filter Groups.pdf:pdf},
journal = {arXiv preprint},
month = {may},
title = {{Deep Roots: Improving CNN Efficiency with Hierarchical Filter Groups}},
url = {http://arxiv.org/abs/1605.06489},
year = {2016}
}
@techreport{Ioannou2015,
author = {Ioannou, Yani and Robertson, Duncan and Zikic, Darko and Kontschieder, Peter and Shotton, Jamie and Brown, Matthew and Criminisi, Antonio},
booktitle = {Technical Report},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou et al. - 2015 - Decision Forests, Convolutional Networks and the Models in-Between(2).pdf:pdf},
institution = {Microsoft Research},
month = {apr},