references-mendely.bib

@article{Adelson1992,
author = {Adelson, E.H. and Wang, J.Y.a.},
doi = {10.1109/34.121783},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Adelson, Wang - 1992 - Single lens stereo with a plenoptic camera.pdf:pdf},
issn = {01628828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {2},
pages = {99--106},
title = {{Single lens stereo with a plenoptic camera}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=121783},
volume = {14},
year = {1992}
}
@article{journals/neco/AmitG97,
author = {Amit, Yali and Geman, Donald},
journal = {Neural Computation},
keywords = {dblp},
number = {7},
pages = {1545--1588},
title = {{Shape Quantization And Recognition With Randomized Trees.}},
url = {http://dblp.uni-trier.de/db/journals/neco/neco9.html{\#}AmitG97},
volume = {9},
year = {1997}
}
@inproceedings{Ba2013dothey,
abstract = {Currently, deep neural networks are the state of the art on problems such as speech recognition and computer vision. In this extended abstract, we show that shal- low feed-forward networks can learn the complex functions previously learned by deep nets and achieve accuracies previously only achievable with deep models. Moreover, in some cases the shallow neural nets can learn these deep functions using a total number of parameters similar to the original deep model. We eval- uate our method on the TIMIT phoneme recognition task and are able to train shallow fully-connected nets that perform similarly to complex, well-engineered, deep convolutional architectures. Our success in training shallow neural nets to mimic deeper models suggests that there probably exist better algorithms for train- ing shallow feed-forward nets than those currently available.},
archivePrefix = {arXiv},
arxivId = {arXiv:1312.6184v5},
author = {Ba, Lj and Caurana, R},
booktitle = {arXiv preprint arXiv:1312.6184},
doi = {10.1038/nature14539},
eprint = {arXiv:1312.6184v5},
isbn = {3135786504},
issn = {0028-0836},
pages = {1--6},
pmid = {26017442},
title = {{Do Deep Nets Really Need to be Deep ?}},
url = {http://arxiv.org/abs/1312.6184},
volume = {2014},
year = {2013}
}
@inproceedings{conf/icml/2015,
editor = {Bach, Francis R and Blei, David M},
publisher = {JMLR.org},
series = {{\{}JMLR{\}} Workshop and Conference Proceedings},
title = {{Proceedings of the 32nd International Conference on Machine Learning, {\{}ICML{\}} 2015, Lille, France, 6-11 July 2015}},
url = {http://jmlr.org/proceedings/papers/v37/},
volume = {37},
year = {2015}
}
@article{Barron2012,
abstract = {We address the problem of recovering shape, albedo, and illumination from a single grayscale image of an object, using shading as our primary cue. Because this problem is fundamentally underconstrained, we construct statistical models of albedo and shape, and define an optimization problem that searches for the most likely explanation of a single image. We present two priors on albedo which en- courage local smoothness and global sparsity, and three priors on shape which encourage flatness, outward-facing orientation at the occluding contour, and local smoothness. We present an optimization technique for using these pri- ors to recover shape, albedo, and a spherical harmonic model of illumination. Our model, which we call SAIFS (shape, albedo, and illumination from shading) produces reasonable results on arbitrary grayscale images taken in the real world, and outperforms all previous grayscale in- trinsic image-style algorithms on the MIT Intrinsic Images dataset.},
author = {Barron, Jonathan T and Malik, Jitendra and Berkeley, U C},
doi = {10.1109/CVPR.2012.6247693},
isbn = {9781467312288},
issn = {10636919},
journal = {IEEE Conference on Computer Vision and Patern Recognition},
pages = {334--341},
publisher = {IEEE},
title = {{Shape , Albedo , and Illumination from a Single Image of an Unknown Object}},
url = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=6247693{\&}contentType=Conference+Publications},
year = {2012}
}
@inproceedings{bastani2016measuring,
author = {Bastani, Osbert and Ioannou, Yani and Lampropoulos, Leonidas and Vytiniotis, Dimitrios and Nori, Aditya and Criminisi, Antonio},
booktitle = {Neural Information Processing Systems (NIPS), 2016},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Bastani et al. - 2016 - Measuring Neural Net Robustness with Constraints(2).pdf:pdf},
title = {{Measuring Neural Net Robustness with Constraints}},
year = {2016}
}
@misc{Beacco2003,
abstract = {The characterization of the photometric properties of a road surface is of prime importance in the design of lighting plant and when the real vision condition should be determined by computer simulation. The measurement could be done in laboratory but the in situ measurement are very interested because it permit to test several zone on the road and there is no mechanical starch on the surface of the sample. This work describes an innovative portable system based on a CCD luminance meter able to obtain uncertainty comparable in traditional laboratory systems.},
author = {Beacco, D and Fiorentin, P and Rossi, G},
booktitle = {Proceedings of the 20th IEEE Instrumentation Technology Conference Cat No03CH37412},
doi = {10.1109/IMTC.2003.1208001},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Beacco, Fiorentin, Rossi - 2003 - A system for in situ measurements of road reflection properties.pdf:pdf},
isbn = {0780377052},
issn = {10915281},
number = {May},
pages = {1508--1512},
publisher = {Ieee},
title = {{A system for in situ measurements of road reflection properties}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1208001},
volume = {2},
year = {2003}
}
@article{Bellia2002,
abstract = {Recent availability of video-cameras with CCD-type sensors (charge coupled device) has proved to be particularly stimulating for all those applications requiring photometric measurements, above all for the measurement of luminance values related to the physical and technical qualities of a built environment. This method allows the instantaneous capture of an image, thus enabling collection of luminance values relating to the points of measurement; this in turn leads to the evaluation of luminance distribution and lighting levels of the surfaces that make up the environment. Setting up this system requires the following basic configuration: a photopic filter V($\lambda$), an optic interface, a computer equipped with an appropriate card for the capture and digitalisation of the acquired image (the grqqframe grabber) and, finally, suitable software for the processing of collected data. In this article a detailed description of this acquisition system is reported, and subsequently a report on the procedure adopted for its calibration so as to enable the capture of relevant photometric values. Final analysis and validation of results are carried out by means of field test. A case study of CCD photometerapplication has been then performed using a basic software tool autonomously developed to evaluate indoor lighting level; the luminance map of a diffuse light source has been used as grqqinput data for the developed software, and the grqqoutput data, i.e. illumination levels, have been then compared with measured values.},
author = {Bellia, L and Cesarano, A and Minichiello, F and Sibilio, S},
doi = {10.1016/S0360-1323(01)00093-2},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Bellia et al. - 2002 - Setting up a CCD photometer for lighting research and design.pdf:pdf},
issn = {03601323},
journal = {Building and Environment},
keywords = {calculation,luminance,photometers,software code,video camera},
number = {11},
pages = {1099--1106},
publisher = {CIE Poland},
title = {{Setting up a CCD photometer for lighting research and design}},
url = {http://linkinghub.elsevier.com/retrieve/pii/S0360132301000932},
volume = {37},
year = {2002}
}
@inproceedings{Bengio2010labeltree,
author = {Bengio, S and Weston, J and Grangier, D},
booktitle = {Conference and Workshop on Neural Information Processing Systems},
title = {{Label Embedding Trees for Large Multi-Class Tasks}},
year = {2010}
}
@article{bengio:ieeenn94,
author = {Bengio, Yoshua and Simard, Patrick and Frasconi, Paolo},
journal = {IEEE Transactions on Neural Networks},
keywords = {nn},
number = {2},
pages = {157--166},
title = {{Learning Long-Term Dependencies With Gradient Descent Is Difficult}},
volume = {5},
year = {1994}
}
@book{Bishop1995,
address = {Oxford},
author = {Bishop, Christopher M},
keywords = {imported},
publisher = {Oxford University Press},
title = {{Neural Networks for Pattern Recognition}},
year = {1995}
}
@incollection{Bottou2012sgdtricks,
author = {Bottou, L{\'{e}}on},
booktitle = {Neural Networks: Tricks of the Trade (2nd ed.)},
editor = {Montavon, Gr{\'{e}}goire and Orr, Genevieve B and M{\"{u}}ller, Klaus-Robert},
isbn = {978-3-642-35288-1},
keywords = {dblp},
pages = {421--436},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{Stochastic Gradient Descent Tricks.}},
volume = {7700},
year = {2012}
}
@article{breiman2001random,
author = {Breiman, Leo},
journal = {Machine Learning},
keywords = {forests random},
pages = {5--32},
title = {{Random Forests}},
volume = {45},
year = {2001}
}
@article{breiman1996bagging,
abstract = {Bagging predictors is a method for generating multiple versions of a pre-dictor and using these to get an aggregated predictor. The aggregation av-erages over the versions when predicting a numerical outcome and does a plurality v ote when predicting a class. The multiple versions are formed by making bootstrap replicates of the learning set and using these as new learning sets. Tests on real and simulated data sets using classiication and regression trees and subset selection in linear regression show that bagging can give substantial gains in accuracy. The vital element is the instability o f the prediction method. If perturbing the learning set can cause signiicant changes in the predictor constructed, then bagging can improve accuracy.},
author = {Breiman, Leo},
doi = {10.1007/BF00058655},
isbn = {0885-6125},
issn = {0885-6125},
journal = {Machine Learning},
keywords = {aggregation,averaging,bootstrap,combining},
number = {421},
pages = {123--140},
pmid = {17634459},
publisher = {Springer},
title = {{Bagging Predictors}},
volume = {24},
year = {1996}
}
@book{breiman84,
author = {Breiman, Leo and Friedman, Jerome H and Olshen, Richard A and Stone, Charles J},
booktitle = {CA: Wadsworth International Group},
doi = {10.1371/journal.pone.0015807},
isbn = {978-0534980535},
issn = {19326203},
pmid = {462029},
publisher = {CRC press},
title = {{Classification and regression trees}},
year = {1984}
}
@inproceedings{Chang2012,
author = {Chang, Hyung Jin and Jeong, Hawook and Choi, And Jin Young},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition},
title = {{Active Attentional Sampling for Speed-up of Background Subtraction}},
year = {2012}
}
@inproceedings{Chen2015,
abstract = {As deep nets are increasingly used in applications suited for mobile devices, a fundamental dilemma becomes apparent: the trend in deep learning is to grow models to absorb ever-increasing data set sizes; however mobile devices are designed with very little memory and cannot store such large models. We present a novel network architecture, HashedNets, that exploits inherent redundancy in neural networks to achieve drastic reductions in model sizes. HashedNets uses a low-cost hash function to randomly group connection weights into hash buckets, and all connections within the same hash bucket share a single parameter value. These parameters are tuned to adjust to the HashedNets weight sharing architecture with standard backprop during training. Our hashing procedure introduces no additional memory overhead, and we demonstrate on several benchmark data sets that HashedNets shrink the storage requirements of neural networks substantially while mostly preserving generalization performance.},
archivePrefix = {arXiv},
arxivId = {1504.04788},
author = {Chen, Wenlin and Wilson, James T. and Tyree, Stephen and Weinberger, Kilian Q. and Chen, Yixin},
booktitle = {Proceedings of The 32nd International Conference on Machine Learning},
editor = {Bach, Francis R and Blei, David M},
eprint = {1504.04788},
isbn = {9781510810587},
keywords = {dblp},
pages = {2285--2294},
publisher = {JMLR.org},
series = {JMLR Proceedings},
title = {{Compressing Neural Networks with the Hashing Trick}},
url = {http://arxiv.org/abs/1504.04788},
volume = {37},
year = {2015}
}
@inproceedings{Ciresan2012,
abstract = {Traditional methods of computer vision and machine learning cannot match human performance on tasks such as the recognition of handwritten digits or traffic signs. Our biologically plausible deep artificial neural network architectures can. Small (often minimal) receptive fields of convolutional winnertake-all neurons yield large network depth, resulting in roughly as many sparsely connected neural layers as found in mammals between retina and visual cortex. Only winner neurons are trained. Several deep neural columns become experts on inputs preprocessed in different ways; their predictions are averaged. Graphics cards allow for fast training. On the very competitive MNIST handwriting benchmark, our method is the first to achieve near-human performance. On a traffic sign recognition benchmark it outperforms humans by a factor of two. We also improve the state-of-the-art on a plethora of common image classification benchmarks.},
archivePrefix = {arXiv},
arxivId = {1202.2745},
author = {Ciresan, Dan and Meier, Ueli and Schmidhuber, J{\"{u}}rgen},
booktitle = {arXiv:1202.2745v1 [cs.CV]},
eprint = {1202.2745},
isbn = {1467312266},
pages = {3642--3649},
title = {{Multi-column deep neural networks for image classification}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.299.4060{\&}rep=rep1{\&}type=pdf{\%}5Cnhttp://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.299.4060{\&}rank=5},
year = {2012}
}
@inproceedings{Cogswell2016,
author = {Cogswell, Michael and Ahmed, Faruk and Girshick, Ross B and Zitnick, Larry and Batra, Dhruv},
booktitle = {International Conference on Learning Representations},
title = {{Reducing Overfitting in Deep Networks by Decorrelating Representations.}},
year = {2016}
}
@article{criminisi2013decision,
author = {Criminisi, Antonio and Shotton, Jamie},
publisher = {Springer Publishing Company, Incorporated},
title = {{Decision Forests for Computer Vision and Medical Image Analysis}},
year = {2013}
}
@article{Cucchiara2001,
author = {Cucchiara, R. and Crana, C. and Piccardi, M. and Prati, a. and Sirotti, S.},
doi = {10.1109/ITSC.2001.948679},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Cucchiara et al. - 2001 - Improving shadow suppression in moving object detection with HSV color information.pdf:pdf},
isbn = {0-7803-7194-1},
journal = {ITSC 2001. 2001 IEEE Intelligent Transportation Systems. Proceedings (Cat. No.01TH8585)},
pages = {334--339},
publisher = {Ieee},
title = {{Improving shadow suppression in moving object detection with HSV color information}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=948679},
year = {2001}
}
@article{Cucchiara2003,
author = {Cucchiara, Rita and Grana, Costantino and Piccardi, Massimo and Prati, Andrea},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Cucchiara et al. - 2003 - Detecting Moving Objects , Ghosts , and Shadows in Video Streams {\ae}.pdf:pdf},
number = {10},
pages = {1337--1342},
title = {{Detecting Moving Objects , Ghosts , and Shadows in Video Streams {\ae}}},
volume = {25},
year = {2003}
}
@inproceedings{lecun1989optimal,
abstract = {We have used information-theoretic ideas to derive a class of practical and nearly optimal schemes for adapting the size of a neural network. By removing unimportant weights from a network, several improvements can be expected: better generalization, fewer training examples required, and improved speed of learning and/or classification. The basic idea is to use second-derivative information to make a tradeoff between network complexity and training set error. Experiments confirm the usefulness of the methods on a real-world application.},
archivePrefix = {arXiv},
arxivId = {arXiv:1011.1669v3},
author = {Cun, Yann Le and Denker, John S and Solla, Sara a},
booktitle = {Advances in Neural Information Processing Systems},
doi = {10.1.1.32.7223},
eprint = {arXiv:1011.1669v3},
isbn = {1558601007},
issn = {1098-6596},
number = {1},
pages = {598--605},
pmid = {25246403},
title = {{Optimal Brain Damage}},
volume = {2},
year = {1990}
}
@article{journals/mcss/Cybenko92,
author = {Cybenko, G},
journal = {Mathematics of control, signals, and systems},
keywords = {dblp},
number = {4},
pages = {303--314},
title = {{Approximation by superpositions of a sigmoid function}},
url = {http://dblp.uni-trier.de/db/journals/mcss/mcss5.html{\#}Cybenko92},
volume = {2},
year = {1989}
}
@book{damelin2011,
abstract = {Arising from courses taught by the authors, this largely self-contained treatment is ideal for mathematicians who are interested in applications or for students from applied fields who want to understand the mathematics behind their subject. Early chapters cover Fourier analysis, functional analysis, probability and linear algebra, all of which have been chosen to prepare the reader for the applications to come. The book includes rigorous proofs of core results in compressive sensing and wavelet convergence. Fundamental is the treatment of the linear system y=$\Phi$x in both finite and infinite dimensions. There are three possibilities: the system is determined, overdetermined or underdetermined, each with different aspects. The authors assume only basic familiarity with advanced calculus, linear algebra and matrix theory and modest familiarity with signal processing, so the book is accessible to students from the advanced undergraduate level. Many exercises are also included.},
address = {Cambridge},
author = {Damelin, Steven B. and {Miller Jr}, Willard},
doi = {10.1017/CBO9781139003896},
isbn = {9781107601048},
pages = {462},
pmid = {17238176},
publisher = {Cambridge University Press},
title = {{The Mathematics of Signal Processing}},
url = {http://www.amazon.com/Mathematics-Signal-Processing-Cambridge-Applied/dp/1107601045/ref=pd{\_}sim{\_}sbs{\_}b{\_}4?ie=UTF8{\&}refRID=0TKKM2SWXXJPAXKE5KWG},
year = {2012}
}
@article{Debevec2008,
author = {Debevec, PE and Malik, J},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Debevec, Malik - 2008 - Recovering high dynamic range radiance maps from photographs.pdf:pdf},
journal = {ACM SIGGRAPH 2008 classes},
title = {{Recovering high dynamic range radiance maps from photographs}},
url = {http://dl.acm.org/citation.cfm?id=1401174},
year = {2008}
}
@misc{DeMenthon1990a,
abstract = {An exact method for computing the position of a triangle in space from its image is presented. Also presented is an approximate method based on orthoperspective, an approximation of perspective which produces lower errors for off-center triangle images than scaled orthographic projection. A comparison is made of exact and approximate solutions for the triangle pose. This comparison gives the relative combinations of image and triangle characteristics which are likely to generate the largest errors. Model-based pose estimation techniques which match image and model triangles require large numbers of matching operations in real-world applications. It is shown that the approximate model can be used to build lookup tables for each of the triangles of a model and that they speed up the estimation of an object pose},
author = {DeMenthon, D and Davis, L S},
booktitle = {Proceedings IEEE International Conference on Robotics and Automation},
doi = {10.1109/ROBOT.1990.125943},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/DeMenthon, Davis - 1990 - New exact and approximate solutions of the three-point perspective problem(2).pdf:pdf},
isbn = {0818690615},
number = {11},
pages = {40--45},
publisher = {IEEE Comput. Soc. Press},
title = {{New exact and approximate solutions of the three-point perspective problem}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=125943},
volume = {14},
year = {1990}
}
@misc{DeMenthon1990,
abstract = {An exact method for computing the position of a triangle in space from its image is presented. Also presented is an approximate method based on orthoperspective, an approximation of perspective which produces lower errors for off-center triangle images than scaled orthographic projection. A comparison is made of exact and approximate solutions for the triangle pose. This comparison gives the relative combinations of image and triangle characteristics which are likely to generate the largest errors. Model-based pose estimation techniques which match image and model triangles require large numbers of matching operations in real-world applications. It is shown that the approximate model can be used to build lookup tables for each of the triangles of a model and that they speed up the estimation of an object pose},
author = {DeMenthon, D and Davis, L S},
booktitle = {Proceedings IEEE International Conference on Robotics and Automation},
doi = {10.1109/ROBOT.1990.125943},
isbn = {0818690615},
number = {11},
pages = {40--45},
publisher = {IEEE Comput. Soc. Press},
title = {{New exact and approximate solutions of the three-point perspective problem}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=125943},
volume = {14},
year = {1990}
}
@misc{DeMenthon1992,
abstract = {Model-based pose estimation techniques that match image and model triangles require large numbers of matching operations in real-world applications. The authors show that by using approximations to perspective, 2D lookup tables can be built for each of the triangles of the models. An approximation called `weak perspective' has been applied previously to this problem; the authors consider two other perspective approximations: paraperspective and orthoperspective. These approximations produce lower errors for off-center image features than weak perspective},
author = {DeMenthon, D and Davis, L S},
booktitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
doi = {10.1109/34.166625},
issn = {01628828},
number = {11},
pages = {1100--1105},
title = {{Exact and approximate solutions of the perspective-three-point problem}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=166625},
volume = {14},
year = {1992}
}
@inproceedings{Deng2011fastbalanced,
author = {Deng, J and Satheesh, S and Berg, A C and Li, F.-F.},
booktitle = {Conference and Workshop on Neural Information Processing Systems},
title = {{Fast and Balanced: Efficient Label Tree Learning for Large Scale Object Recognition}},
year = {2011}
}
@inproceedings{Denil2013predicting,
abstract = {We demonstrate that there is signiﬁcant redundancy in the parameterization of several deep learning models. Given only a few weight values for each feature it is possible to accurately predict the remaining values. Moreover, we show that not only can the parameter values be predicted, but many of them need not be learned at all. We train several different architectures by learning only a small number of weights and predicting the rest. In the best case we are able to predict more than 95{\%} of the weights of a network without any drop in accuracy.},
archivePrefix = {arXiv},
arxivId = {1306.0543},
author = {Denil, Misha and Shakibi, Babak and Dinh, Laurent and Ranzato, Marc'Aurelio and de Freitas, Nando},
booktitle = {Neural Information Processing Systems (NIPS)},
eprint = {1306.0543},
pages = {2148--2156},
title = {{Predicting Parameters in Deep Learning}},
url = {http://papers.nips.cc/paper/5025-predicting-parameters-in-deep-learning},
year = {2013}
}
@inproceedings{Denton2014efficient,
abstract = {We present techniques for speeding up the test-time evaluation of large convolutional networks, designed for object recognition tasks. These models deliver impressive accuracy but each image evaluation requires millions of floating point operations, making their deployment on smartphones and Internet-scale clusters problematic. The computation is dominated by the convolution operations in the lower layers of the model. We exploit the linear structure present within the convolutional filters to derive approximations that significantly reduce the required computation. Using large state-of-the-art models, we demonstrate we demonstrate speedups of convolutional layers on both CPU and GPU by a factor of 2x, while keeping the accuracy within 1{\%} of the original model.},
archivePrefix = {arXiv},
arxivId = {1404.0736},
author = {Denton, Emily and Zaremba, Wojciech and Bruna, Joan and LeCun, Yann and Fergus, Rob},
booktitle = {arXiv},
eprint = {1404.0736},
issn = {10495258},
number = {1},
pages = {1--11},
title = {{Exploiting Linear Structure Within Convolutional Networks for Efficient Evaluation}},
url = {http://arxiv.org/abs/1404.0736},
year = {2014}
}
@article{Drew,
author = {Drew, Mark S},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Drew - Unknown - Photometric stereo without multiple images 1 INTRODUCTION.pdf:pdf},
keywords = {based vision,color,dichromatic model,lambertian,neutral interface,physics,reflectance,shape,shape representation},
number = {604},
pages = {369--380},
title = {{Photometric stereo without multiple images 1 INTRODUCTION}},
volume = {3016}
}
@article{Edelman1998,
archivePrefix = {arXiv},
arxivId = {arXiv:physics/9806030v1},
author = {Edelman, A and Arias, TA},
eprint = {9806030v1},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Edelman, Arias - 1998 - The geometry of algorithms with orthogonality constraints.pdf:pdf},
journal = {Arxiv preprint physics/9806030},
keywords = {15a18,49m07,49m15,51f20,53b20,65f15,81v55,ams subject classifications,conjugate gradient,eigenvalue optimization,eigenvalues and eigenvectors,electronic structures computation,grassmann manifold,invariant subspace,newton,orthogonality constraints,programming,rayleigh quotient iteration,reduced gradient method,s method,sequential quadratic,stiefel manifold,subspace tracking},
primaryClass = {arXiv:physics},
title = {{The geometry of algorithms with orthogonality constraints}},
url = {http://arxiv.org/abs/physics/9806030},
year = {1998}
}
@inproceedings{Fahlman1989,
author = {Fahlmann, S E and Lebiere, C},
booktitle = {Advances in Neural Information Processing Systems 2},
doi = {10.1190/1.1821929},
editor = {Touretzky, David S},
isbn = {1558601007},
issn = {10459227},
pages = {524--532},
pmid = {220943591},
publisher = {Morgan Kaufmann},
title = {{The Cascade-Correlation Learning Architecture}},
year = {1990}
}
@misc{Fleck1995,
abstract = {Perspective projection is generally accepted as the ideal model of image formation. Many recent algorithms, and many recent judgements about the relative merits of different algorithms, depend on this assumption. However, perspective projection represents only the front half of the viewing sphere and it distorts the shape and intensity of objects unless they lie near the optical axis. It is only one of several projections used in lens design and it does not accurately model the behavior of many real lenses. It works well only for narrow-angle images. This paper surveys the properties of several alternative models of image formation. A model based on stereographic projection of the viewing sphere is shown to be a better general-purpose imaging model than perspective projection. The new model can represent wider fields of view and more closely approximates real wide-angle lenses. It preserves a suitable range of shape properties, including local symmetries. It approximates narrow-angl...},
author = {Fleck, Margaret M},
booktitle = {Research report},
pages = {95--01},
publisher = {University of Iowa},
title = {{Perspective projection: the wrong imaging model}},
url = {http://www.cs.illinois.edu/{~}mfleck/my-papers/stereographic-TR.pdf},
year = {1995}
}
@incollection{Hertzmann2005,
author = {Fleet, David and Hertzmann, Aaron},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Fleet, Hertzmann - 2005 - Radiometry and Reflection.pdf:pdf},
pages = {76--91},
title = {{Radiometry and Reflection}},
year = {2005}
}
@misc{fodor2002survey,
abstract = {This paper, we assume that we have n observations, each being a realization of the p- dimensional random variable x = (x 1 , . . . , x p with mean E(x) = = 1 , . . . , p and covariance matrix E(x )(x = pp . We denote such an observation matrix by X = i,j : 1 p, 1 n. If i and i = (i,i) denote the mean and the standard deviation of the ith random variable, respectively, then we will often standardize the observations x i,j by (x i,j i i , where i = x i = 1/n j=1 x i,j , and i = 1/n j=1 (x i,j x i},
author = {Fodor, I K},
booktitle = {Center for Applied Scientific Computing Lawrence Livermore National Laboratory},
doi = {10.2172/15002155},
pages = {1--18},
publisher = {Technical Report UCRL-ID-148494, Lawrence Livermore National Laboratory},
title = {{A survey of dimension reduction techniques}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.8.5098},
volume = {9},
year = {2002}
}
@article{Fuk80,
author = {Fukushima, K},
journal = {Biological Cybernetics},
keywords = {deep fukushima learning neocognitron networks neur},
pages = {193--202},
title = {{Neocognitron: A self-organizing neural network model for a mechanish of pattern recognition unaffected by shifts in position}},
volume = {36},
year = {1980}
}
@article{fukushima2013artificial,
abstract = {The neocognitron is a neural network model proposed by. Fukushima (1980). Its architecture was suggested by neurophysiological findings on the visual systems of mammals. It is a hierarchical multi-layered network. It acquires the ability to robustly recognize visual patterns through learning. Although the neocognitron has a long history, modifications of the network to improve its performance are still going on. For example, a recent neocognitron uses a new learning rule, named add-if-silent, which makes the learning process much simpler and more stable. Nevertheless, a high recognition rate can be kept with a smaller scale of the network. Referring to the history of the neocognitron, this paper discusses recent advances in the neocognitron. We also show that various new functions can be realized by, for example, introducing top-down connections to the neocognitron: mechanism of selective attention, recognition and completion of partly occluded patterns, restoring occluded contours, and so on. {\textcopyright} 2012 Elsevier Ltd.},
author = {Fukushima, Kunihiko},
doi = {10.1016/j.neunet.2012.09.016},
isbn = {0893-6080},
issn = {08936080},
journal = {Neural Networks},
keywords = {Artificial vision,Bottom-up and top-down,Hierarchical network,Modeling neural networks,Neocognitron},
pages = {103--119},
pmid = {23098752},
publisher = {Elsevier},
title = {{Artificial vision by multi-layered neural networks: Neocognitron and its advances}},
volume = {37},
year = {2013}
}
@inproceedings{conf/icml/2010,
booktitle = {ICML},
editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten},
keywords = {dblp},
publisher = {Omnipress},
title = {{Proceedings of the 27th International Conference on Machine Learning (ICML-10), June 21-24, 2010, Haifa, Israel}},
url = {http://dblp.uni-trier.de/db/conf/icml/icml2010.html},
year = {2010}
}
@inproceedings{Gal2016Dropout,
author = {Gal, Yarin and Ghahramani, Zoubin},
booktitle = {Proceedings of the 33rd International Conference on Machine Learning (ICML-16)},
title = {{Dropout as a {\{}B{\}}ayesian Approximation: Representing Model Uncertainty in Deep Learning}},
year = {2016}
}
@article{Geiger2012,
author = {Geiger, Andreas and Lenz, Philip and Urtasun, Raquel},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Geiger, Lenz, Urtasun - 2012 - Are we ready for autonomous driving the kitti vision benchmark suite.pdf:pdf},
journal = {Computer Vision and},
title = {{Are we ready for autonomous driving? the kitti vision benchmark suite}},
url = {http://h1997453.stratoserver.net/publications/cvpr12.pdf},
year = {2012}
}
@inproceedings{girshick2015deformable,
author = {Girshick, Ross and Iandola, Forrest and Darrell, Trevor and Malik, Jitendra},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages = {437--446},
title = {{Deformable Part Models are Convolutional Neural Networks}},
year = {2015}
}
@inproceedings{glorot2010understanding,
abstract = {Whereas before 2006 it appears that deep multilayer neural networks were not successfully trained, since then several algorithms have been shown to successfully train them, with experimental results showing the superiority of deeper vs less deep architectures. All these experimental results were obtained with new initialization or training mechanisms. Our objective here is to understand better why standard gradient descent from random initialization is doing so poorly with deep neural networks, to better understand these recent relative successes and help design better algorithms in the future. We ﬁrst observe the inﬂuence of the non-linear activations functions. We ﬁnd that the logistic sigmoid activation is unsuited for deep networks with random initialization because of its mean value, which can drive especially the top hidden layer into saturation. Surprisingly, we ﬁnd that saturated units can move out of saturation by themselves, albeit slowly, and explaining the plateaus sometimes seen when training neural networks. We ﬁnd that a new non-linearity that saturates less can often be beneﬁcial. Finally, we study how activations and gradients vary across layers and during training, with the idea that training may be more difﬁcult when the singular values of the Jacobian associated with each layer are far from 1. Based on these considerations, we propose a new initialization scheme that brings substantially faster convergence.},
author = {Glorot, Xavier and Bengio, Yoshua},
booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS)},
doi = {10.1.1.207.2059},
issn = {15324435},
pages = {249--256},
title = {{Understanding the difficulty of training deep feedforward neural networks}},
url = {http://machinelearning.wustl.edu/mlpapers/paper{\_}files/AISTATS2010{\_}GlorotB10.pdf},
volume = {9},
year = {2010}
}
@article{Golovinskiy2009,
abstract = {We present a min-cut based method of segmenting objects in point clouds. Given an object location, our method builds a k-nearest neighbors graph, assumes a background prior, adds hard foreground (and optionally background) constraints, and finds the min-cut to compute a foreground-background segmentation. Our method can be run fully automatically, or interactively with a user interface. We test our system on an outdoor urban scan, quantitatively evaluate our algorithm on a test set of about 1000 objects, and compare to several alternative approaches.},
author = {Golovinskiy, Aleksey and Funkhouser, Thomas},
doi = {10.1109/ICCVW.2009.5457721},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Golovinskiy, Funkhouser - 2009 - Min-cut based segmentation of point clouds.pdf:pdf},
isbn = {9781424444427},
journal = {2009 IEEE 12th International Conference on Computer Vision Workshops ICCV Workshops},
pages = {39--46},
publisher = {Ieee},
title = {{Min-cut based segmentation of point clouds}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5457721},
volume = {150},
year = {2009}
}
@inproceedings{goodfellow2013maxout,
abstract = {We consider the problem of designing mod-els to leverage a recently introduced ap-proximate model averaging technique called dropout. We define a simple new model called maxout (so named because its output is the max of a set of inputs, and because it is a nat-ural companion to dropout) designed to both facilitate optimization by dropout and im-prove the accuracy of dropout's fast approxi-mate model averaging technique. We empir-ically verify that the model successfully ac-complishes both of these tasks. We use max-out and dropout to demonstrate state of the art classification performance on four bench-mark datasets: MNIST, CIFAR-10, CIFAR-100, and SVHN.},
archivePrefix = {arXiv},
arxivId = {1302.4389},
author = {Goodfellow, Ian J and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML)},
eprint = {1302.4389},
pages = {1319--1327},
title = {{Maxout Networks}},
volume = {28},
year = {2013}
}
@book{Goodfellow-et-al-2016-Book,
annote = {Book in preparation for MIT Press},
author = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
title = {{Deep Learning}},
url = {http://www.deeplearningbook.org},
year = {2016}
}
@article{Gortler1996,
address = {New York, New York, USA},
author = {Gortler, Steven J and Grzeszczuk, Radek and Szeliski, Richard and Cohen, Michael F},
doi = {10.1145/237170.237200},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Gortler et al. - 1996 - The lumigraph.pdf:pdf},
isbn = {0897917464},
journal = {Proceedings of the 23rd annual conference on Computer graphics and interactive techniques - SIGGRAPH '96},
pages = {43--54},
publisher = {ACM Press},
title = {{The lumigraph}},
url = {http://portal.acm.org/citation.cfm?doid=237170.237200},
year = {1996}
}
@misc{1502.02551v1,
abstract = {Training of large-scale deep neural networks is often constrained by the available computational resources. We study the effect of limited preci- sion data representation and computation on neu- ral network training. Within the context of low- precision fixed-point computations, we observe the rounding scheme to play a crucial role in de- termining the network's behavior during train- ing. Our results show that deep networks can be trained using only 16-bit wide fixed-point num- ber representation when using stochastic round- ing, and incur little to no degradation in the classification accuracy. We also demonstrate an energy-efficient hardware accelerator that imple- ments low-precision fixed-point arithmetic with stochastic rounding.},
annote = {published = 2015-02-09T16:37:29Z, updated = 2015-02-09T16:37:29Z, 10 pages, 6 figures, 1 table},
archivePrefix = {arXiv},
arxivId = {1502.02551},
author = {Gupta, Suyog and Agrawal, Ankur and Gopalakrishnan, Kailash and Narayanan, Pritish},
booktitle = {Proceedings of the 32nd International Conference on Machine Learning (ICML-15)},
doi = {10.1109/72.80206},
eprint = {1502.02551},
isbn = {9781510810587},
issn = {19410093},
month = {feb},
pages = {1737--1746},
pmid = {18282824},
title = {{Deep Learning with Limited Numerical Precision}},
url = {http://jmlr.org/proceedings/papers/v37/gupta15.pdf},
year = {2015}
}
@article{journals/iandc/HancockJLT96,
abstract = {k-Decision lists and decision trees play important roles in learning theory as well as in practical learning systems.k-Decision lists generalize classes such as monomials,k-DNF, andk-CNF, and like these subclasses they are polynomially PAC-learnable [R. Rivest,Mach. Learning2(1987), 229–246]. This leaves open the question of whetherk-decision lists can be learned as efficiently ask-DNF. We answer this question negatively in a certain sense, thus disproving a claim in a popular textbook [M. Anthony and N. Biggs, “Computational Learning Theory,” Cambridge Univ. Press, Cambridge, UK, 1992]. Decision trees, on the other hand, are not even known to be polynomially PAC-learnable, despite their widespread practical application. We will show that decision trees are not likely to be efficiently PAC-learnable. We summarize our specific results. The following problems cannot be approximated in polynomial time within a factor of 2log$\delta$ nfor any$\delta${\textless}1, unlessNP⊂DTIME[2polylog n]: a generalized set cover,k-decision lists,k-decision lists by monotone decision lists, and decision trees. Decision lists cannot be approximated in polynomial time within a factor ofn$\delta$, for some constant$\delta${\textgreater}0, unlessNP=P. Also,k-decision lists withl0–1 alternations cannot be approximated within a factor logl nunlessNP⊂DTIME[nO(log log n)] (providing an interesting comparison to the upper bound obtained by A. Dhagat and L. Hellerstein [in“FOCS '94,” pp. 64–74]).},
author = {Hancock, Thomas and Jiang, Tao and Li, Ming and Tromp, John},
doi = {10.1006/inco.1996.0040},
isbn = {3540590420},
issn = {0890-5401},
journal = {Information and Computation},
number = {2},
pages = {114--122},
title = {{Lower Bounds on Learning Decision Lists and Trees}},
url = {http://www.sciencedirect.com/science/article/pii/S0890540196900401{\%}5Cnhttp://www.sciencedirect.com/science/article/pii/S0890540196900401/pdf?md5=59bdd8c077309262836d57b76a5a5577{\&}pid=1-s2.0-S0890540196900401-main.pdf},
volume = {126},
year = {1996}
}
@misc{Hanmandlu2000,
abstract = {A recursive estimation of depth from a sequence of images is proposed. Using the spherical projection, a simple equation is derived that relates image motion with the object motion. This equation is reformulated into a dynamical state space model for which Kalman filter can be easily applied to yield the estimate of depth. Point correspondences have been used to obtain feature points and the motion parameters are assumed to be known. The results are illustrated on a real object},
author = {Hanmandlu, M and Shantaram, V and Sudheer, K},
booktitle = {Proceedings of International Conference on Robotics and Automation},
doi = {10.1109/ITCC.2000.844211},
isbn = {0769505406},
number = {April},
pages = {2264--2269},
publisher = {Ieee},
title = {{Depth estimation from a sequence of images using spherical projection}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=619298},
volume = {3},
year = {2000}
}
@article{Happel1994,
author = {Happel, Bart L M and Murre, Jacob M J},
journal = {Neural Networks},
number = {6-7},
pages = {985--1004},
title = {{Design and evolution of modular neural network architectures.}},
volume = {7},
year = {1994}
}
@article{Haralick1989,
author = {Haralick, R M},
doi = {10.1109/CVPR.1989.37874},
isbn = {081861918X},
journal = {Proceedings CVPR 89 IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {370--378},
publisher = {IEEE Comput. Soc. Press},
title = {{Monocular vision using inverse perspective projection geometry: analytic relations}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=37874},
volume = {10},
year = {1989}
}
@inproceedings{Hardt2015,
abstract = {We show that any model trained by a stochastic gradient method with few iterations has vanishing generalization error. We prove this by showing the method is algorithmically stable in the sense of Bousquet and Elisseeff. Our analysis only employs elementary tools from convex and continuous optimization. Our results apply to both convex and non-convex optimization under standard Lipschitz and smoothness assumptions. Applying our results to the convex case, we provide new explanations for why multiple epochs of stochastic gradient descent generalize well in practice. In the nonconvex case, we provide a new interpretation of common practices in neural networks, and provide a formal rationale for stability-promoting mechanisms in training large, deep models. Conceptually, our findings underscore the importance of reducing training time beyond its obvious benefit.},
address = {New York, New York, USA},
archivePrefix = {arXiv},
arxivId = {1509.01240},
author = {Hardt, Moritz and Recht, Benjamin and Singer, Yoram},
booktitle = {Proceedings of the 33rd International Conference on Machine Learning (ICML 2016)},
eprint = {1509.01240},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Hardt, Recht, Singer - 2015 - Train faster, generalize better Stability of stochastic gradient descent.pdf:pdf},
isbn = {9781510829008},
pages = {1--24},
title = {{Train faster, generalize better: Stability of stochastic gradient descent}},
url = {http://arxiv.org/abs/1509.01240},
year = {2015}
}
@article{Hasinoff2010,
author = {Hasinoff, Samuel W. and Durand, Fredo and Freeman, William T.},
doi = {10.1109/CVPR.2010.5540167},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Hasinoff, Durand, Freeman - 2010 - Noise-optimal capture for high dynamic range photography.pdf:pdf},
isbn = {978-1-4244-6984-0},
journal = {2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
month = {jun},
pages = {553--560},
publisher = {Ieee},
title = {{Noise-optimal capture for high dynamic range photography}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5540167},
year = {2010}
}
@inproceedings{He2012,
author = {He, Jun and Balzano, Laura and Szlam, Arthur},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/He, Balzano, Szlam - 2012 - Incremental Gradient on the Grassmannian for Online Foreground and Background Separation in Subsampled Video.pdf:pdf},
isbn = {9781467312288},
pages = {1568--1575},
title = {{Incremental Gradient on the Grassmannian for Online Foreground and Background Separation in Subsampled Video}},
year = {2012}
}
@article{ieee7005506,
abstract = {Existing deep convolutional neural networks (CNNs) require a fixed-size (e.g., 224224) input image. This requirement is ``artificial'' and may reduce the recognition accuracy for the images or sub-images of an arbitrary size/scale. In this work, we equip the networks with another pooling strategy, ``spatial pyramid pooling'', to eliminate the above requirement. The new network structure, called SPP-net, can generate a fixed-length representation regardless of image size/scale. Pyramid pooling is also robust to object deformations. With these advantages, SPP-net should in general improve all CNN-based image classification methods. On the ImageNet 2012 dataset, we demonstrate that SPP-net boosts the accuracy of a variety of CNN architectures despite their different designs. On the Pascal VOC 2007 and Caltech101 datasets, SPP-net achieves state-of-theart classification results using a single full-image representation and no fine-tuning. The power of SPP-net is also significant in object detection. Using SPP-net, we compute the feature maps from the entire image only once, and then pool features in arbitrary regions (sub-images) to generate fixed-length representations for training the detectors. This method avoids repeatedly computing the convolutional features. In processing test images, our method is 24-102 faster than the R-CNN method, while achieving better or comparable accuracy on Pascal VOC 2007. In ImageNet Large Scale Visual Recognition Challenge (ILSVRC) 2014, our methods rank {\#}2 in object detection and {\#}3 in image classification among all 38 teams. This manuscript also introduces the improvement made for this competition.},
author = {He, K and Zhang, X and Ren, S and Sun, J},
doi = {10.1109/TPAMI.2015.2389824},
issn = {0162-8828},
journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
keywords = {Accuracy; Agriculture; Convolutional codes; Featur},
number = {99},
pages = {1},
title = {{Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition}},
volume = {PP},
year = {2015}
}
@inproceedings{he2015convolutional,
author = {He, Kaiming and Sun, Jian},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages = {5353--5360},
title = {{Convolutional Neural Networks at Constrained Time Cost}},
year = {2015}
}
@article{He2015,
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learn- ing residual functions with reference to the layer inputs, in- stead of learning unreferenced functions. We provide com- prehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers—8× deeper than VGG nets [41] but still having lower complex- ity. An ensemble of these residual nets achieves 3.57{\%} error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our ex- tremely deep representations, we obtain a 28{\%} relative im- provement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC {\&} COCO 2015 competitions1, where we also won the 1st places on the tasks of ImageNet detection, ImageNet local- ization, COCO detection, and COCO segmentation.},
archivePrefix = {arXiv},
arxivId = {1512.03385},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
doi = {10.3389/fpsyg.2013.00124},
eprint = {1512.03385},
isbn = {978-1-4673-6964-0},
issn = {1664-1078},
journal = {Arxiv.Org},
keywords = {deep learning,denoising auto-encoder,image denoising},
number = {3},
pages = {171--180},
pmid = {23554596},
title = {{Deep Residual Learning for Image Recognition}},
url = {http://arxiv.org/pdf/1512.03385v1.pdf},
volume = {7},
year = {2015}
}
@inproceedings{He2015b,
abstract = {Rectified activation units (rectifiers) are essential for state-of-the-art neural networks. In this work, we study rectifier neural networks for image classification from two aspects. First, we propose a Parametric Rectified Linear Unit (PReLU) that generalizes the traditional rectified unit. PReLU improves model fitting with nearly zero extra com-putational cost and little overfitting risk. Second, we derive a robust initialization method that particularly considers the rectifier nonlinearities. This method enables us to train extremely deep rectified models directly from scratch and to investigate deeper or wider network architectures. Based on the learnable activation and advanced initialization, we achieve 4.94{\%} top-5 test error on the ImageNet 2012 clas-sification dataset. This is a 26{\%} relative improvement over the ILSVRC 2014 winner (GoogLeNet, 6.66{\%} [33]). To our knowledge, our result is the first 1 to surpass the reported human-level performance (5.1{\%}, [26]) on this dataset.},
archivePrefix = {arXiv},
arxivId = {1502.01852},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle = {IEEE Conference on Computer Vision and Patern Recognition (ICCV)},
doi = {10.1109/ICCV.2015.123},
eprint = {1502.01852},
isbn = {978-1-4673-8391-2},
issn = {15505499},
keywords = {dblp},
pages = {1026--1034},
publisher = {IEEE},
title = {{Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification}},
url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7407725},
year = {2015}
}
@article{He2016,
abstract = {Deep residual networks have emerged as a family of extremely deep architectures showing compelling accuracy and nice convergence behaviors. In this paper, we analyze the propagation formulations behind the residual building blocks, which suggest that the forward and backward signals can be directly propagated from one block to any other block, when using identity mappings as the skip connections and after-addition activation. A series of ablation experiments support the importance of these identity mappings. This motivates us to propose a new residual unit, which further makes training easy and improves generalization. We report improved results using a 1001-layer ResNet on CIFAR-10/100, and a 200-layer ResNet on ImageNet.},
archivePrefix = {arXiv},
arxivId = {1603.05027},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
eprint = {1603.05027},
journal = {arXiv preprint},
pages = {1--15},
title = {{Identity Mappings in Deep Residual Networks}},
url = {http://arxiv.org/abs/1603.05027},
volume = {abs/1603.0},
year = {2016}
}
@article{helearning,
author = {He, X and Mnih, V and Ioannou, Y and Zemel, R S},
title = {{Learning Visual Features for Outdoor Localization}}
}
@article{Healey1994,
author = {Healey, Glenn E and Kondepudy, Raghava and Member, Student},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Healey, Kondepudy, Member - 1994 - Radiometric CCD camera calibration and noise estimation.pdf:pdf},
journal = {Pattern Analysis and Machine {\ldots}},
number = {3},
title = {{Radiometric CCD camera calibration and noise estimation}},
url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=276126},
volume = {16},
year = {1994}
}
@misc{dropoutsurprising,
archivePrefix = {arXiv},
arxivId = {cs.LG/1602.04484},
author = {Helmbold, David P and Long, Philip M},
eprint = {1602.04484},
month = {nov},
primaryClass = {cs.LG},
title = {{Surprising properties of dropout in deep networks}},
url = {http://arxiv.org/abs/1602.04484; http://arxiv.org/pdf/1602.04484},
year = {2016}
}
@article{Himmelsbach2008,
author = {Himmelsbach, M},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Himmelsbach - 2008 - LIDAR-based 3D object perception.pdf:pdf},
journal = {Proceedings of 1st},
title = {{LIDAR-based 3D object perception}},
url = {http://www.cs.princeton.edu/courses/archive/spring11/cos598A/pdfs/Himmelsbach08.pdf},
year = {2008}
}
@article{hinton2006reducing,
abstract = {High-dimensional data can be converted to low-dimensional codes by training a multilayer neural$\backslash$rnetwork with a small central layer to reconstruct high-dimensional input vectors. Gradient descent$\backslash$r$\backslash$ncan be used for fine-tuning the weights in such ‘‘autoencoder'' networks, but this works well only if$\backslash$r$\backslash$nthe initial weights are close to a good solution. We describe an effective way of initializing the$\backslash$r$\backslash$nweights that allows deep autoencoder networks to learn low-dimensional codes that work much$\backslash$r$\backslash$nbetter than principal components analysis as a tool to reduce the dimensionality of data.$\backslash$r$\backslash$n},
archivePrefix = {arXiv},
arxivId = {20},
author = {Hinton, Geoffrey E and Salakhutdinov, Ruslan R},
doi = {10.1126/science.1127647},
eprint = {20},
isbn = {1095-9203 (Electronic)$\backslash$r0036-8075 (Linking)},
issn = {1095-9203},
journal = {Science},
number = {5786},
pages = {504--507},
pmid = {16873662},
publisher = {American Association for the Advancement of Science},
title = {{Reducing the Dimensionality of Data with Neural Networks$\backslash$r}},
volume = {313},
year = {2006}
}
@misc{Hinton2012,
abstract = {When a large feedforward neural network is trained on a small training set, it typically performs poorly on held-out test data. This "overfitting" is greatly reduced by randomly omitting half of the feature detectors on each training case. This prevents complex co-adaptations in which a feature detector is only helpful in the context of several other specific feature detectors. Instead, each neuron learns to detect a feature that is generally helpful for producing the correct answer given the combinatorially large variety of internal contexts in which it must operate. Random "dropout" gives big improvements on many benchmark tasks and sets new records for speech and object recognition.},
archivePrefix = {arXiv},
arxivId = {1207.0580},
author = {Hinton, Geoffrey E. and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R.},
booktitle = {ArXiv e-prints},
doi = {arXiv:1207.0580},
eprint = {1207.0580},
isbn = {9781467394673},
issn = {9781467394673},
month = {jul},
pages = {1--18},
pmid = {1000104337},
title = {{Improving neural networks by preventing co-adaptation of feature detectors}},
url = {http://arxiv.org/abs/1207.0580},
year = {2012}
}
@phdthesis{hochreiter1991untersuchungen,
author = {Hochreiter, Sepp},
booktitle = {Diploma, Technische Universit{\{}{\"{a}}{\}}t M{\{}{\"{u}}{\}}nchen},
pages = {91},
school = {Technische Universit{\{}{\"{a}}{\}}t M{\{}{\"{u}}{\}}nchen},
title = {{Untersuchungen zu dynamischen neuronalen Netzen}},
year = {1991}
}
@misc{Hochreiter01gradientflow,
abstract = {Introduction Recurrent networks (crossreference Chapter 12) can, in principle, use their feedback connections to store representations of recent input events in the form of activations. The most widely used algorithms for learning what to put in short-term memory, however, take too much time to be feasible or do not work well at all, especially when minimal time lags between inputs and corresponding teacher signals are long. Although theoretically fascinating, they do not provide clear practical advantages over, say, backprop in feedforward networks with limited time windows (see crossreference Chapters 11 and 12). With conventional {\&}034;algorithms based on the computation of the complete gradient{\&}034;, such as {\&}034;Back-Propagation Through Time{\&}034; (BPTT, e.g., 22, 27, 26) or {\&}034;Real-Time Recurrent Learning{\&}034; (RTRL, e.g., 21) error signals {\&}034;flowing backwards in time{\&}034; tend to either (1) blow up or (2) vanish: the temporal evolution of the backpropagated error ex},
archivePrefix = {arXiv},
arxivId = {arXiv:1011.1669v3},
author = {Hochreiter, Sepp and Bengio, Y and Frasconi, Paolo and Schmidhuber, J},
booktitle = {A Field Guide to Dynamical Recurrent Networks},
doi = {10.1109/9780470544037.ch14},
eprint = {arXiv:1011.1669v3},
isbn = {978-0-7803-5369-5},
issn = {1098-6596},
pages = {237--243},
pmid = {25246403},
title = {{Gradient flow in recurrent nets: the difficulty of learning long-term dependencies}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.24.7321{\&}rep=rep1{\&}type=pdf},
year = {2001}
}
@article{Horn1990,
abstract = {The method described here for recovering the shape of a surface from a shaded image can deal with complex, wrinkled surfaces. Integrability can be enforced easily because both surface height and gradient are represented. (A gradient field is integrable if it is the gradient of some surface height function.) The robustness of the method stems in part from linearization of the reflectance map about the current estimate of the surface orientation at each picture cell. (The reflectance map gives the dependence of scene radiance on surface orientation.) The new scheme can find an exact solution of a given shape-from-shading problem even though a regularizing term is included. The reason is that the penalty term is needed only to stabilize the iterative scheme when it is far from the correct solution; it can be turned off as the solution is approached. This is a reflection of the fact that shape-from-shading problems are not ill posed when boundary conditions are available, or when the image contains singular points.},
author = {Horn, B K P},
doi = {10.1007/bf00056771},
issn = {09205691},
journal = {The International Journal of Computer Vision},
number = {1},
pages = {37--75},
publisher = {Springer Netherlands},
title = {{Height and Gradient from Shading}},
url = {http://www.springerlink.com/index/L90617LKGL701386.pdf},
volume = {5},
year = {1990}
}
@article{Horn1974,
author = {Horn, B K P},
journal = {Computer Graphics and Image Processing},
title = {{Determining lightness from an image}},
url = {http://www.sciencedirect.com/science/article/pii/0146664X74900227},
year = {1974}
}
@article{Horn1979,
abstract = {It appears that the development of machine vision may benefit from a detailed understanding of the imaging process. The reflectance map, showing scene radiance as a function of surface gradient, has proved to be helpful in this endeavor. The reflectance map depends both on the nature of the surface layers of the objects being imaged and the distribution of light sources. Recently, a unified approach to the specification of surface reflectance in terms of both incident and reflected beam geometry has been proposed. The reflecting properties of a surface are specified in terms of the bidirectional reflectance-distribution function (BRDF). Here we derive the reflectance map in terms of the BRDF and the distribution of source radiance. A number of special cases of practical importance are developed in detail. The significance of this approach to the understanding of image formation is briefly indicated.},
author = {Horn, B K and Sjoberg, R W},
doi = {10.1364/AO.18.001770},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Horn, Sjoberg - 1979 - Calculating the reflectance map.pdf:pdf},
journal = {Applied Optics},
keywords = {image brightness,scene brightness,surface reflec},
number = {11},
pages = {1770--1779},
pmid = {20212547},
publisher = {OSA},
title = {{Calculating the reflectance map.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/20212547},
volume = {18},
year = {1979}
}
@article{Horn1977,
abstract = {Traditionally, image intensities have been processed to segment an image into regions or to find edge-fragments. Image intensities carry a great deal more information about three-dimensional shape, however. To exploit this information, it is necessary to understand how images are formed and what determines the observed intensity in the image. The gradient space, popularized by Huffman and Mackworth in a slightly different context, is a helpful tool in the development of new methods.},
author = {Horn, Berthold K P},
doi = {10.1016/0004-3702(77)90020-0},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Horn - 1977 - Understanding Image Intensities.pdf:pdf},
issn = {00043702},
journal = {Artificial Intelligence},
number = {2},
pages = {201--231},
title = {{Understanding Image Intensities}},
url = {http://linkinghub.elsevier.com/retrieve/pii/0004370277900200},
volume = {8},
year = {1977}
}
@misc{Horn1970,
abstract = {A method will be described for finding the shape of a smooth apaque object form a monocular image, given a knowledge of the surface photometry, the position of the lightsource and certain auxiliary information to resolve ambiguities. This method is complementary to the use of stereoscopy which relies on matching up sharp detail and will fail on smooth objects. Until now the image processing of single views has been restricted to objects which can meaningfully be considered two-dimensional or bounded by plane surfaces. It is possible to derive a first-order non-linear partial differential equation in two unknowns relating the intensity at the image points to the shape of the objects. This equation can be solved by means of an equivalent set of five ordinary differential equations. A curve traced out by solving this set of equations for one set of starting values is called a characteristic strip. Starting one of these strips from each point on some initial curve will produce the whole solution surface. The initial curves can usually be constructed around so-called singular points. A number of applications of this metod will be discussed including one to lunar topography and one to the scanning electron microscope. In both of these cases great simplifications occur in the equations. A note on polyhedra follows and a quantitative theory of facial make-up is touched upon. An implementation of some of these ideas on the PDP-6 computer with its attached image-dissector camera at the Artificial intelligence Laboratory will be described, and also a nose-recognition program.},
author = {Horn, Berthold K P},
booktitle = {Doctor},
institution = {MIT Artificial Intelligence Laboratory},
number = {232},
pages = {196},
title = {{Shape from Shading: A Method for Obtaining the Shape of a Smooth Opaque Object from One View}},
url = {http://dspace.mit.edu/handle/1721.1/6885},
year = {1970}
}
@article{hornik89a,
abstract = {Thesis BIB},
author = {Hornik, K and Stinchcombe, M and White, H},
journal = {Neural Networks},
keywords = {imported},
pages = {356--366},
title = {{Multilayer feedforward networks are universal approximators}},
volume = {2},
year = {1989}
}
@article{Horprasert1999,
author = {Horprasert, Thanarat and Harwood, David},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Horprasert, Harwood - 1999 - A statistical approach for real-time robust background subtraction and shadow detection.pdf:pdf},
journal = {IEEE ICCV},
pages = {1--19},
title = {{A statistical approach for real-time robust background subtraction and shadow detection}},
url = {http://vast.uccs.edu/{~}tboult/frame/Horprasert/},
year = {1999}
}
@article{Humenberger2012,
author = {Humenberger, Martin and Schraml, Stephan and Sulzbachner, Christoph and Belbachir, Ahmed Nabil},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Humenberger et al. - 2012 - Embedded Fall Detection with a Neural Network and Bio-Inspired Stereo Vision.pdf:pdf},
isbn = {9781467316125},
pages = {60--67},
title = {{Embedded Fall Detection with a Neural Network and Bio-Inspired Stereo Vision}},
year = {2012}
}
@article{Hwang2011,
abstract = {By the laws of quantum physics, pixel intensity does not have a true value, but should be a random variable. Contrary to the conventional assumptions, the distribution of intensity may not be an additive Gaussian. We propose to directly model the intensity difference, and show its validity by an experimental comparison to the conventional additive model. As a model of the intensity difference, we present a Skellam distribution derived from the Poisson photon noise model. This modeling induces a linear relationship between intensity and Skellam parameters, while conventional variance computation methods do not yield any significant relationship between these parameters under natural illumination. The intensity-Skellam line is invariant to scene, illumination and even most of camera parameters. We also propose practical methods to obtain the line using a color pattern and an arbitrary image under a natural illumination. Because the Skellam parameters that can be obtained from this linearity determine a noise distribution for each intensity value, we can statistically determine whether any intensity difference is caused by an underlying signal difference or by noise. We demonstrate the effectiveness of this new noise model by applying it to practical applications of background subtraction and edge detection.},
author = {Hwang, Youngbae and Kim, Jun-Sik and Kweon, In So},
doi = {10.1109/TPAMI.2011.224},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Hwang, Kim, Kweon - 2011 - Difference-based Image Noise Modeling Using Skellam Distribution.pdf:pdf},
issn = {1939-3539},
journal = {IEEE transactions on pattern analysis and machine intelligence},
month = {nov},
number = {7},
pages = {1329--1341},
pmid = {22144520},
title = {{Difference-based Image Noise Modeling Using Skellam Distribution.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/22144520},
volume = {34},
year = {2011}
}
@article{Hyvarinen2000,
abstract = {A fundamental problem in neural network research, as well as in many other disciplines, is finding a suitable representation of multivariate data, i.e. random vectors. For reasons of computational and conceptual simplicity, the representation is often sought as a linear transformation of the original data. In other words, each component of the representation is a linear combination of the original variables. Well-known linear transformation methods include principal component analysis, factor analysis, and projection pursuit. Independent component analysis (ICA) is a recently developed method in which the goal is to find a linear representation of non-Gaussian data so that the components are statistically independent, or as independent as possible. Such a representation seems to capture the essential structure of the data in many applications, including feature extraction and signal separation. In this paper, we present the basic theory and applications of ICA, and our recent work on the subject.},
author = {Hyv{\"{a}}rinen, A and Oja, E},
institution = {Neural Networks Research Centre, Helsinki University of Technology, Finland. aapo.hyvarinen@hut.fi},
journal = {Neural Networks},
keywords = {algorithms,artifacts,brain,brain physiology,humans,magnetoencephalography,neural networks (computer),normal distribution},
number = {4-5},
pages = {411--430},
pmid = {10946390},
publisher = {Elsevier},
title = {{Independent component analysis: algorithms and applications.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/10946390},
volume = {13},
year = {2000}
}
@article{I-lealey,
author = {I-lealey, Glenn and Kondepudy, Raghava},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/I-lealey, Kondepudy - Unknown - CCD Camera Calibration and Noise Estimation.pdf:pdf},
isbn = {0818628553},
number = {5},
pages = {90--95},
title = {{CCD Camera Calibration and Noise Estimation}},
volume = {92717}
}
@article{ioannou2012difference,
author = {Ioannou, Y and Taati, B and Harrap, R and Greenspan, M},
journal = {arXiv preprint arXiv:1209.1759},
title = {{Difference of Normals as a Multi-Scale Operator in Unorganized Point Clouds}},
year = {2012}
}
@inproceedings{Ioannou2009a,
abstract = {Potential Well Space Embedding (PWSE) has been shown to be an effective global method to recognize segmented objects in range data. Here Local PWSE is proposed as an extension of PWSE. LPWSE features are generated by iterating ICP to the local minima of a multiscale registration model at each point. The locations of the local minima are then used to generate feature vectors, which can be matched against a preprocessed database of such features to determine correspondences between images and models. The method has been implemented and tested on real data, and has been found to be effective at recognizing sparse segmented (self-)occluded range images. A classifi-cation accuracy of 92{\%} is achieved with 3750 points, dropping to 78{\%} at 500 points, on 50 randomly sub-sampled sparse views of 5 objects. {\textcopyright}2009 IEEE.},
author = {Ioannou, Y. and Shang, L. and Harrap, R. and Greenspan, M.},
booktitle = {2009 IEEE 12th International Conference on Computer Vision Workshops, ICCV Workshops 2009},
doi = {10.1109/ICCVW.2009.5457491},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou, Shang - 2009 - Local PotentialWell Space Embedding.pdf:pdf},
isbn = {9781424444427},
pages = {1726--1732},
title = {{Local potentialwell space embedding}},
year = {2009}
}
@article{Ioannou2010,
abstract = {Recent advances in Light Detection and Ranging (LIDAR) technology and integration have resulted in vehicle-borne platforms for urban LIDAR scanning, such as Terrapoint Inc.'s TITAN system. Such technology has lead to an explosion in ground LIDAR data. The large size of such mobile urban LIDAR data sets, and the ease at which they may now be collected, has shifted the bottleneck of creating abstract urban models for Geographical Information Systems (GIS) from data collection to data processing. While turning such data into useful models has traditionally relied on human analysis, this is no longer practical. This thesis outlines a methodology for automatically recovering the necessary information to create abstract urban models from mobile urban LIDAR data using computer vision methods. As an integral part of the methodology, a novel scale-based interest operator is introduced (Difference of Normals) that is efficient enough to process large datasets, while accurately isolating objects of interest in the scene according to real-world parameters. Finally a novel localized object recognition algorithm is introduced (Local Potential Well Space Embedding), derived from a proven global method for object recognition (Potential Well Space Embedding). The object recognition phase of our methodology is discussed with these two algorithms as a focus.},
author = {Ioannou, Yani Andrew},
journal = {Thesis (Master, Computing), Queen's University},
keywords = {LIDAR, point clouds, GIS, computer vision, urban},
mendeley-tags = {LIDAR, point clouds, GIS, computer vision, urban},
number = {February},
title = {{Automatic urban modelling using mobile urban lidar data}},
url = {http://qspace.library.queensu.ca/handle/1974/5443},
year = {2010}
}
@inproceedings{Ioannou2016,
author = {Ioannou, Yani and Robertson, Duncan P and Shotton, Jamie and Cipolla, Roberto and Criminisi, Antonio},
booktitle = {International Conference on Learning Representations},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou et al. - 2016 - Training CNNs with Low-Rank Filters for Efficient Image Classification.pdf:pdf},
title = {{Training CNNs with Low-Rank Filters for Efficient Image Classification}},
year = {2016}
}
@article{Ioannou2016e,
abstract = {We propose a new method for training computationally efficient and compact convolutional neural networks (CNNs) using a novel sparse connection structure that resembles a tree root. Our sparse connection structure facilitates a significant reduction in computational cost and number of parameters of state-of-the-art deep CNNs without compromising accuracy. We validate our approach by using it to train more efficient variants of state-of-the-art CNN architectures, evaluated on the CIFAR10 and ILSVRC datasets. Our results show similar or higher accuracy than the baseline architectures with much less compute, as measured by CPU and GPU timings. For example, for ResNet 50, our model has 40{\%} fewer parameters, 45{\%} fewer floating point operations, and is 31{\%} (12{\%}) faster on a CPU (GPU). For the deeper ResNet 200 our model has 25{\%} fewer floating point operations and 44{\%} fewer parameters, while maintaining state-of-the-art accuracy. For GoogLeNet, our model has 7{\%} fewer parameters and is 21{\%} (16{\%}) faster on a CPU (GPU).},
archivePrefix = {arXiv},
arxivId = {1605.06489},
author = {Ioannou, Yani and Robertson, Duncan and Cipolla, Roberto and Criminisi, Antonio},
eprint = {1605.06489},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou et al. - 2016 - Deep Roots Improving CNN Efficiency with Hierarchical Filter Groups.pdf:pdf},
journal = {arXiv pre-print},
month = {may},
title = {{Deep Roots: Improving CNN Efficiency with Hierarchical Filter Groups}},
url = {http://arxiv.org/abs/1605.06489},
year = {2016}
}
@techreport{Ioannou2015,
author = {Ioannou, Yani and Robertson, Duncan and Zikic, Darko and Kontschieder, Peter and Shotton, Jamie and Brown, Matthew and Criminisi, Antonio},
booktitle = {Technical Report},
institution = {Microsoft Research},
month = {apr},
number = {MSR-TR-2015-58},
title = {{Decision Forests, Convolutional Networks and the Models in-Between}},
year = {2015}
}
@techreport{Ioannou2015,
author = {Ioannou, Yani and Robertson, Duncan and Zikic, Darko and Kontschieder, Peter and Shotton, Jamie and Brown, Matthew and Criminisi, Antonio},
booktitle = {Technical Report},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou et al. - 2015 - Decision Forests, Convolutional Networks and the Models in-Between(2).pdf:pdf},
institution = {Microsoft Research},
month = {apr},
number = {MSR-TR-2015-58},
title = {{Decision Forests, Convolutional Networks and the Models in-Between}},
year = {2015}
}
@article{Ioannou2012,
abstract = {A novel multi-scale operator for unorganized 3D point clouds is introduced. The Difference of Normals (DoN) provides a computationally efficient, multi-scale approach to processing large unorganized 3D point clouds. The application of DoN in the multi-scale filtering of two different real-world outdoor urban LIDAR scene datasets is quantitatively and qualitatively demonstrated. In both datasets the DoN operator is shown to segment large 3D point clouds into scale-salient clusters, such as cars, people, and lamp posts towards applications in semi-automatic annotation, and as a pre-processing step in automatic object recognition. The application of the operator to segmentation is evaluated on a large public dataset of outdoor LIDAR scenes with ground truth annotations.},
archivePrefix = {arXiv},
arxivId = {arXiv:1209.1759v1},
author = {Ioannou, Yani and Taati, B and Harrap, R and Greenspan, M},
eprint = {arXiv:1209.1759v1},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ioannou et al. - 2012 - Difference of Normals as a Multi-Scale Operator in Unorganized Point Clouds(2).pdf:pdf},
journal = {3D Imaging, Modeling, Processing, Visualization and Transmission (3DIMPVT), 2012 Second International Conference on},
keywords = {point cloud, lidar, segmentation, multi-scale},
mendeley-tags = {point cloud, lidar, segmentation, multi-scale},
pages = {501--508},
title = {{Difference of Normals as a Multi-Scale Operator in Unorganized Point Clouds}},
url = {http://arxiv.org/abs/1209.1759},
year = {2012}
}
@inproceedings{Ioffe2015,
author = {Ioffe, Sergey and Szegedy, Christian},
booktitle = {Proceedings of the 32 nd International Conference on Machine Learning, Lille, France, 2015},
title = {{Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.}},
year = {2015}
}
@inproceedings{journals/corr/JaderbergVZ14,
author = {Jaderberg, Max and Vedaldi, Andrea and Zisserman, Andrew},
booktitle = {British Machine Vision Conference},
keywords = {dblp},
title = {{Speeding up Convolutional Neural Networks with Low Rank Expansions.}},
year = {2014}
}
@article{Jhurani2015,
author = {Jhurani, Chetan and Mullowney, Paul},
journal = {Journal of Parallel and Distributed Computing},
pages = {133--140},
publisher = {Elsevier},
title = {{A GEMM interface and implementation on NVIDIA GPUs for multiple small matrices}},
volume = {75},
year = {2015}
}
@article{Jia2014,
abstract = {Caffe provides multimedia scientists and practitioners with a clean and modifiable framework for state-of-the-art deep learning algorithms and a collection of reference models. The framework is a BSD-licensed C++ library with Python and MATLAB bindings for training and deploying general-purpose convolutional neural networks and other deep models efficiently on commodity architectures. Caffe fits industry and internet-scale media needs by CUDA GPU computation, processing over 40 million images a day on a single K40 or Titan GPU ({\$}\backslashapprox{\$} 2.5 ms per image). By separating model representation from actual implementation, Caffe allows experimentation and seamless switching among platforms for ease of development and deployment from prototyping machines to cloud environments. Caffe is maintained and developed by the Berkeley Vision and Learning Center (BVLC) with the help of an active community of contributors on GitHub. It powers ongoing research projects, large-scale industrial applications, and startup prototypes in vision, speech, and multimedia.},
archivePrefix = {arXiv},
arxivId = {1408.5093},
author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
doi = {10.1145/2647868.2654889},
eprint = {1408.5093},
isbn = {9781450330633},
issn = {10636919},
journal = {ACM International Conference on Multimedia},
keywords = {computation,computer vision,corresponding authors,machine learning,neural networks,open source,parallel},
pages = {675--678},
pmid = {18267787},
title = {{Caffe: Convolutional Architecture for Fast Feature Embedding}},
url = {http://arxiv.org/abs/1408.5093},
year = {2014}
}
@article{johnson1984extensions,
abstract = {In this note we consider the following extension problem for Lipschitz functions: Given a metric space X and n= 2, 3, 4,, estimate the smallest constant L= L (X, n) so that every mapping f from every n—element subset of X into 62 extends to a mapping I from X into E with 2 ufn, ip 5 L uanp.},
author = {Johnson, William B and Lindenstrauss, Joram},
doi = {10.1090/conm/026/737400},
isbn = {082185030X 9780821850305},
issn = {1042-9832},
journal = {Contemporary mathematics},
pages = {189--206},
title = {{Extensions of Lipschitz mappings into a Hilbert space}},
volume = {26},
year = {1984}
}
@inproceedings{kaski1998dimensionality,
author = {Kaski, Samuel},
booktitle = {Neural Networks Proceedings, 1998. IEEE World Congress on Computational Intelligence. The 1998 IEEE International Joint Conference on},
organization = {IEEE},
pages = {413--418},
title = {{Dimensionality reduction by random mapping: Fast similarity computation for clustering}},
volume = {1},
year = {1998}
}
@article{Kim2012,
abstract = {We present a study of the in-camera image processing through an extensive analysis of more than 10,000 images from over 30 cameras. The goal of this work is to investigate if image values can be transformed to physically meaningful values, and if so, when and how this can be done. From our analysis, we found a major limitation of the imaging model employed in conventional radiometric calibration methods and propose a new in-camera imaging model that fits well with today's cameras. With the new model, we present associated calibration procedures that allow us to convert an sRGB images back to their original CCD RAW responses in a manner that is significantly more accurate than any existing methods. Additionally, we show how this new imaging model can be used to build an image correction application that converts an sRGB input image captured with the wrong camera settings to an sRGB output image that would have been recorded under the correct settings of a specific camera.},
author = {Kim, Seon Joo and Lin, Hai Ting and Lu, Zheng and Susstrunk, Sabine and Lin, Stephen and Brown, Michael S},
doi = {10.1109/TPAMI.2012.58},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kim et al. - 2012 - A New In-Camera Imaging Model for Color Computer Vision and its Application.pdf:pdf},
issn = {1939-3539},
journal = {IEEE transactions on pattern analysis and machine intelligence},
month = {feb},
number = {X},
pages = {1--14},
pmid = {22371428},
title = {{A New In-Camera Imaging Model for Color Computer Vision and its Application.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/22371428},
volume = {X},
year = {2012}
}
@inproceedings{Kim2016,
abstract = {Although the latest high-end smartphone has powerful CPU and GPU, running deeper convolutional neural networks (CNNs) for complex tasks such as ImageNet classification on mobile devices is challenging. To deploy deep CNNs on mobile devices, we present a simple and effective scheme to compress the entire CNN, which we call one-shot whole network compression. The proposed scheme consists of three steps: (1) rank selection with variational Bayesian matrix factorization, (2) Tucker decomposition on kernel tensor, and (3) fine-tuning to recover accumulated loss of accuracy, and each step can be easily implemented using publicly available tools. We demonstrate the effectiveness of the proposed scheme by testing the performance of various compressed CNNs (AlexNet, VGGS, GoogLeNet, and VGG-16) on the smartphone. Significant reductions in model size, runtime, and energy consumption are obtained, at the cost of small loss in accuracy. In addition, we address the important implementation level issue on 1?1 convolution, which is a key operation of inception module of GoogLeNet as well as CNNs compressed by our proposed scheme.},
archivePrefix = {arXiv},
arxivId = {1511.06530},
author = {Kim, Yong-Deok and Park, Eunhyeok and Yoo, Sungjoo and Choi, Taelim and Yang, Lu and Shin, Dongjun},
booktitle = {International Conference on Learning Representations (ICLR)},
eprint = {1511.06530},
pages = {1--16},
title = {{Compression of Deep Convolutional Neural Networks for Fast and Low Power Mobile Applications}},
url = {http://arxiv.org/abs/1511.06530},
year = {2016}
}
@techreport{CIFAR10,
abstract = {Groups at MIT and NYU have collected a dataset of millions of tiny colour images from the web. It is, in principle, an excellent dataset for unsupervised training of deep generative models, but previous researchers who have tried this have found it difficult to learn a good set of filters from the images. We show how to train a multi-layer generative model that learns to extract meaningful features which resemble those found in the human visual cortex. Using a novel parallelization algorithm to distribute the work among multiple machines connected on a network, we show how training such a model can be done in reasonable time. A second problematic aspect of the tiny images dataset is that there are no reliable class labels which makes it hard to use for object recognition experiments. We created two sets of reliable labels. The CIFAR-10 set has 6000 examples of each of 10 classes and the CIFAR-100 set has 600 examples of each of 100 non-overlapping classes. Using these labels, we show that object recognition is significantly improved by pre-training a layer of features on a large set of unlabeled tiny images.},
archivePrefix = {arXiv},
arxivId = {arXiv:1011.1669v3},
author = {Krizhevsky, Alex},
booktitle = {{\ldots} Science Department, University of Toronto, Tech. {\ldots}},
doi = {10.1.1.222.9220},
eprint = {arXiv:1011.1669v3},
institution = {Univ. Toronto},
isbn = {9788578110796},
issn = {1098-6596},
pages = {1--60},
pmid = {25246403},
title = {{Learning Multiple Layers of Features from Tiny Images}},
type = {Technical Report},
url = {http://scholar.google.com/scholar?hl=en{\&}btnG=Search{\&}q=intitle:Learning+Multiple+Layers+of+Features+from+Tiny+Images{\#}0},
year = {2009}
}
@inproceedings{Krizhevsky2014,
abstract = {I present a new way to parallelize the training of convolutional neural networks across multiple GPUs. The method scales signiﬁcantly better than all alternatives when applied to modern convolutional neural networks.},
archivePrefix = {arXiv},
arxivId = {arXiv:1404.5997v2},
author = {Krizhevsky, Alex},
booktitle = {arXiv preprint},
eprint = {arXiv:1404.5997v2},
pages = {1--7},
title = {{One weird trick for parallelizing convolutional neural networks}},
url = {http://arxiv.org/abs/1404.5997},
year = {2014}
}
@inproceedings{Krizhevsky2012,
abstract = {We trained a large, deep convolutional neural network to classify the 1.2 million high-resolution images in the ImageNet LSRVRC-2010 contest into the 1000 different classes. On the test data, we achieved top-1 and top-5 error rates of 37.5{\%} and 17.0{\%} which is considerably better than the previous state of the art. The neural network, which has 60 million paramters and 650,000 neurons, consists of five convolutional layers, some of which are followed by max-pooling layers, and three fully connected layers with a final 1000-way softmax. To make training faster, we used non-saturating neurons and a very efficient GPU implementation of the convolutional operation. To reduce overfitting in the fully-connected layers, we employed a recently-developed method called 'dropout' that proved to be effective. We also entered a variant of the model in the ILSVRC-2012 competition and achievd a top-5 test error rate of 15.3{\%}, compared to 26.2{\%} achieved by the second-best entry.},
archivePrefix = {arXiv},
arxivId = {1102.0183},
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
booktitle = {Advances In Neural Information Processing Systems},
doi = {http://dx.doi.org/10.1016/j.protcy.2014.09.007},
editor = {Bartlett, Peter L and Pereira, Fernando C N and Burges, Christopher J C and Bottou, L{\'{e}}on and Weinberger, Kilian Q},
eprint = {1102.0183},
isbn = {9781627480031},
issn = {10495258},
keywords = {Convolutional with Classification Deep ImageNet Ne},
pages = {1--9},
pmid = {7491034},
title = {{ImageNet Classification with Deep Convolutional Neural Networks}},
year = {2012}
}
@article{Laptev2005,
abstract = {Local image features or interest points provide compact and abstract representations of patterns in an image. In this paper, we extend the notion of spatial interest points into the spatio-temporal domain and show how the resulting features often reflect interesting events that can be used for a compact representation of video data as well as for interpretation of spatio-temporal events.},
author = {Laptev, Ivan},
doi = {10.1007/s11263-005-1838-7},
issn = {09205691},
journal = {International Journal of Computer Vision},
keywords = {interest points,matching,scale selection,scale space,video interpretation},
number = {2-3},
pages = {107--123},
publisher = {Springer},
title = {{On Space-Time Interest Points}},
url = {http://www.springerlink.com/index/10.1007/s11263-005-1838-7},
volume = {64},
year = {2005}
}
@article{larsen2016optimality,
abstract = {For any integers {\$}d, n \backslashgeq 2{\$} and {\$}1/({\{}\backslashmin\backslash{\{}n,d\backslash{\}}{\}}){\^{}}{\{}0.4999{\}} {\textless} \backslashvarepsilon{\textless}1{\$}, we show the existence of a set of {\$}n{\$} vectors {\$}X\backslashsubset \backslashmathbb{\{}R{\}}{\^{}}d{\$} such that any embedding {\$}f:X\backslashrightarrow \backslashmathbb{\{}R{\}}{\^{}}m{\$} satisfying {\$}{\$} $\backslash$forall x,y$\backslash$in X,$\backslash$ (1-$\backslash$varepsilon)$\backslash$|x-y$\backslash$|{\_}2{\^{}}2$\backslash$le $\backslash$|f(x)-f(y)$\backslash$|{\_}2{\^{}}2 $\backslash$le (1+$\backslash$varepsilon)$\backslash$|x-y$\backslash$|{\_}2{\^{}}2 {\$}{\$} must have {\$}{\$} m = $\backslash$Omega($\backslash$varepsilon{\^{}}{\{}-2{\}} $\backslash$lg n). {\$}{\$} This lower bound matches the upper bound given by the Johnson-Lindenstrauss lemma [JL84]. Furthermore, our lower bound holds for nearly the full range of {\$}\backslashvarepsilon{\$} of interest, since there is always an isometric embedding into dimension {\$}\backslashmin\backslash{\{}d, n\backslash{\}}{\$} (either the identity map, or projection onto {\$}\backslashmathop{\{}span{\}}(X){\$}). Previously such a lower bound was only known to hold against linear maps {\$}f{\$}, and not for such a wide range of parameters {\$}\backslashvarepsilon, n, d{\$} [LN16]. The best previously known lower bound for general {\$}f{\$} was {\$}m = \backslashOmega(\backslashvarepsilon{\^{}}{\{}-2{\}}\backslashlg n/\backslashlg(1/\backslashvarepsilon)){\$} [Wel74, Alo03], which is suboptimal for any {\$}\backslashvarepsilon = o(1){\$}.},
archivePrefix = {arXiv},
arxivId = {1609.02094},
author = {Larsen, Kasper Green and Nelson, Jelani},
eprint = {1609.02094},
isbn = {0001415123},
journal = {arXiv preprint},
number = {1},
pages = {1--11},
title = {{Optimality of the Johnson-Lindenstrauss Lemma}},
url = {http://arxiv.org/abs/1609.02094},
year = {2016}
}
@inproceedings{Lebedev2015,
author = {Lebedev, V and Ganin, Y and Rakhuba, M and Oseledets, I and Lempitsky, V},
booktitle = {International Conference on Learning Representations},
title = {{Speeding-up Convolutional Neural Networks Using Fine-tuned CP-Decomposition.}},
year = {2015}
}
@misc{Leclerc1991,
abstract = {A method for recovering shape from shading that solves directly for the surface height is presented. By using a discrete formulation of the problem, it is possible to achieve good convergence behavior by employing numerical solution techniques more powerful than gradient descent methods derived from variational calculus. Because this method solves directly for height, it avoids the problem of finding an integrable surface maximally consistent with surface orientation. Furthermore, since additional constraints are not needed to make the problem well posed, a smoothness constraint is used only to drive the system towards a good solution; the weight of the smoothness term is eventually reduced to near zero. By solving directly for height, stereo processing may be used to provide initial and boundary conditions. The shape from shading technique, as well as its relation to stereo, is demonstrated on both synthetic and real imagery},
author = {Leclerc, Y G and Bobick, A F},
booktitle = {Proceedings 1991 IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.1991.139752},
isbn = {0818621486},
issn = {10636919},
pages = {552--558},
publisher = {IEEE Comput. Sco. Press},
title = {{The direct computation of height from shading}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=139752},
volume = {91},
year = {1991}
}
@article{Lecun1998,
abstract = {Multilayer neural networks trained with the back-propagation algorithm constitute the best example of a successful gradient based learning technique. Given an appropriate network architecture, gradient-based learning algorithms can be used to synthesize a complex decision surface that can classify high-dimensional patterns, such as handwritten characters, with minimal preprocessing. This paper reviews various methods applied to handwritten character recognition and compares them on a standard handwritten digit recognition task. Convolutional neural networks, which are specifically designed to deal with the variability of 2D shapes, are shown to outperform all other techniques. Real-life document recognition systems are composed of multiple modules including field extraction, segmentation recognition, and language modeling. A new learning paradigm, called graph transformer networks (GTN), allows such multimodule systems to be trained globally using gradient-based methods so as to minimize an overall performance measure. Two systems for online handwriting recognition are described. Experiments demonstrate the advantage of global training, and the flexibility of graph transformer networks. A graph transformer network for reading a bank cheque is also described. It uses convolutional neural network character recognizers combined with global training techniques to provide record accuracy on business and personal cheques. It is deployed commercially and reads several million cheques per day},
author = {Lecun, Y and Bottou, L and Bengio, Y and Haffner, P},
issn = {0018-9219},
journal = {Proceedings of the IEEE},
keywords = {2D GTN; back-propagation; backpropagation; based c},
number = {11},
pages = {2278--2324},
title = {{Gradient-based learning applied to document recognition}},
volume = {86},
year = {1998}
}
@article{Lecun2015,
abstract = {Deep learning allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. These methods have dramatically improved the state-of-the-art in speech rec- ognition, visual object recognition, object detection and many other domains such as drug discovery and genomics. Deep learning discovers intricate structure in large data sets by using the backpropagation algorithm to indicate how a machine should change its internal parameters that are used to compute the representation in each layer from the representation in the previous layer. Deep convolutional nets have brought about breakthroughs in processing images, video, speech and audio, whereas recurrent nets have shone light on sequential data such as text and speech.},
archivePrefix = {arXiv},
arxivId = {arXiv:1312.6184v5},
author = {Lecun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
doi = {10.1038/nature14539},
eprint = {arXiv:1312.6184v5},
isbn = {9780521835688},
issn = {1548-7091},
journal = {Nature},
number = {1},
pages = {436--444},
pmid = {10463930},
title = {{Deep learning}},
url = {http://www.nature.com/nature/journal/v521/n7553/full/nature14539.html},
volume = {521},
year = {2015}
}
@article{Lee2005,
abstract = {Research over the last decade has built a solid mathematical foundation for representation and analysis of 3D meshes in graphics and geometric modeling. Much of this work however does not explicitly incorporate models of low-level human visual attention. In this paper we introduce the idea of mesh saliency as a measure of regional importance for graphics meshes. Our notion of saliency is inspired by low-level human visual system cues. We define mesh saliency in a scale-dependent manner using a center-surround operator on Gaussian-weighted mean curvatures. We observe that such a definition of mesh saliency is able to capture what most would classify as visually interesting regions on a mesh. The human-perception-inspired importance measure computed by our mesh saliency operator results in more visually pleasing results in processing and viewing of 3D meshes. compared to using a purely geometric measure of shape. such as curvature. We discuss how mesh saliency can be incorporated in graphics applications such as mesh simplification and viewpoint selection and present examples that show visually appealing results from using mesh saliency.},
author = {Lee, Chang Ha and Varshney, Amitabh and Jacobs, David W},
doi = {10.1145/1073204.1073244},
issn = {07300301},
journal = {ACM Transactions on Graphics},
keywords = {perception,saliency,simplification,visual attention},
number = {3},
pages = {659},
publisher = {ACM},
series = {SIGGRAPH '05},
title = {{Mesh saliency}},
url = {http://portal.acm.org/citation.cfm?doid=1073204.1073244},
volume = {24},
year = {2005}
}
@article{lee2014deeply,
abstract = {Our proposed deeply-supervised nets (DSN) method simultaneously minimizes classification error while making the learning process of hidden layers direct and transparent. We make an attempt to boost the classification performance by studying a new formulation in deep networks. Three aspects in convolutional neural networks (CNN) style architectures are being looked at: (1) transparency of the intermediate layers to the overall classification; (2) discriminativeness and robustness of learned features, especially in the early layers; (3) effectiveness in training due to the presence of the exploding and vanishing gradients. We introduce "companion objective" to the individual hidden layers, in addition to the overall objective at the output layer (a different strategy to layer-wise pre-training). We extend techniques from stochastic gradient methods to analyze our algorithm. The advantage of our method is evident and our experimental result on benchmark datasets shows significant performance gain over existing methods (e.g. all state-of-the-art results on MNIST, CIFAR-10, CIFAR-100, and SVHN).},
archivePrefix = {arXiv},
arxivId = {1409.5185},
author = {Lee, Chen-Yu and Xie, Saining and Gallagher, Patrick and Zhang, Zhengyou and Tu, Zhuowen},
eprint = {1409.5185},
issn = {15337928},
journal = {arXiv preprint arXiv:1409.5185},
title = {{Deeply-Supervised Nets}},
url = {http://arxiv.org/abs/1409.5185},
year = {2014}
}
@techreport{Leichter2012,
author = {Leichter, Ido and Krupka, Eyal},
institution = {Advanced Technology Labs Israel, Microsoft Research},
title = {{Monotonicity and Error Type Diﬀerentiability in Performance
Measures for Target Detection and Tracking in Video}},
url = {http://research.microsoft.com/pubs/160662/MSR-TR-2012-23updated.pdf},
year = {2012}
}
@article{Levin2007,
abstract = {Individuals susceptible to high-altitude pulmonary oedema (HAPE) are characterised by an abnormal increase of pulmonary artery systolic pressure (PASP) in hypoxia and during normoxic exercise, reduced hypoxic ventilatory response, and smaller lung volume. In 37 mountaineers with well-documented altitude tolerance, it was investigated whether any combination of these noninvasive measurements, including exercise in hypoxia, could improve the identification of HAPE-susceptible subjects at low altitude. HAPE-susceptible subjects showed a significant higher increase of PASP during hypoxia at rest (48+/-10 mmHg) compared with controls (38+/-3 mmHg), as well as during normoxic exercise (57+/-14 versus 38+/-7 mmHg) and hypoxic exercise (69+/-13 versus 49+/-8 mmHg). PASP could not be assessed in three and eight subjects during normoxic or hypoxic exercise, respectively, due to insufficient Doppler profiles or systemic arterial hypertension. Sensitivity (77-94{\%}) and specificity (76-93{\%}) were not significantly different between the various testing conditions. Additional assessment of hypoxic ventilatory response and lung function parameters did not improve identification of HAPE-susceptible subjects in a multivariate analysis. Due to the greater number of missing values in pulmonary artery systolic pressure measurements during hypoxic exercise, it was concluded that pulmonary artery systolic pressure measurements at rest during hypoxia or exercise in normoxia are most feasible for the identification of high-altitude pulmonary oedema-susceptible subjects.},
author = {Levin, Anat and Fergus, Rob and Durand, Fr{\'{e}}do and Freeman, William T},
doi = {10.1145/1276377.1276464},
institution = {ACM},
isbn = {9781595936486},
issn = {07300301},
journal = {ACM Transactions on Graphics},
keywords = {coded imaging,computational photography,deblurring,depth,field,image statistics,range estimation},
number = {3},
pages = {70},
pmid = {15738301},
publisher = {ACM},
title = {{Image and depth from a conventional camera with a coded aperture}},
url = {http://portal.acm.org/citation.cfm?doid=1276377.1276464},
volume = {26},
year = {2007}
}
@article{Levoy2000,
author = {Levoy, Marc and Pulli, Kari and Curless, Brian},
isbn = {1581132085},
journal = {Proceedings of the 27th {\{}{\ldots}{\}}},
pages = {131--144},
title = {{The digital Michelangelo project: 3D scanning of large statues}},
url = {http://dl.acm.org/citation.cfm?id=344849},
year = {2000}
}
@article{Lin2011,
author = {Lin, Haiting and Susstrunk, Sabine and Brown, Michael S.},
doi = {10.1109/ICCV.2011.6126234},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Lin, Susstrunk, Brown - 2011 - Revisiting radiometric calibration for color computer vision.pdf:pdf},
isbn = {978-1-4577-1102-2},
journal = {2011 International Conference on Computer Vision},
month = {nov},
pages = {129--136},
publisher = {Ieee},
title = {{Revisiting radiometric calibration for color computer vision}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6126234},
year = {2011}
}
@inproceedings{Lin2014,
author = {Lin, Min and Chen, Qiang and Yan, Shuicheng},
booktitle = {International Conference on Learning Representations (ICLR)},
series = {2014},
title = {{Network in network}},
year = {2014}
}
@article{Lin2013NiN,
abstract = {We propose a novel deep network structure called "Network In Network" (NIN) to enhance model discriminability for local patches within the receptive field. The conventional convolutional layer uses linear filters followed by a nonlinear activation function to scan the input. Instead, we build micro neural networks with more complex structures to abstract the data within the receptive field. We instantiate the micro neural network with a multilayer perceptron, which is a potent function approximator. The feature maps are obtained by sliding the micro networks over the input in a similar manner as CNN; they are then fed into the next layer. Deep NIN can be implemented by stacking mutiple of the above described structure. With enhanced local modeling via the micro network, we are able to utilize global average pooling over feature maps in the classification layer, which is easier to interpret and less prone to overfitting than traditional fully connected layers. We demonstrated the state-of-the-art classification performances with NIN on CIFAR-10 and CIFAR-100, and reasonable performances on SVHN and MNIST datasets.},
archivePrefix = {arXiv},
arxivId = {1312.4400},
author = {Lin, Min and Chen, Qiang and Yan, Shuicheng},
doi = {10.1109/ASRU.2015.7404828},
eprint = {1312.4400},
isbn = {9781479972913},
journal = {arXiv preprint},
keywords = {In Network},
pages = {10},
title = {{Network In Network}},
url = {http://arxiv.org/abs/1312.4400},
volume = {abs/1312.4},
year = {2013}
}
@inproceedings{Lin2014a,
abstract = {We present a new dataset with the goal of advancing the state-of-the-art in object recognition by placing the question of object recognition in the context of the broader question of scene understanding. This is achieved by gathering images of complex everyday scenes containing common objects in their natural context. Objects are labeled using per-instance segmentations to aid in precise object localization. Our dataset contains photos of 91 objects types that would be easily recognizable by a 4 year old. With a total of 2.5 million labeled instances in 328k images, the creation of our dataset drew upon extensive crowd worker involvement via novel user interfaces for category detection, instance spotting and instance segmentation. We present a detailed statistical analysis of the dataset in comparison to PASCAL, ImageNet, and SUN. Finally, we provide baseline performance analysis for bounding box and segmentation detection results using a Deformable Parts Model.},
archivePrefix = {arXiv},
arxivId = {arXiv:1405.0312v1},
author = {Lin, Tsung Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'{a}}r, Piotr and Zitnick, C. Lawrence},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-319-10602-1_48},
eprint = {arXiv:1405.0312v1},
isbn = {978-3-319-10601-4},
issn = {16113349},
number = {PART 5},
pages = {740--755},
title = {{Microsoft COCO: Common objects in context}},
volume = {8693 LNCS},
year = {2014}
}
@article{Lowe2004,
abstract = {This paper presents a method for extracting distinctive invariant features from images that can be used to perform reliable matching between different views of an object or scene. The features are invariant to image scale and rotation, and are shown to provide robust matching across a substantial range of affine distortion, change in 3D viewpoint, addition of noise, and change in illumination. The features are highly distinctive, in the sense that a single feature can be correctly matched with high probability against a large database of features from many images. This paper also describes an approach to using these features for object recognition. The recognition proceeds by matching individual features to a database of features from known objects using a fast nearest-neighbor algorithm, followed by a Hough transform to identify clusters belonging to a single object, and finally performing verification through least-squares solution for consistent pose parameters. This approach to recognition can robustly identify objects among clutter and occlusion while achieving near real-time performance.},
author = {Lowe, David G},
doi = {10.1023/B:VISI.0000029664.99615.94},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Lowe - 2004 - Distinctive Image Features from Scale-Invariant Keypoints(2).pdf:pdf},
isbn = {1568811012},
issn = {09205691},
journal = {International Journal of Computer Vision},
number = {2},
pages = {91--110},
pmid = {20064111},
publisher = {Springer},
series = {Int. J. Comput. Vis. (Netherlands)},
title = {{Distinctive Image Features from Scale-Invariant Keypoints}},
url = {http://www.springerlink.com/openurl.asp?id=doi:10.1023/B:VISI.0000029664.99615.94},
volume = {60},
year = {2004}
}
@inproceedings{Luo2010switchable,
author = {Luo, P and Tian, Y and Wang, X and Tang, X},
booktitle = {Proceedings of the 2010 IEEE Conference on Computer Vision and Pattern Recognition},
title = {{Switchable Deep Networks for Pedestrian Detection}},
year = {2010}
}
@phdthesis{MacKay91,
author = {MacKay, D J C},
school = {California Institute of Technology},
title = {{Bayesian Methods for Adaptive Models}},
year = {1991}
}
@incollection{mamalet2012simplifying,
author = {Mamalet, Franck and Garcia, Christophe},
booktitle = {Artificial Neural Networks and Machine Learning--ICANN 2012},
pages = {58--65},
publisher = {Springer},
title = {{Simplifying convnets for fast learning}},
year = {2012}
}
@inproceedings{martens2010deep,
author = {Martens, James},
booktitle = {Proceedings of the 27th International Conference on Machine Learning (ICML-10)},
pages = {735--742},
title = {{Deep learning via Hessian-free optimization}},
year = {2010}
}
@article{Martinez-Verdu1998,
author = {Mart{\'{i}}nez-Verd{\'{u}}, Francisco Miguel and Pujol, Jaume and Bouzada, Alejandro and Capilla, Pascual},
doi = {10.1117/12.334567},
journal = {Proceedings of the SPIE},
keywords = {colorimetry,digital cameras,image input device characterisation,spectral sensitivity},
number = {January 1999},
pages = {279--290},
publisher = {SPIE},
title = {{Spectroradiometric characterization of the spectral linearity of a conventional digital camera}},
url = {http://link.aip.org/link/?PSI/3648/279/1{\%}7B{\&}{\%}7DAgg=doi},
volume = {3648},
year = {1998}
}
@inproceedings{Mathieu2014,
author = {Mathieu, Michael and Henaff, Mikael and LeCun, Yann},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {{Fast training of convolutional networks through {\{}FFTs{\}}}},
year = {2014}
}
@book{minsky1988perceptrons,
author = {Minsky, Marvin and Papert, Seymour},
publisher = {MIT press},
title = {{Perceptrons}},
year = {1988}
}
@article{Mitsunaga1999,
author = {Mitsunaga, T. and Nayar, S.K.},
doi = {10.1109/CVPR.1999.786966},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Mitsunaga, Nayar - 1999 - Radiometric self calibration.pdf:pdf},
isbn = {0-7695-0149-4},
journal = {Proceedings. 1999 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (Cat. No PR00149)},
pages = {374--380},
publisher = {IEEE Comput. Soc},
title = {{Radiometric self calibration}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=786966},
year = {1999}
}
@article{Mohr1996,
abstract = {Significant progress has recently been made by applying tools fromclassical projective and algebraic geometry to fundamental problems in computer vision. To some extent this work was foreshadowed by early mathematical photogrammetrists. However the modern approach has gone far beyond these early studies, particularly as regards our ability to deal with multiple images and unknown camera parameters, and on practical computational issues such as stability, robustness and precision. These new techniques are greatly extending the scope and flexibility of digital photogrammetric systems. This tutorial provides a practical, applications-oriented introduction to the projective geometry needed to understand these new developments. No currently available textbook covers all of this material, although several existing texts consider parts of it. Kanatanis book 11 studies many computational and statistical aspects of computer vision in a projective framework. Faugeras 4 investigates the geometric aspects of 3D vision, including several of the projective results obtained by his team before 1993. The collections edited by Mundy, Zisserman and Forsyth 18, 19 summarize recent research on the applications of geometric invariants to computer vision: projective results are central to this programme. Mathematical introductions to projective geometry can be found in many books. A standard text covering the necessary aspects of both projective and algebraic geometry is Semple and Kneebone 23. Unfortunately this is currently out of print, however many scientific libraries have it and it is said to be reprinting soon.},
author = {Mohr, Roger and Triggs, Bill},
institution = {ISPRS workshop tutorial},
journal = {Int Symp Photogrammetry and Remote Sensing},
number = {July},
publisher = {Citeseer},
title = {{Projective Geometry for Image Analysis}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.37.3924{\&}rep=rep1{\&}type=pdf},
year = {1996}
}
@inproceedings{montillo2011entangled,
abstract = {This work addresses the challenging problem of simultaneously segmenting multiple anatomical structures in highly varied CT scans. We propose the entangled decision forest (EDF) as a new discriminative classifier which augments the state of the art decision forest, resulting in higher prediction accuracy and shortened decision time. Our main contribution is two-fold. First, we propose entangling the binary tests applied at each tree node in the forest, such that the test result can depend on the result of tests applied earlier in the same tree and at image points offset from the voxel to be classified. This is demonstrated to improve accuracy and capture long-range semantic context. Second, during training, we propose injecting randomness in a guided way, in which node feature types and parameters are randomly drawn from a learned (nonuniform) distribution. This further improves classification accuracy. We assess our probabilistic anatomy segmentation technique using a labeled database of CT image volumes of 250 different patients from various scan protocols and scanner vendors. In each volume, 12 anatomical structures have been manually segmented. The database comprises highly varied body shapes and sizes, a wide array of pathologies, scan resolutions, and diverse contrast agents. Quantitative comparisons with state of the art algorithms demonstrate both superior test accuracy and computational efficiency.},
author = {Montillo, Albert and Shotton, Jamie and Winn, John and Iglesias, Juan Eugenio and Metaxas, Dimitri and Criminisi, Antonio},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-642-22092-0_16},
isbn = {9783642220913},
issn = {03029743},
keywords = {CT,Entanglement,auto-context,decision forests,segmentation},
pages = {184--196},
pmid = {21761656},
title = {{Entangled decision forests and their application for semantic segmentation of CT images}},
volume = {6801 LNCS},
year = {2011}
}
@inproceedings{conf/icml/NairH10,
abstract = {Restricted Boltzmann machines were developed using binary stochastic hidden units. These can be generalized by replacing each binary unit by an inﬁnite number of copies that all have the same weights but have progressively more negative biases. The learning and inference rules for these “Stepped Sigmoid Units” are unchanged. They can be approximated eﬃciently by noisy, rectiﬁed linear units. Compared with binary units, these units learn features that are better for object recognition on the NORB dataset and face veriﬁcation on the Labeled Faces in the Wild dataset. Unlike binary units, rectiﬁed linear units preserve information about relative intensities as information travels through multiple layers of feature detectors.},
author = {Nair, Vinod and Hinton, Geoffrey E},
booktitle = {Proceedings of the 27th International Conference on Machine Learning},
doi = {10.1.1.165.6419},
editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten},
isbn = {9781605589077},
issn = {1935-8237},
keywords = {dblp},
number = {3},
pages = {807--814},
pmid = {22404682},
publisher = {Omnipress},
title = {{Rectified Linear Units Improve Restricted Boltzmann Machines}},
url = {http://dblp.uni-trier.de/db/conf/icml/icml2010.html{\#}NairH10},
year = {2010}
}
@article{Narayana2012,
author = {Narayana, Manjunath and Hanson, Allen and Learned-miller, Erik},
journal = {viswwwcsumassedu},
title = {{Background Modeling Using Adaptive Pixelwise Kernel Variances in a Hybrid Feature Space}},
url = {http://vis-www.cs.umass.edu/papers/Manjunath{\_}background{\_}CVPR2012.pdf},
year = {2012}
}
@article{Ng2006,
author = {Ng, R},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ng - 2006 - Digital light field photography.pdf:pdf},
title = {{Digital light field photography}},
url = {http://testcis.cis.rit.edu/{~}cnspci/references/dip/light{\_}field{\_}photography/ng2006.pdf},
year = {2006}
}
@misc{Numagami1995,
abstract = {In this paper, a reconstruction method of a curved object from a single gray-level image is proposed. To realize easy but robust shape from shading (SFS), the proposed method adopts isodensity lines. The results of computer simulation using virtual objects show that the prospects of using this method are very encouraging},
author = {Numagami, Y and Kajiwara, Y and Nakamura, O and Minami, T},
booktitle = {Proceedings 1995 Canadian Conference on Electrical and Computer Engineering},
doi = {10.1109/CCECE.1995.526675},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Numagami et al. - 1995 - Reconstruction of the 3-D shape of an object from a 2-D intensity image.pdf:pdf},
isbn = {0780327667},
issn = {08407789},
title = {{Reconstruction of the 3-D shape of an object from a 2-D intensity image}},
volume = {2},
year = {1995}
}
@inproceedings{Oquab:2014:LTM:2679600.2680210,
address = {Washington, DC, USA},
author = {Oquab, Maxime and Bottou, Leon and Laptev, Ivan and Sivic, Josef},
booktitle = {Proceedings of the 2014 IEEE Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2014.222},
isbn = {978-1-4799-5118-5},
pages = {1717--1724},
publisher = {IEEE Computer Society},
series = {CVPR '14},
title = {{Learning and Transferring Mid-level Image Representations Using Convolutional Neural Networks}},
url = {http://dx.doi.org/10.1109/CVPR.2014.222},
year = {2014}
}
@article{Ortiz2004,
author = {Ortiz, Albert and Oliver, Gabriel},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ortiz, Oliver - 2004 - Radiometric calibration of CCD sensors Dark current and fixed pattern noise estimation.pdf:pdf},
isbn = {0780382323},
journal = {{\ldots} , 2004. Proceedings. ICRA'04. 2004 IEEE {\ldots}},
keywords = {- robot vision,camera calibration,sensors},
number = {April},
pages = {4730--4735},
title = {{Radiometric calibration of CCD sensors: Dark current and fixed pattern noise estimation}},
url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=1302465},
year = {2004}
}
@book{Palmer2009,
author = {Palmer, James M and Grant, Barbara G},
booktitle = {SPIE Press Monograph},
doi = {10.1117/3.798237},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry.pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(2).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(3).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(4).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(5).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(6).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(7).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(8).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(9).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(10).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(11).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(12).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(13).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(14).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(15).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(16).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(17).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(18).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(19).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(20).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(21).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(22).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(23).pdf:pdf;:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Palmer, Grant - 2009 - The Art of Radiometry(24).pdf:pdf},
isbn = {9780819472458},
pages = {384},
publisher = {SPIE Press},
series = {Press Monograph},
title = {{The Art of Radiometry}},
url = {http://books.google.com/books?id=WCwBQgAACAAJ},
volume = {PM184},
year = {2009}
}
@misc{Park2011,
abstract = {An articulated trajectory is defined as a trajectory that remains at a fixed distance with respect to a parent trajectory. In this paper, we present a method to reconstruct an articulated trajectory in three dimensions given the two dimensional projection of the articulated trajectory, the 3D parent trajectory, and the camera pose at each time instant. This is a core challenge in reconstructing the 3D motion of articulated structures such as the human body because endpoints of each limb form articulated trajectories. We simultaneously apply activity-independent spatial and temporal constraints, in the form of fixed 3D distance to the parent trajectory and smooth 3D motion. There exist two solutions that satisfy each instantaneous 2D projection and articulation constraint (a ray intersects a sphere at up to two locations) and we show that resolving this ambiguity by enforcing smoothness is equivalent to solving a binary quadratic programming problem. A geometric analysis of the reconstruction of articulated trajectories is also presented and a measure of the reconstructibility of an articulated trajectory is proposed.},
author = {Park, Hyun Soo and Sheikh, Yaser},
booktitle = {2011 International Conference on Computer Vision},
doi = {10.1109/ICCV.2011.6126243},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Park, Sheikh - 2011 - 3D reconstruction of a smooth articulated trajectory from a monocular image sequence.pdf:pdf},
institution = {Carnegie Mellon University, 5000 Forbes Ave., Pittsburgh, PA, 15213, USA},
isbn = {9781457711008},
issn = {15505499},
pages = {201--208},
publisher = {IEEE},
title = {{3D reconstruction of a smooth articulated trajectory from a monocular image sequence}},
url = {http://www.andrew.cmu.edu/user/hyunsoop/iccv2011/iccv{\_}project{\_}page.html},
year = {2011}
}
@misc{Prados2005,
abstract = {Shape from shading is known to be an ill-posed problem. We show in this paper that if we model the problem in a different way than it is usually done, more precisely by taking into account the 1/r2 attenuation term of the illumination, shape from shading becomes completely well-posed. Thus the shading allows to recover (almost) any surface from only one image (of this surface) without any additional data (in particular, without the knowledge of the heights of the solution at the local intensity "minima", contrary to P. Dupuis et al. (1994), E. Prados et al. (2004), B. Horn (1986), E. Rouy et al. (1992), R. Kimmel et al. (2001)) and without regularity assumptions (contrary to J. Oliensis et al. (1993), R. Kimmel et al. (1995), for example). More precisely, we formulate the problem as that of solving a new partial differential equation (PDE), we develop a complete mathematical study of this equation and we design a new provably convergent numerical method. Finally, we present results of our new shape from shading method on various synthetic and real images.},
author = {Prados, E and Faugeras, O},
booktitle = {2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition CVPR05},
doi = {10.1109/CVPR.2005.319},
isbn = {0769523722},
issn = {10636919},
number = {c},
pages = {870--877},
publisher = {Ieee},
title = {{Shape from shading: a well-posed problem?}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1467534},
volume = {2},
year = {2005}
}
@article{Prati2003,
author = {Prati, Andrea and Mikic, Ivana and Trivedi, Mohan M and Cucchiara, Rita},
number = {7},
pages = {918--923},
title = {{Detecting Moving Shadows : Algorithms and Evaluation {\{}{\ae}{\}}}},
volume = {25},
year = {2003}
}
@article{Ramamoorthi2001,
author = {Ramamoorthi, Ravi and Hanrahan, Pat},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Ramamoorthi, Hanrahan - 2001 - A signal-processing framework for inverse rendering(2).pdf:pdf},
isbn = {158113374X},
journal = {{\{}{\ldots}{\}} of the 28th annual conference on {\{}{\ldots}{\}}},
keywords = {brdf; illumination; inverse; irradiance; light fie},
number = {August},
pages = {12--17},
title = {{A signal-processing framework for inverse rendering}},
url = {http://dl.acm.org/citation.cfm?id=383271},
year = {2001}
}
@article{ren2015noc,
author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Zhang, Xiangyu and Sun, Jian},
journal = {arXiv preprint arXiv:1504.06066},
title = {{Object Detection Networks on Convolutional Feature Maps}},
year = {2015}
}
@inproceedings{conf/cvpr/RigamontiSLF13,
author = {Rigamonti, Roberto and Sironi, Amos and Lepetit, Vincent and Fua, Pascal},
booktitle = {Computer Vision and Pattern Recognition (CVPR)},
keywords = {dblp},
pages = {2754--2761},
publisher = {IEEE},
title = {{Learning Separable Filters.}},
year = {2013}
}
@article{rippel2015spectral,
abstract = {Discrete Fourier transforms provide a significant speedup in the computation of convolutions in deep learning. In this work, we demonstrate that, beyond its ad- vantages for efficient computation, the spectral domain also provides a powerful representation in which to model and train convolutional neural networks (CNNs). We employ spectral representations to introduce a number of innovations to CNN design. First, we propose spectral pooling, which performs dimensionality re- duction by truncating the representation in the frequency domain. This approach preserves considerably more information per parameter than other pooling strate- gies and enables flexibility in the choice of pooling output dimensionality. This representation also enables a new form of stochastic regularization by random- ized modification of resolution. We show that these methods achieve competitive results on classification and approximation tasks, without using any dropout or max-pooling. Finally, we demonstrate the effectiveness of complex-coefficient spectral param- eterization of convolutional filters. While this leaves the underlying model un- changed, it results in a representation that greatly facilitates optimization. We observe on a variety of popular CNN configurations that this leads to significantly faster convergence during training. 1},
archivePrefix = {arXiv},
arxivId = {1506.03767},
author = {Rippel, Oren and Snoek, Jasper and Adams, Ryan P},
eprint = {1506.03767},
issn = {10495258},
journal = {Advances in Neural Information Processing Systems 28},
pages = {2440--2448},
title = {{Spectral Representations for Convolutional Neural Networks}},
url = {http://papers.nips.cc/paper/5649-spectral-representations-for-convolutional-neural-networks.pdf},
year = {2015}
}
@inproceedings{BuloKontsch2014,
author = {{Rota Bul{\`{o}}}, S and Kontschieder, P},
booktitle = {Proceedings of the 2014 IEEE Conference on Computer Vision and Pattern Recognition},
month = {jun},
title = {{Neural Decision Forests for Semantic Image Labelling}},
year = {2014}
}
@article{ILSVRC2015,
author = {Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and Berg, Alexander C and Fei-Fei, Li},
doi = {10.1007/s11263-015-0816-y},
journal = {International Journal of Computer Vision (IJCV)},
title = {{ImageNet Large Scale Visual Recognition Challenge}},
year = {2015}
}
@article{Rusu2008,
author = {Rusu, R B and Marton, Z C and Blodow, N},
journal = {autonomous systems 10:},
keywords = {geometric reasoning; persistent feature histograms},
title = {{Persistent point feature histograms for 3D point clouds}},
url = {http://books.google.com/books?hl=en{\%}7B{\&}{\%}7Dlr={\%}7B{\&}{\%}7Did=KcYWTosXoOgC{\%}7B{\&}{\%}7Doi=fnd{\%}7B{\&}{\%}7Dpg=PR8{\%}7B{\&}{\%}7Ddq=Statistical+Approaches+to+Multi-scale+Point+Cloud+Processing{\%}7B{\&}{\%}7Dots=A2KI1JjP0A{\%}7B{\&}{\%}7Dsig=W1BJPYQSqpAT4{\%}7B{\_}{\%}7Dv2NxUt8G3CoLo http://books.google.com/bo},
year = {2008}
}
@phdthesis{Saligrama2012,
author = {Saligrama, Venkatesh and Chen, Zhu},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition},
title = {{Video Anomaly Detection Based on Local Statistical Aggregates}},
url = {http://blogs.bu.edu/srv/files/2012/04/cvpr{\_}final.pdf},
year = {2012}
}
@article{Sentenac2003,
author = {Sentenac, Thierry},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Sentenac - 2003 - Temperature correction of radiometric and geometric models for an uncooled CCD camera in the near infrared.pdf:pdf},
journal = {{\ldots} , IEEE Transactions on},
number = {1},
pages = {46--60},
title = {{Temperature correction of radiometric and geometric models for an uncooled CCD camera in the near infrared}},
url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=1191409},
volume = {52},
year = {2003}
}
@inproceedings{Sermanet2013overfeat,
abstract = {We present an integrated framework for using Convolutional Networks for classification, localization and detection. We show how a multiscale and sliding window approach can be efficiently implemented within a ConvNet. We also introduce a novel deep learning approach to localization by learning to predict object boundaries. Bounding boxes are then accumulated rather than suppressed in order to increase detection confidence. We show that different tasks can be learned simultaneously using a single shared network. This integrated framework is the winner of the localization task of the ImageNet Large Scale Visual Recognition Challenge 2013 (ILSVRC2013) and obtained very competitive results for the detection and classifications tasks. In post-competition work, we establish a new state of the art for the detection task. Finally, we release a feature extractor from our best model called OverFeat.},
archivePrefix = {arXiv},
arxivId = {1312.6229},
author = {Sermanet, Pierre and Eigen, David and Zhang, Xiang and Mathieu, Michael and Fergus, Rob and LeCun, Yann},
booktitle = {arXiv preprint arXiv},
eprint = {1312.6229},
pages = {1312.6229},
title = {{OverFeat: Integrated Recognition, Localization and Detection using Convolutional Networks}},
url = {http://arxiv.org/abs/1312.6229},
year = {2013}
}
@techreport{Sethi1990,
author = {Sethi, I K},
booktitle = {Proceedings of the {\{}IEEE{\}}},
institution = {Dept. of Computer Sci, Wayne State Univ.},
number = {10},
pages = {1605--1613},
title = {{Entropy Nets: From Decison Trees to Neural Networks}},
volume = {78},
year = {1990}
}
@inproceedings{shankar2016refining,
author = {Shankar, Sukrit and Robertson, Duncan and Ioannou, Yani and Criminisi, Antonio and Cipolla, Roberto},
booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR), 2016},
title = {{Refining Architectures of Deep Convolutional Neural Networks}},
year = {2016}
}
@inproceedings{shankar2016refining,
author = {Shankar, Sukrit and Robertson, Duncan and Ioannou, Yani and Criminisi, Antonio and Cipolla, Roberto},
booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR), 2016},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Shankar et al. - 2016 - Refining Architectures of Deep Convolutional Neural Networks(2).pdf:pdf},
title = {{Refining Architectures of Deep Convolutional Neural Networks}},
year = {2016}
}
@misc{conf/cvpr/ShottonFCSFMKB11,
abstract = {We propose a new method to quickly and accurately predict 3D positions of body joints from a single depth image, using no temporal information. We take an object recognition approach, designing an intermediate body parts representation that maps the difficult pose estimation problem into a simpler per-pixel classification problem. Our large and highly varied training dataset allows the classifier to estimate body parts invariant to pose, body shape, clothing, etc. Finally we generate confidence-scored 3D proposals of several body joints by reprojecting the classification result and finding local modes. The system runs at 200 frames per second on consumer hardware. Our evaluation shows high accuracy on both synthetic and real test sets, and investigates the effect of several training parameters. We achieve state of the art accuracy in our comparison with related work and demonstrate improved generalization over exact whole-skeleton nearest neighbor matching.},
author = {Shotton, Jamie and Fitzgibbon, Andrew and Cook, Mat and Sharp, Toby and Finocchio, Mark and Moore, Richard and Kipman, Alex and Blake, Andrew},
booktitle = {Computer Vision and Pattern Recognition (CVPR)},
institution = {Microsoft Research Cambridge {\&} Xbox Incubation},
isbn = {9781457703935},
issn = {10636919},
number = {3},
pages = {1297--1304},
publisher = {IEEE},
title = {{Real-time human pose recognition in parts from single depth images}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5995316},
volume = {2},
year = {2011}
}
@article{Siegmann2008,
author = {Siegmann, Philip},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Siegmann - 2008 - Fundaments in luminance and retroreflectivity measurements of vertical traffic signs using a color digital camera.pdf:pdf},
journal = {and Measurement,},
number = {3},
pages = {607--615},
title = {{Fundaments in luminance and retroreflectivity measurements of vertical traffic signs using a color digital camera}},
url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=4407733},
volume = {57},
year = {2008}
}
@inproceedings{Simonyan2014verydeep,
abstract = {Abstract: In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3)  ...},
author = {Simonyan, K and Zisserman, A},
booktitle = {eprint ar{\{}X{\}}iv:arXiv:1409.1556v5},
title = {{Very deep convolutional networks for large-scale image recognition}},
url = {http://arxiv.org/abs/1409.1556{\%}5Cnfile:///Files/c0/c0b9816f-15a7-4fc4-bd63-f3e291c13f03.pdf},
year = {1409}
}
@incollection{NIPS2016_6205,
author = {Singh, Saurabh and Hoiem, Derek and Forsyth, David},
booktitle = {Advances in Neural Information Processing Systems 29},
editor = {Lee, D D and Sugiyama, M and Luxburg, U V and Guyon, I and Garnett, R},
pages = {28--36},
publisher = {Curran Associates, Inc.},
title = {{Swapout: Learning an ensemble of deep architectures}},
url = {http://papers.nips.cc/paper/6205-swapout-learning-an-ensemble-of-deep-architectures.pdf},
year = {2016}
}
@article{journals/pami/SironiTRLF15,
abstract = {Learning filters to produce sparse image representations in terms of $\backslash$nover complete dictionaries has emerged as a powerful way to create image $\backslash$nfeatures for many different purposes. Unfortunately, these filters are usually $\backslash$nboth numerous and non-separable, making their use computationally expensive. In $\backslash$nthis paper, we show that such filters can be computed as linear combinations of $\backslash$na smaller number of separable ones, thus greatly reducing the computational $\backslash$ncomplexity at no cost in terms of performance. This makes filter learning $\backslash$napproaches practical even for large images or 3D volumes, and we show that we $\backslash$nsignificantly outperform state-of-the-art methods on the linear structure $\backslash$nextraction task, in terms of both accuracy and speed. Moreover, our approach is $\backslash$ngeneral and can be used on generic filter banks to reduce the complexity of the $\backslash$nconvolutions.},
author = {Sironi, Amos and Tekin, Bugra and Rigamonti, Roberto and Lepetit, Vincent and Fua, Pascal},
doi = {10.1109/TPAMI.2014.2343229},
isbn = {978-0-7695-4989-7},
issn = {01628828},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Convolutional neural networks,Convolutional sparse coding,Features extraction,Filter learning,Image denoising,Segmentation of linear structures,Separable convolution,Tensor decomposition},
number = {1},
pages = {94--106},
pmid = {26353211},
title = {{Learning separable filters}},
volume = {37},
year = {2015}
}
@inproceedings{Snoek2012,
archivePrefix = {arXiv},
arxivId = {1206.2944},
author = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan Prescott},
booktitle = {Advances in Neural Information Processing Systems},
doi = {2012arXiv1206.2944S},
eprint = {1206.2944},
isbn = {9781627480031},
issn = {10495258},
pages = {2960--2968},
pmid = {9377276},
title = {{Practical Bayesian Optimization of Machine Learning Algorithms}},
year = {2012}
}
@article{Srivastava:2014:DSW:2627435.2670313,
abstract = {Deep neural nets with a large number of parameters are very powerful machine learning systems. However, overfitting is a serious problem in such networks. Large networks are also slow to use, making it difficult to deal with overfitting by combining the predictions of many different large neural nets at test time. Dropout is a technique for addressing this problem. The key idea is to randomly drop units (along with their connections) from the neural network during training. This prevents units from co-adapting too much. During training, dropout samples from an exponential number of different “thinned” networks. At test time, it is easy to approximate the effect of averaging the predictions of all these thinned networks by simply using a single unthinned network that has smaller weights. This significantly reduces overfitting and gives major improvements over other regularization methods. We show that dropout improves the performance of neural networks on supervised learning tasks in vision, speech recognition, document classification and computational biology, obtaining state-of-the-art results on many benchmark data sets},
archivePrefix = {arXiv},
arxivId = {1102.4807},
author = {Srivastava, Nitish and Hinton, Geoffrey E. and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
doi = {10.1214/12-AOS1000},
eprint = {1102.4807},
isbn = {1532-4435},
issn = {15337928},
journal = {Journal of Machine Learning Research (JMLR)},
keywords = {deep learning,model combination,neural networks,regularization},
month = {jan},
number = {1},
pages = {1929--1958},
publisher = {JMLR.org},
title = {{Dropout : A Simple Way to Prevent Neural Networks from Overfitting}},
volume = {15},
year = {2014}
}
@inproceedings{Sutskever2013momentum,
author = {Sutskever, Ilya and Martens, James and Dahl, George E and Hinton, Geoffrey E},
booktitle = {Proceedings of the 30th International Conference on Machine Learning, {\{}ICML{\}} 2013, Atlanta, GA, USA, 16-21 June 2013},
pages = {1139--1147},
publisher = {JMLR.org},
series = {{\{}JMLR{\}} Workshop and Conference Proceedings},
title = {{On the importance of initialization and momentum in deep learning}},
url = {http://jmlr.org/proceedings/papers/v28/sutskever13.html},
volume = {28},
year = {2013}
}
@inproceedings{Szegedy2014going,
author = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
booktitle = {Computer Vision and Pattern Recognition (CVPR)},
title = {{Going Deeper with Convolutions}},
url = {http://arxiv.org/abs/1409.4842},
year = {2015}
}
@proceedings{szegedy2015rethinking,
abstract = {Convolutional networks are at the core of most state-of-the-art computer vision solutions for a wide variety of tasks. Since 2014 very deep convolutional networks started to become mainstream, yielding substantial gains in various benchmarks. Although increased model size and computational cost tend to translate to immediate quality gains for most tasks (as long as enough labeled data is provided for training), computational efficiency and low parameter count are still enabling factors for various use cases such as mobile vision and big-data scenarios. Here we explore ways to scale up networks in ways that aim at utilizing the added computation as efficiently as possible by suitably factorized convolutions and aggressive regularization. We benchmark our methods on the ILSVRC 2012 classification challenge validation set demonstrate substantial gains over the state of the art: 21.2{\{}{\%}{\}} top-1 and 5.6{\{}{\%}{\}} top-5 error for single frame evaluation using a network with a computational cost of 5 billion multiply-adds per inference and with using less than 25 million parameters. With an ensemble of 4 models and multi-crop evaluation, we report 3.5{\{}{\%}{\}} top-5 error and 17.3{\{}{\%}{\}} top-1 error.},
archivePrefix = {arXiv},
arxivId = {1512.00567},
author = {Szegedy, Christian and Vanhoucke, Vincent and Ioffe, Sergey and Shlens, Jonathon and Wojna, Zbigniew},
booktitle = {arXiv preprint},
doi = {10.1002/2014GB005021},
eprint = {1512.00567},
isbn = {9781617796029},
issn = {08866236},
pmid = {8190083},
title = {{Rethinking the Inception Architecture for Computer Vision}},
url = {http://arxiv.org/abs/1512.00567},
volume = {The IEEE C},
year = {2016}
}
@inproceedings{Deselaers2011visual,
author = {{T. Deselaers}, V Ferrari},
booktitle = {Proceedings of the 2011 IEEE Conference on Computer Vision and Pattern Recognition},
title = {{Visual and Semantic Similarity in {\{}I{\}}mage{\{}N{\}}et}},
year = {2011}
}
@article{Terzopoulos1988,
author = {Terzopoulos, D and Witkin, A and Kass, M},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Terzopoulos, Witkin, Kass - 1988 - Constraints on deformable models Recovering 3D shape and nonrigid motion(2).pdf:pdf},
journal = {Artificial intelligence},
number = {1988},
pages = {91--123},
title = {{Constraints on deformable models: Recovering 3D shape and nonrigid motion}},
url = {http://www.sciencedirect.com/science/article/pii/000437028890080X},
volume = {36},
year = {1988}
}
@misc{TheMendeleySupportTeam2011c,
abstract = {A quick introduction to Mendeley. Learn how Mendeley creates your personal digital library, how to organize and annotate documents, how to collaborate and share with colleagues, and how to generate citations and bibliographies.},
address = {London},
author = {{The Mendeley Support Team}},
booktitle = {Mendeley Desktop},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/The Mendeley Support Team - 2011 - Getting Started with Mendeley.pdf:pdf},
keywords = {Mendeley,how-to,user manual},
pages = {1--16},
publisher = {Mendeley Ltd.},
title = {{Getting Started with Mendeley}},
url = {http://www.mendeley.com},
year = {2011}
}
@inproceedings{Tompson_2015_CVPR,
author = {Tompson, Jonathan and Goroshin, Ross and Jain, Arjun and LeCun, Yann and Bregler, Christoph},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {jun},
title = {{Efficient Object Localization Using Convolutional Networks}},
year = {2015}
}
@book{Trucco1998,
author = {Trucco, Emanuele and Verri, Alessandro},
isbn = {0132611082},
publisher = {Prentice Hall},
title = {{Introductory techniques for 3-D computer vision}},
year = {1998}
}
@inproceedings{Tsin2001,
author = {Tsin, Y. and Ramesh, V. and Kanade, T.},
booktitle = {Proceedings Eighth IEEE International Conference on Computer Vision. ICCV 2001},
doi = {10.1109/ICCV.2001.937555},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Tsin, Ramesh, Kanade - 2001 - Statistical calibration of CCD imaging process.pdf:pdf},
isbn = {0-7695-1143-0},
pages = {480--487},
publisher = {IEEE Comput. Soc},
title = {{Statistical calibration of CCD imaging process}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=937555},
volume = {1},
year = {2001}
}
@phdthesis{Unnikrishnan2008a,
author = {Unnikrishnan, Ranjith},
booktitle = {wwwoldricmuedu},
number = {May},
pages = {1----146},
publisher = {CARNEGIE MELLON UNIVERSITY},
school = {Carnegie Mellon University},
title = {{Statistical Approaches to Multi-scale Point Cloud Processing}},
url = {http://www-old.ri.cmu.edu/pubs/pub{\_}6112{\_}text.html},
year = {2008}
}
@article{Unnikrishnan2008,
author = {Unnikrishnan, Ranjith and Hebert, Martial},
doi = {10.1109/CVPRW.2008.4563030},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Unnikrishnan, Hebert - 2008 - Multi-scale interest regions from unorganized point clouds.pdf:pdf},
isbn = {978-1-4244-2339-2},
journal = {2008 IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops},
month = {jun},
pages = {1--8},
publisher = {Ieee},
title = {{Multi-scale interest regions from unorganized point clouds}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4563030},
year = {2008}
}
@inproceedings{vanhoucke2011improving,
abstract = {Recent advances in deep learning have made the use of large, deep neural net- works with tens of millions of parameters suitable for a number of applications that require real-time processing. The sheer size of these networks can represent a challenging computational burden, even for modern CPUs. For this reason, GPUs are routinely used instead to train and run such networks. This paper is a tutorial for students and researchers on some of the techniques that can be used to reduce this computational cost considerably on modern x86 CPUs. We emphasize data layout, batching of the computation, the use of SSE2 instructions, and particularly leverage SSSE3 and SSE4 fixed-point instructions which provide a 3× improve- ment over an optimized floating-point baseline. We use speech recognition as an example task, and show that a real-time hybrid hidden Markov model / neural network (HMM/NN) large vocabulary system can be built with a 10× speedup over an unoptimized baseline and a 4× speedup over an aggressively optimized floating-point baseline at no cost in accuracy. The techniques described extend readily to neural network training and provide an effective alternative to the use of specialized hardware.},
author = {Vanhoucke, Vincent and Senior, Andrew and Mao, Mz},
booktitle = {Proc. Deep Learning and {\ldots}},
issn = {9781450329569},
pages = {1--8},
title = {{Improving the speed of neural networks on CPUs}},
url = {http://research.google.com/pubs/archive/37631.pdf},
volume = {1},
year = {2011}
}
@inproceedings{wager2013dropout,
author = {Wager, S and Wang, S I and Liang, P},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Wager, Wang, Liang - 2013 - Dropout Training as Adaptive Regularization.pdf:pdf},
title = {{Dropout Training as Adaptive Regularization}},
year = {2013}
}
@inproceedings{icml2013_wan13,
abstract = {We introduce DropConnect, a generalization of DropOut, for regularizing large fully-connected layers within neural networks. When training with Dropout, a randomly selected subset of activations are set to zero within each layer. DropConnect instead sets a randomly selected subset of weights within the network to zero. Each unit thus receives input from a random subset of units in the previous layer. We derive a bound on the generalization performance of both Dropout and DropConnect. We then evaluate DropConnect on a range of datasets, comparing to Dropout, and show state-of-the-art results on several image recoginition benchmarks can be obtained by aggregating multiple DropConnect-trained models.},
author = {Wan, Li and Zeiler, Matthew and Zhang, Sixin and Cun, Yann L and Fergus, Rob},
booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)},
editor = {Dasgupta, Sanjoy and Mcallester, David},
month = {may},
number = {3},
pages = {1058--1066},
publisher = {JMLR Workshop and Conference Proceedings},
title = {{Regularization of Neural Networks using DropConnect}},
url = {http://jmlr.org/proceedings/papers/v28/wan13.pdf},
volume = {28},
year = {2013}
}
@inproceedings{Welbl2014casting,
author = {Welbl, J},
booktitle = {GCPR},
title = {{Casting Random Forests as Artificial Neural Networks (and Profiting from It)}},
year = {2014}
}
@inproceedings{Wu2015scalingup,
abstract = {We present a state-of-the-art image recognition system, Deep Image, developed using end-to-end deep learning. The key components are a custom-built supercomputer dedicated to deep learning, a highly optimized parallel algorithm using new strategies for data partitioning and communication, larger deep neural network models, novel data augmentation approaches, and usage of multi-scale high-resolution images. On one of the most challenging computer vision benchmarks, the ImageNet classification challenge, our system has achieved the best result to date, with a top-5 error rate of 5.33{\%}, a relative 20.0{\%} improvement over the previous best result.},
archivePrefix = {arXiv},
arxivId = {1501.02876},
author = {Wu, Ren and Yan, Shengen and Shan, Yi and Dang, Qingqing and Sun, Gang},
booktitle = {Arxiv},
doi = {10.1007/3-540-60220-8},
eprint = {1501.02876},
isbn = {9781931971164 1931971161},
issn = {0031921X},
pages = {12},
pmid = {903},
title = {{Deep Image: Scaling up Image Recognition}},
url = {http://arxiv.org/abs/1501.02876},
year = {2015}
}
@article{2016arXiv161105431X,
archivePrefix = {arXiv},
arxivId = {cs.CV/1611.05431},
author = {Xie, S and Girshick, R and Doll{\'{a}}r, P and Tu, Z and He, K},
eprint = {1611.05431},
journal = {ArXiv e-prints},
keywords = {Computer Science - Computer Vision and Pattern Rec},
month = {nov},
primaryClass = {cs.CV},
title = {{Aggregated Residual Transformations for Deep Neural Networks}},
year = {2016}
}
@inproceedings{xu2014deep,
author = {Xu, Li and Ren, Jimmy S and Liu, Ce and Jia, Jiaya},
booktitle = {Advances in Neural Information Processing Systems},
pages = {1790--1798},
title = {{Deep convolutional neural network for image deconvolution}},
year = {2014}
}
@article{Yang2012,
author = {Yang, Bo and Nevatia, Ram},
journal = {Learning},
title = {{An Online Learned CRF Model for Multi-Target Tracking}},
year = {2012}
}
@inproceedings{yi2016lift,
address = {Amsterdarm},
author = {Yi, Kwang Moo and Trulls, Eduard and Lepetit, Vincent and Fua, Pascal},
booktitle = {European Conference on Computer Vision (ECCV)},
title = {{LIFT: Learned Invariant Feature Transform}},
year = {2016}
}
@book{yu2014book,
author = {Yu, Dong and Deng, Li},
booktitle = {Book},
doi = {10.1007/978-1-4471-5779-3},
isbn = {9781447157786},
issn = {1860-4862},
publisher = {Springer},
title = {{Automatic Speech Recognition: A Deep Learning Approach}},
year = {2015}
}
@inproceedings{Zeiler2010Deconv,
author = {Zeiler, M D and Krishnan, D and Taylor, G W and R.Fergus},
booktitle = {Proceedings of the 2010 IEEE Conference on Computer Vision and Pattern Recognition},
title = {{Deconvolutional Networks}},
year = {2010}
}
@misc{1311.2901v3,
abstract = {Large Convolutional Network models have recently demonstrated impressive classification performance on the ImageNet benchmark Krizhevsky et al. [18]. However there is no clear understanding of why they perform so well, or how they might be improved. In this paper we explore both issues. We introduce a novel visualization technique that gives insight into the function of intermediate feature layers and the operation of the classifier. Used in a diagnostic role, these visualizations allow us to find model architectures that outperform Krizhevsky et al on the ImageNet classification benchmark. We also perform an ablation study to discover the performance contribution from different model layers. We show our ImageNet model generalizes well to other datasets: when the softmax classifier is retrained, it convincingly beats the current state-of-the-art results on Caltech-101 and Caltech-256 datasets.},
annote = {published = 2013-11-12T20:02:22Z, updated = 2013-11-28T23:04:01Z},
archivePrefix = {arXiv},
arxivId = {1311.2901},
author = {Zeiler, Matthew D. and Fergus, Rob},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
doi = {10.1007/978-3-319-10590-1_53},
eprint = {1311.2901},
isbn = {9783319105895},
issn = {16113349},
month = {nov},
number = {PART 1},
pages = {818--833},
pmid = {26353135},
title = {{Visualizing and understanding convolutional networks}},
volume = {8689 LNCS},
year = {2014}
}
@article{rethinking2016,
abstract = {Despite their massive size, successful deep artificial neural networks can exhibit a remarkably small difference between training and test performance. Conventional wisdom attributes small generalization error either to properties of the model family, or to the regularization techniques used during training. Through extensive systematic experiments, we show how these traditional approaches fail to explain why large neural networks generalize well in practice. Specifically, our experiments establish that state-of-the-art convolutional networks for image classification trained with stochastic gradient methods easily fit a random labeling of the training data. This phenomenon is qualitatively unaffected by explicit regularization, and occurs even if we replace the true images by completely unstructured random noise. We corroborate these experimental findings with a theoretical construction showing that simple depth two neural networks already have perfect finite sample expressivity as soon as the number of parameters exceeds the number of data points as it usually does in practice. We interpret our experimental findings by comparison with traditional models.},
archivePrefix = {arXiv},
arxivId = {1611.03530},
author = {Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol},
eprint = {1611.03530},
journal = {arXiv},
title = {{Understanding deep learning requires rethinking generalization}},
url = {http://arxiv.org/abs/1611.03530},
year = {2016}
}
@article{Zhang1999,
abstract = {Since the first shape-from-shading (SFS) technique was developed by Horn in the early 1970s, many different approaches have emerged. In this paper, six well-known SFS algorithms are implemented and compared. The performance of the algorithms was analyzed on synthetic images using mean and standard deviation of depth (Z) error, mean of surface gradient (p, q) error, and CPU timing. Each algorithm works well for certain images, but performs poorly for others. In general, minimization approaches are more robust, while the other approaches are faster},
author = {Zhang, Ruo and Tsai, Ping-sing and Cryer, James Edwin and Shah, Mubarak},
doi = {10.1109/34.784284},
institution = {University of Central Florida},
issn = {01628828},
journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
keywords = {analysis algorithms; cda 9222798; lambertian model},
number = {8},
pages = {1--41},
publisher = {IEEE},
title = {{Shape from Shading : A Survey}},
url = {http://ieeexplore.ieee.org/xpls/abs{\%}7B{\_}{\%}7Dall.jsp?arnumber=784284},
volume = {21},
year = {1999}
}
@article{Zhang1999,
abstract = {Since the first shape-from-shading (SFS) technique was developed by Horn in the early 1970s, many different approaches have emerged. In this paper, six well-known SFS algorithms are implemented and compared. The performance of the algorithms was analyzed on synthetic images using mean and standard deviation of depth (Z) error, mean of surface gradient (p, q) error, and CPU timing. Each algorithm works well for certain images, but performs poorly for others. In general, minimization approaches are more robust, while the other approaches are faster},
author = {Zhang, Ruo and Tsai, Ping-sing and Cryer, James Edwin and Shah, Mubarak},
doi = {10.1109/34.784284},
file = {:home/yani/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Zhang et al. - 1999 - Shape from Shading A Survey.pdf:pdf},
institution = {University of Central Florida},
issn = {01628828},
journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
keywords = {analysis algorithms; cda 9222798; lambertian model},
number = {8},
pages = {1--41},
publisher = {IEEE},
title = {{Shape from Shading : A Survey}},
url = {http://ieeexplore.ieee.org/xpls/abs{\%}7B{\_}{\%}7Dall.jsp?arnumber=784284},
volume = {21},
year = {1999}
}
@misc{1505.06798v1,
abstract = {This paper aims to accelerate the test-time computation of convolutional neural networks (CNNs), especially very deep CNNs that have substantially impacted the computer vision community. Unlike existing methods that are designed for approximating linear filters or linear responses, our method takes the nonlinear units into account. We develop an effective solution to the resulting nonlinear optimization problem without the need of stochastic gradient descent (SGD). More importantly, while current methods mainly focus on optimizing one or two layers, our nonlinear method enables an asymmetric reconstruction that reduces the rapidly accumulated error when multiple (e.g., {\textgreater}=10) layers are approximated. For the widely used very deep VGG-16 model, our method achieves a whole-model speedup of 4x with merely a 0.3{\%} increase of top-5 error in ImageNet classification. Our 4x accelerated VGG-16 model also shows a graceful accuracy degradation for object detection when plugged into the latest Fast R-CNN detector.},
annote = {published = 2015-05-26T03:30:59Z, updated = 2015-05-26T03:30:59Z, Technical report. arXiv admin note: substantial text overlap with arXiv:1411.4229},
archivePrefix = {arXiv},
arxivId = {1505.06798},
author = {Zhang, Xiangyu and Zou, Jianhua and He, Kaiming and Sun, Jian},
booktitle = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
doi = {10.1109/TPAMI.2015.2502579},
eprint = {1505.06798},
issn = {0162-8828},
month = {may},
pages = {1--14},
pmid = {26599615},
title = {{Accelerating Very Deep Convolutional Networks for Classification and Detection}},
url = {http://arxiv.org/abs/1505.06798},
year = {2015}
}
@inproceedings{Zhang2015efficient,
abstract = {This paper aims to accelerate the test-time computation of deep convolutional neural networks (CNNs). Unlike existing methods that are designed for approximating linear filters or linear responses, our method takes the nonlinear units into account. We minimize the reconstruction error of the nonlinear responses, subject to a low-rank constraint which helps to reduce the complexity of filters. We develop an effective solution to this constrained nonlinear optimization problem. An algorithm is also presented for reducing the accumulated error when multiple layers are approximated. A whole-model speedup ratio of 4x is demonstrated on a large network trained for ImageNet, while the top-5 error rate is only increased by 0.9{\%}. Our accelerated model has a comparably fast speed as the "AlexNet", but is 4.7{\%} more accurate.},
archivePrefix = {arXiv},
arxivId = {1411.4229},
author = {Zhang, Xiangyu and Zou, Jianhua and Ming, Xiang and He, Kaiming and Sun, Jian},
booktitle = {eprint ar{\{}X{\}}iv:1411.4229v1},
eprint = {1411.4229},
isbn = {9781467369640},
title = {{Efficient and Accurate Approximations of Nonlinear Convolutional Networks}},
url = {http://arxiv.org/abs/1411.4229},
year = {2014}
}
@inproceedings{zhou2014learning,
abstract = {Scene recognition is one of the hallmark tasks of computer vision, allowing definition of a context for object recognition. Whereas the tremendous recent progress in object recognition tasks is due to the availability of large datasets like ImageNet and the rise of Convolutional Neural Networks (CNNs) for learning high-level features, performance at scene recognition has not attained the same level of success. This may be because current deep features trained from ImageNet are not competitive enough for such tasks. Here, we introduce a new scene-centric database called Places with over 7 million labeled pictures of scenes. We propose new methods to compare the density and diversity of image datasets and show that Places is as dense as other scene datasets and has more diversity. Using CNN, we learn deep features for scene recognition tasks, and establish new state-of-the-art results on several scene-centric datasets. A visualization of the CNN layers' responses allows us to show differences in the internal representations of object-centric and scene-centric networks.},
author = {Zhou, Bolei and Lapedriza, Agata and Xiao, Jianxiong and Torralba, Antonio and Oliva, Aude},
booktitle = {Advances in Neural Information Processing Systems 27},
issn = {10495258},
pages = {487--495},
title = {{Learning Deep Features for Scene Recognition using Places Database}},
url = {http://papers.nips.cc/paper/5349-learning-deep-features-for-scene-recognition-using-places-database.pdf},
year = {2014}
}
@article{Zickler,
author = {Zickler, T and Mallick, S P},
doi = {10.1109/CVPR.2006.77},
isbn = {0-7695-2597-0},
journal = {International Journal of {\{}{\ldots}{\}}},
pages = {2000--2010},
publisher = {Ieee},
title = {{Color subspaces as photometric invariants}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1640998 http://www.springerlink.com/index/383064l520t33622.pdf},
volume = {2},
year = {2008}
}
@inproceedings{segmentation-of-brain-tumor-tissues-with-convolutional-neural-networks,
author = {Zikic, Darko and Ioannou, Yani and Brown, Matthew and Criminisi, Antonio},
booktitle = {MICCAI workshop on Multimodal Brain Tumor Segmentation Challenge (BRATS)},
month = {oct},
publisher = {Springer},
title = {{Segmentation of Brain Tumor Tissues with Convolutional Neural Networks}},
year = {2014}
}
@inproceedings{conf/cvpr/2013,
publisher = {IEEE},
title = {{2013 {\{}IEEE{\}} Conference on Computer Vision and Pattern Recognition, Portland, OR, USA, June 23-28, 2013}},
year = {2013}
}
@inproceedings{conf/iccv/2015,
isbn = {978-1-4673-8391-2},
publisher = {{\{}IEEE{\}} Computer Society},
title = {{2015 {\{}IEEE{\}} International Conference on Computer Vision, {\{}ICCV{\}} 2015, Santiago, Chile, December 7-13, 2015}},
url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7407725},
year = {2015}
}
@inproceedings{conf/icml/2013,
publisher = {JMLR.org},
series = {{\{}JMLR{\}} Workshop and Conference Proceedings},
title = {{Proceedings of the 30th International Conference on Machine Learning, {\{}ICML{\}} 2013, Atlanta, GA, USA, 16-21 June 2013}},
url = {http://jmlr.org/proceedings/papers/v28/},
volume = {28},
year = {2013}
}