% From here: https://en.m.wikibooks.org/wiki/LaTeX/Glossary
% \DeclareDocumentCommand{\newdualentry}{ O{} O{} m m m m } {
% \newglossaryentry{gls-#3}{name={#5},text={#5\glsadd{#3}},
% description={#6},#1
% }
% \newacronym[see={[Glossary:]{gls-#3}},#2]{#3}{#4}{#5\glsadd{gls-#3}}
% }
\usepackage{xparse}
% \makenoidxglossaries is called once, in the preamble, rather than inside
% \newdualentry (which runs once per entry)
\makenoidxglossaries{}
\DeclareDocumentCommand{\newdualentry}{ O{} O{} m m m m } {
  \newglossaryentry{gls-#3}{name={#4},text={#4},
    % the description leads with the acronym's long form
    description={\glsfmtlong{#3}, {#6}},#1
  }
  \newacronym[see={[see Glossary:]{gls-#3}},#2]{#3}{#4}{#5}
}
% set glossary acronyms to expand on first use
\setabbreviationstyle{long-short}
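% A minimal usage sketch (assuming the main preamble loads glossaries-extra):
% \newdualentry{key}{SHORT}{Long Form}{description} defines both an acronym
% and a linked glossary entry, so in body text one could write, for example:
%   \gls{cnn}     -> ``Convolutional Neural Network (CNN)'' on first use
%   \gls{cnn}     -> ``CNN'' on subsequent uses (long-short style)
%   \gls{gls-cnn} -> the glossary entry carrying the full description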
%https://tex.stackexchange.com/questions/127648/how-can-i-add-every-glossary-entry-to-the-index#127671
%\defglsdisplayfirst[\acronymtype]{#1#4\index{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% GLOSSARY
\newdualentry{cnn}{CNN}{Convolutional Neural Network}{a neural network designed for learning representations of image inputs, with shared parameters in the form of a set of convolutional filters}
\newdualentry{dnn}{DNN}{Deep Neural Network}{a neural network with two or more hidden layers}
\newdualentry{rnn}{RNN}{Recurrent Neural Network}{a neural network designed for sequences, with shared parameters in the form of a recurrence}
\newdualentry[see={cifar10}]{cifar}{CIFAR}{Canadian Institute for Advanced Research}{research organization that has funded several prominent researchers in Canada, notably Geoffrey Hinton, whose lab released two popular datasets, \glsfmttext{cifar10} and CIFAR-100}
\newdualentry{mnist}{MNIST}{Modified National Institute of Standards and Technology}{dataset of handwritten digits, commonly used as a machine learning benchmark with 60,000 training and 10,000 test images}
\newdualentry{mlp}{MLP}{Multi-layer Perceptron}{An established misnomer for a neural network with one or more hidden layers; despite the name, it is not composed of Perceptrons}
\newdualentry{vgg}{VGG}{Visual Geometry Group}{a research group at the University of Oxford from which the popular VGG network architecture was proposed by \citet{Simonyan2014verydeep}}
\newdualentry[see={gap,lde}]{nin}{NiN}{Network in Network}{a neural network architecture proposed by \citet{Lin2013NiN} which introduced \glsfmttext{gap} and \glsfmttext{lde}}
\newdualentry[see={api}]{blas}{BLAS}{Basic Linear Algebra Subprograms}{a common \glsfmttext{api} for accelerating linear algebra operations, notably matrix multiplication, on hardware; typically a heavily optimized implementation is provided by the hardware vendor}
\newdualentry[see={blas,cuda}]{cublas}{cuBLAS}{CUDA BLAS}{Nvidia's implementation of \glsfmttext{blas} for \glsfmttext{cuda}}
\newdualentry[see={api,gpu}]{cuda}{CUDA}{Compute Unified Device Architecture}{Nvidia's \glsfmttext{gpu} programming \glsfmttext{api}}
\newdualentry[see={blas,cpu}]{mkl}{MKL}{Math Kernel Library}{Intel's \glsfmttext{blas} implementation for Intel \glsfmttext{cpu}s}
\newglossaryentry{cifar10}{name=CIFAR-10, description={An image recognition dataset funded by \glsfmttext{cifar} and created by \citet{CIFAR10} consisting of 60,000 32$\times$32 colour images of 10 classes of objects}, see={cifar}}
%\newglossaryentry{cifar100}{name=CIFAR-100, description={An image recognition dataset created by \citet{CIFAR10} consisting of 60,000 32$\times$32 colour images of 100 classes of objects}}
\newglossaryentry{alexnet}{name=AlexNet, description={A neural network architecture proposed by \citet{Krizhevsky2012} that revolutionized computer vision and renewed interest in neural networks}}
\newglossaryentry{googlenet}{name=GoogLeNet, description={A neural network architecture proposed by \citet{Szegedy2014going} and since extended in the \glsfmttext{inception} v1--4 refinements},see={inception}}
\newglossaryentry{inception}{name=Inception, description={A building-block of the \glsfmttext{googlenet} neural network architecture designed for efficient state-of-the-art image recognition}, see={googlenet}}
\newglossaryentry{resnet}{name=ResNet, description={Residual network, a network architecture proposed by \citet{He2015} that uses residual connections to improve generalization and training of very deep architectures.}}
\newglossaryentry{structuralprior}{name=Structural Prior, description={The encoding of prior knowledge into a neural network by architecture design, \eg{} a \glsfmttext{cnn}, what some might call ``infinitely strong regularization''~\citep{goodfellow2016deep}}, see={cnn}}
\newglossaryentry{cudnn}{name=CuDNN, description={Nvidia's Deep Neural Network acceleration library}, see={cuda}}
\newglossaryentry{occam}{name={Occam's razor}, description={A general principle in hypothesis selection: given several hypotheses that match the evidence, the simplest, \ie the one with the fewest assumptions, should be selected}}
\newglossaryentry{finetuned}{name={finetuned}, description={A method of continuing the training of a pre-trained network; definitions vary, but typically some or all layers of a pre-trained \glsfmttext{dnn} are trained at a greatly reduced learning rate}}
\newglossaryentry{featuremap}{name={feature map}, description={The input/output of a convolutional layer, a 3D tensor with two spatial dimensions and a third dimension corresponding to the output image from a single convolutional filter}}
\newglossaryentry{filter}{name={filter}, description={A convolutional filter, or kernel, of spatial dimensions \(w \times h\) and depth \(c\), where \(c\) is the number of channels in the input \Glsfmtlong{featuremap}},see={featuremap}}
\newglossaryentry{regularization}{name={regularization}, description={a broadly used but relatively ill-defined term, often taken to mean `anything that improves generalization'. We instead define regularization as any modification of the training algorithm that, explicitly or implicitly, makes the error surface smoother; the prototypical method is weight decay~\citep{hinton1987learning}}}
\newglossaryentry{padding}{name={padding}, description={padding of the input \glsfmttext{featuremap}/image for convolution: the outer edge of the image is padded with (typically zero) dummy values so that the convolutional filter can be applied to every input pixel},see={featuremap}}
\newglossaryentry{stride}{name={stride}, description={number of rows/columns of the input \glsfmttext{featuremap}/image to skip during convolution},see={featuremap}}
\newglossaryentry{implicitrouting}{name={implicit routing}, description={in a conditional network, routing caused by the network's structure}}
\newglossaryentry{explicitrouting}{name={explicit routing}, description={in a conditional network, routing caused by a routing node}}
\newglossaryentry{objectinstancerecognition}{name={object instance recognition}, description={the problem of recognizing a specific instance of an object class, \eg recognizing a specific car model}}
\newglossaryentry{objectclassrecognition}{name={object class recognition}, description={the problem of recognizing a general object class, \eg recognizing a car \vs bicycle}}
\newglossaryentry{compositelayer}{name={composite layer}, description={a \glsfmttext{dnn} layer effectively composed of several potentially heterogeneous layers, \eg the \glsfmttext{inception} module}, see={inception}}
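% For orientation, the padding and stride entries above combine in the usual
% convolution output-size relation (a sketch under the common convention;
% the per-side padding p and stride s are not symbols defined in this file):
%   H_out = floor((H - h + 2p)/s) + 1,  W_out = floor((W - w + 2p)/s) + 1
% with input featuremap size H x W and filter size h x w, matching the
% symbol entries in the SYMBOLS section below.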
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ACRONYMS
\newacronym{relu}{ReLU}{Rectified Linear Unit}
\newacronym{pca}{PCA}{Principal Component Analysis}
\newacronym{gpu}{GPU}{Graphics Processing Unit}
\newacronym{cpu}{CPU}{Central Processing Unit}
\newacronym{rgb}{RGB}{Red-Green-Blue}
\newacronym{ilsvrc}{ILSVRC}{ImageNet Large-Scale Visual Recognition Challenge}
\newacronym{sgd}{SGD}{Stochastic Gradient Descent}
\newacronym{msr}{MSR}{Microsoft Research}
\newacronym{cvpr}{CVPR}{Computer Vision and Pattern Recognition}
\newacronym{iclr}{ICLR}{International Conference on Learning Representations}
\newacronym{lde}{LDE}{Low-Dimensional Embeddings}
\newacronym{gap}{GAP}{Global Average Pooling}
\newacronym{gmp}{GMP}{Global Max Pooling}
\newacronym{vc}{VC}{Vapnik--Chervonenkis}
\newacronym{nfl}{NFL}{No Free Lunch theorem}
\newacronym{zca}{ZCA}{Zero-phase Component Analysis}
\newacronym{ma}{MA}{Multiply-Accumulate}
\newacronym{flops}{FLOPs}{Floating-Point Operations}
\newacronym{api}{API}{Application Programming Interface}
\newacronym{doi}{DOI}{Digital Object Identifier}
\newacronym{sift}{SIFT}{Scale-Invariant Feature Transform}
\newacronym{ltu}{LTU}{Linear Threshold Unit}
\newacronym{obd}{OBD}{Optimal Brain Damage}
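% A worked example relating \gls{ma} and \gls{flops} (assuming the common
% convention that one multiply-accumulate counts as two floating-point
% operations; not a definition taken from this document): a convolutional
% layer producing an H x W x d output from h x w x c filters costs about
%   H * W * d * (h * w * c) MAs  ~  2 * H * W * d * h * w * c FLOPs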
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SYMBOLS
\glsxtrnewsymbol[description={input of a neuron}, sort={x}]{x}{\ensuremath{x}}
\glsxtrnewsymbol[description={output of a neuron}, sort={y}]{y}{\ensuremath{y}}
\glsxtrnewsymbol[description={bias of a neuron}, sort={b}]{b}{\ensuremath{b}}
\glsxtrnewsymbol[description={weight of a neuron}, sort={w}]{w}{\ensuremath{w}}
\glsxtrnewsymbol[description={net activation of a neuron}, sort={a}]{a}{\ensuremath{a}}
\glsxtrnewsymbol[description={error for an output neuron with a given sample}, sort={e}]{e}{\ensuremath{e}}
\glsxtrnewsymbol[description={target label for an output neuron with a given sample}, sort={t}]{t}{\ensuremath{t}}
\glsxtrnewsymbol[description={error function}, sort={E}]{E}{\ensuremath{E}}
\glsxtrnewsymbol[description={surrogate loss function}, sort={L}]{L}{\ensuremath{\mathcal{L}}}
\glsxtrnewsymbol[description={learning rate hyper-parameter}, sort={gamma}]{lr}{\ensuremath{\gamma}}
\glsxtrnewsymbol[description={weight decay hyper-parameter}, sort={lambda}]{weightdecay}{\ensuremath{\lambda}}
\glsxtrnewsymbol[description={training set}, sort={X}]{X}{\ensuremath{X}}
\glsxtrnewsymbol[description={activation function of a neuron}, sort={f}]{f}{\ensuremath{f}}
\glsxtrnewsymbol[description={vector of inputs to a neuron, \ensuremath{\mathbf{x} = \left\{x_0, x_1, \ldots\right\}}}, sort={x}]{vectorx}{\ensuremath{\mathbf{x}}}
\glsxtrnewsymbol[description={vector of weights for a neuron, \ensuremath{\mathbf{w} = \left\{w_0, w_1, \ldots\right\}}}, sort={w}]{vectorw}{\ensuremath{\mathbf{w}}}
\glsxtrnewsymbol[description={vector of outputs for a single pixel in an output \glsfmttext{featuremap}}, sort={y}]{vectory}{\ensuremath{\mathbf{y}}}
\glsxtrnewsymbol[description={vector of biases for a convolutional layer}, sort={b}]{vectorb}{\ensuremath{\mathbf{b}}}
\glsxtrnewsymbol[description={`local gradient'/`error'/delta}, sort={delta}]{delta}{\ensuremath{\delta}}
\glsxtrnewsymbol[description={velocity vector, used in momentum}, sort={v}]{velocity}{\ensuremath{\mathbf{v}}}
\glsxtrnewsymbol[description={training iteration}, sort={t}]{iteration}{\ensuremath{t}}
\glsxtrnewsymbol[description={height of a convolutional \glsfmttext{featuremap}}, sort={H}]{H}{\ensuremath{H}}
\glsxtrnewsymbol[description={width of a convolutional \glsfmttext{featuremap}}, sort={W}]{W}{\ensuremath{W}}
\glsxtrnewsymbol[description={height of a convolutional \glsfmttext{filter}}, sort={h}]{filterh}{\ensuremath{h}}
\glsxtrnewsymbol[description={width of a convolutional \glsfmttext{filter}}, sort={w}]{filterw}{\ensuremath{w}}
\glsxtrnewsymbol[description={\# (input) channels of a convolutional \glsfmttext{filter}/\glsfmttext{featuremap}}, sort={c}]{c}{\ensuremath{c}}
\glsxtrnewsymbol[description={\# output channels of a convolutional \glsfmttext{filter}/\glsfmttext{featuremap}}, sort={d}]{d}{\ensuremath{d}}
\glsxtrnewsymbol[description={response/gradient per-layer scaling factor}, sort={beta}]{beta}{\ensuremath{\beta}}
\glsxtrnewsymbol[description={expected value}, sort={E}]{expected}{\ensuremath{\mathrm{E}}}
\glsxtrnewsymbol[description={standard deviation}, sort={stddev}]{stddev}{\ensuremath{\sigma}}
\glsxtrnewsymbol[description={mean}, sort={mu}]{mean}{\ensuremath{\mu}}
\glsxtrnewsymbol[description={Hessian matrix of second-order derivatives}, sort={Hessian}]{hessian}{\ensuremath{\mathbf{H}}}
\glsxtrnewsymbol[description={convolutional layer weight matrix}, sort={W}]{wmatrix}{\ensuremath{\mathbf{W}}}
\glsxtrnewsymbol[description={worst-case computational complexity}, sort={O}]{bigoh}{\ensuremath{O}}
\glsxtrnewsymbol[description={incoming convolutional \glsfmttext{featuremap} tensor}, sort={Xfm}]{fmX}{\ensuremath{\mathbf{X}}}
\glsxtrnewsymbol[description={outgoing convolutional \glsfmttext{featuremap} tensor}, sort={Xfm}]{fmY}{\ensuremath{\mathbf{Y}}}
\glsxtrnewsymbol[description={convolutional filter/kernel tensor}, sort={F}]{fmK}{\ensuremath{\mathbf{F}}}
\glsxtrnewsymbol[description={group index}, sort={g}]{g}{\ensuremath{g}}
\glsxtrnewsymbol[description={covariance}, sort={c}, category={functions}]{covar}{\ensuremath{\operatorname{covar}}}
\glsxtrnewsymbol[description={convolution operator}, sort={convolution}, category={functions}]{convolution}{\ensuremath{\mathop{\convolution}}}
\glsxtrnewsymbol[description={function composition operator}, sort={composition}, category={functions}]{composition}{\ensuremath{\mathop{\circ}}}
\glsxtrnewsymbol[description={variance}, sort={Var}, category={functions}]{var}{\ensuremath{\mathrm{Var}}}
\glsxtrnewsymbol[description={normalized response}, sort={xhat}, category={functions}]{normx}{\ensuremath{\hat{x}}}
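% For orientation, the neuron symbols above compose as in the standard
% definitions (a sketch, not taken verbatim from the thesis text):
%   a = \mathbf{w}^{\top}\mathbf{x} + b   (net activation)
%   y = f(a)                              (neuron output)
%   e = t - y                             (error for an output neuron)
% and a typical SGD-with-momentum update at iteration t, with learning rate
% \gamma, weight decay \lambda, and velocity \mathbf{v} (the momentum
% coefficient m is not a symbol defined in this file):
%   v_{t+1} = m v_t - \gamma (\nabla_{\mathbf{w}} \mathcal{L} + \lambda \mathbf{w})
%   \mathbf{w}_{t+1} = \mathbf{w}_t + \mathbf{v}_{t+1}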