Skip to content

Commit

Permalink
Adding He and LeCun initializers (tensorflow#612)
Browse files Browse the repository at this point in the history
* Adding He and LeCun initializers, tests for those, and helper functions for layer initialization.

* Added documentation for the seed parameters on these initializers.

* Calling out the Xavier synonym for Glorot initialization in documentation.
  • Loading branch information
BradLarson authored Jan 10, 2020
1 parent c7afd2d commit 0bb037f
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 13 deletions.
100 changes: 97 additions & 3 deletions Sources/TensorFlow/Initializers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ fileprivate extension TensorShape {
}

public extension Tensor where Scalar: TensorFlowFloatingPoint {
/// Creates a tensor with the specified shape by performing Glorot uniform initialization.
/// Creates a tensor with the specified shape by performing Glorot (Xavier) uniform initialization.
///
/// It draws random samples from a uniform distribution between `-limit` and `limit`
/// generated by the default random number generator, where `limit` is
Expand All @@ -520,16 +520,17 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
///
/// - Parameters:
/// - shape: The dimensions of the tensor.
/// - seed: The seed value.
init(glorotUniform shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
let (fanIn, fanOut) = shape.fans()
let limit = Tensor<Scalar>(Scalar.sqrt(6 / Scalar(fanIn + fanOut)))
self.init(randomUniform: shape, lowerBound: -limit, upperBound: limit, seed: seed)
}

/// Creates a tensor with the specified shape by performing Glorot normal initialization.
/// Creates a tensor with the specified shape by performing Glorot (Xavier) normal initialization.
///
/// It draws random samples from a truncated normal distribution centered on `0` with
/// standard deviation `sqrt(2 / (fanIn + fanOut))`generated by the default random number
/// standard deviation `sqrt(2 / (fanIn + fanOut))` generated by the default random number
/// generator, where `fanIn`/`fanOut` represent the number of input and output features
/// multiplied by the receptive field size.
///
Expand All @@ -538,6 +539,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
///
/// - Parameters:
/// - shape: The dimensions of the tensor.
/// - seed: The seed value.
init(glorotNormal shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
let (fanIn, fanOut) = shape.fans()
var standardDeviation = Tensor<Scalar>(Scalar.sqrt(2 / Scalar(fanIn + fanOut)))
Expand All @@ -552,6 +554,98 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
}
}

public extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Creates a tensor with the specified shape by performing He (Kaiming) uniform initialization.
    ///
    /// It draws random samples from a uniform distribution between `-limit` and `limit`
    /// generated by the default random number generator, where `limit` is
    /// `sqrt(6 / fanIn)` and `fanIn` represents the number of input features multiplied by the
    /// receptive field size.
    ///
    /// Reference: ["Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet
    /// Classification"](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf)
    ///
    /// - Parameters:
    ///   - shape: The dimensions of the tensor.
    ///   - seed: The seed value.
    init(heUniform shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
        let (fanIn, _) = shape.fans()
        let limit = Tensor<Scalar>(Scalar.sqrt(6 / Scalar(fanIn)))
        self.init(randomUniform: shape, lowerBound: -limit, upperBound: limit, seed: seed)
    }

    /// Creates a tensor with the specified shape by performing He (Kaiming) normal initialization.
    ///
    /// It draws random samples from a truncated normal distribution centered on `0` with
    /// standard deviation `sqrt(2 / fanIn)` generated by the default random number
    /// generator, where `fanIn` represents the number of input features multiplied by the
    /// receptive field size.
    ///
    /// Reference: ["Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet
    /// Classification"](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf)
    ///
    /// - Parameters:
    ///   - shape: The dimensions of the tensor.
    ///   - seed: The seed value.
    init(heNormal shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
        let (fanIn, _) = shape.fans()
        var standardDeviation = Tensor<Scalar>(Scalar.sqrt(2 / Scalar(fanIn)))
        // Standard deviation of truncated standard normal between `-2` and `2` standard deviations.
        let truncationDeviation = Tensor<Scalar>(0.87962566103423978)
        standardDeviation /= truncationDeviation // Smooths the tails of the clipped normal.
        self.init(
            randomTruncatedNormal: shape,
            mean: Tensor<Scalar>(0),
            standardDeviation: standardDeviation,
            seed: seed)
    }
}

public extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Creates a tensor with the specified shape by performing LeCun uniform initialization.
    ///
    /// Scalars are sampled from a uniform distribution on `[-limit, limit]` using the default
    /// random number generator, where `limit` is `sqrt(3 / fanIn)` and `fanIn` is the number of
    /// input features multiplied by the receptive field size.
    ///
    /// Reference: ["Efficient BackProp"](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
    ///
    /// - Parameters:
    ///   - shape: The dimensions of the tensor.
    ///   - seed: The seed value.
    init(leCunUniform shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
        let (fanIn, _) = shape.fans()
        let bound = Tensor<Scalar>(Scalar.sqrt(3 / Scalar(fanIn)))
        self.init(randomUniform: shape, lowerBound: -bound, upperBound: bound, seed: seed)
    }

    /// Creates a tensor with the specified shape by performing LeCun normal initialization.
    ///
    /// Scalars are sampled from a truncated normal distribution centered on `0` with standard
    /// deviation `sqrt(1 / fanIn)` using the default random number generator, where `fanIn` is
    /// the number of input features multiplied by the receptive field size.
    ///
    /// Reference: ["Efficient BackProp"](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
    ///
    /// - Parameters:
    ///   - shape: The dimensions of the tensor.
    ///   - seed: The seed value.
    init(leCunNormal shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
        let (fanIn, _) = shape.fans()
        // Standard deviation of a standard normal truncated to ±2 standard deviations;
        // dividing by it compensates for the variance lost to truncation.
        let truncatedNormalDeviation = Tensor<Scalar>(0.87962566103423978)
        let standardDeviation =
            Tensor<Scalar>(Scalar.sqrt(1 / Scalar(fanIn))) / truncatedNormalDeviation
        self.init(
            randomTruncatedNormal: shape,
            mean: Tensor<Scalar>(0),
            standardDeviation: standardDeviation,
            seed: seed)
    }
}

public extension Tensor where Scalar: TensorFlowFloatingPoint {
/// Creates an orthogonal matrix or tensor.
///
Expand Down
58 changes: 55 additions & 3 deletions Sources/TensorFlow/Layers/Initialization.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ public func constantInitializer<Scalar: TensorFlowFloatingPoint>(
}
}

/// Returns a function that creates a tensor by performing Glorot uniform initialization for the
/// specified shape, randomly sampling scalar values from a uniform distribution between `-limit`
/// and `limit`, generated by the default random number generator, where limit is
/// Returns a function that creates a tensor by performing Glorot (Xavier) uniform initialization
/// for the specified shape, randomly sampling scalar values from a uniform distribution between
/// `-limit` and `limit`, generated by the default random number generator, where limit is
/// `sqrt(6 / (fanIn + fanOut))`, and `fanIn`/`fanOut` represent the number of input and output
/// features multiplied by the receptive field, if present.
public func glorotUniform<Scalar: TensorFlowFloatingPoint>(
Expand All @@ -54,6 +54,58 @@ public func glorotUniform<Scalar: TensorFlowFloatingPoint>(
{ Tensor<Scalar>(glorotUniform: $0, seed: seed) }
}

/// Returns a function that creates a tensor for the given shape using Glorot (Xavier) normal
/// initialization: scalars are sampled from a truncated normal distribution centered on `0`
/// with standard deviation `sqrt(2 / (fanIn + fanOut))`, where `fanIn`/`fanOut` represent the
/// number of input and output features multiplied by the receptive field size, if present.
public func glorotNormal<Scalar: TensorFlowFloatingPoint>(
    seed: TensorFlowSeed = Context.local.randomSeed
) -> ParameterInitializer<Scalar> {
    return { shape in Tensor<Scalar>(glorotNormal: shape, seed: seed) }
}

/// Returns a function that creates a tensor for the given shape using He (Kaiming) uniform
/// initialization: scalars are sampled from a uniform distribution on `[-limit, limit]`,
/// generated by the default random number generator, where `limit` is `sqrt(6 / fanIn)` and
/// `fanIn` represents the number of input features multiplied by the receptive field, if present.
public func heUniform<Scalar: TensorFlowFloatingPoint>(
    seed: TensorFlowSeed = Context.local.randomSeed
) -> ParameterInitializer<Scalar> {
    return { shape in Tensor<Scalar>(heUniform: shape, seed: seed) }
}

/// Returns a function that creates a tensor for the given shape using He (Kaiming) normal
/// initialization: scalars are sampled from a truncated normal distribution centered on `0`
/// with standard deviation `sqrt(2 / fanIn)`, where `fanIn` represents the number of input
/// features multiplied by the receptive field size, if present.
public func heNormal<Scalar: TensorFlowFloatingPoint>(
    seed: TensorFlowSeed = Context.local.randomSeed
) -> ParameterInitializer<Scalar> {
    return { shape in Tensor<Scalar>(heNormal: shape, seed: seed) }
}

/// Returns a function that creates a tensor for the given shape using LeCun uniform
/// initialization: scalars are sampled from a uniform distribution on `[-limit, limit]`,
/// generated by the default random number generator, where `limit` is `sqrt(3 / fanIn)` and
/// `fanIn` represents the number of input features multiplied by the receptive field, if present.
public func leCunUniform<Scalar: TensorFlowFloatingPoint>(
    seed: TensorFlowSeed = Context.local.randomSeed
) -> ParameterInitializer<Scalar> {
    return { shape in Tensor<Scalar>(leCunUniform: shape, seed: seed) }
}

/// Returns a function that creates a tensor for the given shape using LeCun normal
/// initialization: scalars are sampled from a truncated normal distribution centered on `0`
/// with standard deviation `sqrt(1 / fanIn)`, where `fanIn` represents the number of input
/// features multiplied by the receptive field size, if present.
public func leCunNormal<Scalar: TensorFlowFloatingPoint>(
    seed: TensorFlowSeed = Context.local.randomSeed
) -> ParameterInitializer<Scalar> {
    return { shape in Tensor<Scalar>(leCunNormal: shape, seed: seed) }
}

/// Returns a function that creates a tensor by initializing all its values randomly from a
/// truncated Normal distribution. The generated values follow a Normal distribution with mean
/// `mean` and standard deviation `standardDeviation`, except that values whose magnitude is more
Expand Down
53 changes: 46 additions & 7 deletions Tests/TensorFlowTests/InitializerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,16 @@ final class InitializerTests: XCTestCase {
let t = Tensor<Float>(
randomUniform: fcShape,
lowerBound: Tensor(2),
upperBound: Tensor(3))
upperBound: Tensor(3),
seed: (0xFeed, 0xBeef))
testDistribution(t, expectedMean: 2.5, expectedMin: 2, expectedMax: 3)
}
do {
let t = Tensor<Float>(
randomUniform: fcShape,
lowerBound: Tensor(-1),
upperBound: Tensor(1))
upperBound: Tensor(1),
seed: (0xFeed, 0xBeef))
testDistribution(t, expectedMean: 0, expectedMin: -1, expectedMax: 1)
}
}
Expand All @@ -145,29 +147,62 @@ final class InitializerTests: XCTestCase {
let t = Tensor<Float>(
randomNormal: convShape,
mean: Tensor(1),
standardDeviation: Tensor(2))
standardDeviation: Tensor(2),
seed: (0xFeed, 0xBeef))
testDistribution(t, expectedMean: 1, expectedStandardDeviation: 2)
}

// Verifies that a seeded truncated normal sample is centered on 0 and clipped to ±2
// standard deviations.
// NOTE(review): the diff rendering left both the pre- and post-change `let t` lines in
// place, producing a duplicate declaration; only the seeded (post-change) line is kept.
func testRandomTruncatedNormal() {
    let t = Tensor<Float>(randomTruncatedNormal: convShape, seed: (0xFeed, 0xBeef))
    testDistribution(t, expectedMean: 0, expectedMin: -2, expectedMax: 2)
}

// Verifies that Glorot uniform samples have mean 0 and standard deviation
// sqrt(2 / (fanIn + fanOut)) — the variance of a uniform on ±sqrt(6 / (fanIn + fanOut)).
// NOTE(review): the diff rendering left both the pre- and post-change `let t` lines in
// place, producing a duplicate declaration; only the seeded (post-change) line is kept.
func testGlorotUniform() {
    let t = Tensor<Float>(glorotUniform: convShape, seed: (0xFeed, 0xBeef))
    let spatialSize = convShape[0..<2].contiguousSize
    let (fanIn, fanOut) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
    let stdDev = sqrt(Float(2.0) / Float(fanIn + fanOut))
    testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev, tolerance: 1e-4)
}

// Verifies that Glorot normal samples have mean 0 and standard deviation
// sqrt(2 / (fanIn + fanOut)).
// NOTE(review): the diff rendering left both the pre- and post-change lines in place
// (duplicate `let t` declaration and duplicate final assertion); only the seeded,
// tolerance-bearing (post-change) lines are kept.
func testGlorotNormal() {
    let t = Tensor<Float>(glorotNormal: convShape, seed: (0xFeed, 0xBeef))
    let spatialSize = convShape[0..<2].contiguousSize
    let (fanIn, fanOut) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
    let stdDev = sqrt(Float(2.0) / Float(fanIn + fanOut))
    testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev, tolerance: 1e-4)
}

// Verifies that He uniform samples have mean 0 and standard deviation sqrt(2 / fanIn) —
// the variance of a uniform distribution on ±sqrt(6 / fanIn).
func testHeUniform() {
    let t = Tensor<Float>(heUniform: convShape, seed: (0xFeed, 0xBeef))
    let receptiveField = convShape[0..<2].contiguousSize
    let fanIn = convShape[2] * receptiveField
    let expectedStdDev = sqrt(Float(2.0) / Float(fanIn))
    testDistribution(t, expectedMean: 0, expectedStandardDeviation: expectedStdDev, tolerance: 1e-4)
}

// Verifies that He normal samples have mean 0 and standard deviation sqrt(2 / fanIn).
func testHeNormal() {
    let t = Tensor<Float>(heNormal: convShape, seed: (0xFeed, 0xBeef))
    let receptiveField = convShape[0..<2].contiguousSize
    let fanIn = convShape[2] * receptiveField
    let expectedStdDev = sqrt(Float(2.0) / Float(fanIn))
    testDistribution(t, expectedMean: 0, expectedStandardDeviation: expectedStdDev, tolerance: 1e-4)
}

// Verifies that LeCun uniform samples have mean 0 and standard deviation sqrt(1 / fanIn) —
// the variance of a uniform distribution on ±sqrt(3 / fanIn).
func testLeCunUniform() {
    let t = Tensor<Float>(leCunUniform: convShape, seed: (0xFeed, 0xBeef))
    let receptiveField = convShape[0..<2].contiguousSize
    let fanIn = convShape[2] * receptiveField
    let expectedStdDev = sqrt(Float(1.0) / Float(fanIn))
    testDistribution(t, expectedMean: 0, expectedStandardDeviation: expectedStdDev, tolerance: 1e-4)
}

// Verifies that LeCun normal samples have mean 0 and standard deviation sqrt(1 / fanIn).
func testLeCunNormal() {
    let t = Tensor<Float>(leCunNormal: convShape, seed: (0xFeed, 0xBeef))
    let receptiveField = convShape[0..<2].contiguousSize
    let fanIn = convShape[2] * receptiveField
    let expectedStdDev = sqrt(Float(1.0) / Float(fanIn))
    testDistribution(t, expectedMean: 0, expectedStandardDeviation: expectedStdDev, tolerance: 1e-4)
}

func testCategoricalFromLogits() {
Expand Down Expand Up @@ -212,6 +247,10 @@ final class InitializerTests: XCTestCase {
("testRandomTruncatedNormal", testRandomTruncatedNormal),
("testGlorotUniform", testGlorotUniform),
("testGlorotNormal", testGlorotNormal),
("testHeUniform", testHeUniform),
("testHeNormal", testHeNormal),
("testLeCunUniform", testLeCunUniform),
("testLeCunNormal", testLeCunNormal),
("testCategoricalFromLogits", testCategoricalFromLogits),
("testOrthogonalShapesValues", testOrthogonalShapesValues)
]
Expand Down

0 comments on commit 0bb037f

Please sign in to comment.