From 0bb037f4b6ee22bc4b203a7cb67f78baff11aeba Mon Sep 17 00:00:00 2001
From: Brad Larson
Date: Fri, 10 Jan 2020 12:08:27 -0600
Subject: [PATCH] Adding He and LeCun initializers (#612)

* Adding He and LeCun initializers, tests for those, and helper functions for layer initialization.

* Added documentation for the seed parameters on these initializers.

* Calling out the Xavier synonym for Glorot initialization in documentation.
---
 Sources/TensorFlow/Initializers.swift         | 100 +++++++++++++++++-
 .../TensorFlow/Layers/Initialization.swift    |  58 +++++++++-
 Tests/TensorFlowTests/InitializerTests.swift  |  53 ++++++++--
 3 files changed, 198 insertions(+), 13 deletions(-)

diff --git a/Sources/TensorFlow/Initializers.swift b/Sources/TensorFlow/Initializers.swift
index 5a2d969af..adc0ea070 100644
--- a/Sources/TensorFlow/Initializers.swift
+++ b/Sources/TensorFlow/Initializers.swift
@@ -508,7 +508,7 @@ fileprivate extension TensorShape {
 }
 
 public extension Tensor where Scalar: TensorFlowFloatingPoint {
-    /// Creates a tensor with the specified shape by performing Glorot uniform initialization.
+    /// Creates a tensor with the specified shape by performing Glorot (Xavier) uniform initialization.
     ///
     /// It draws random samples from a uniform distribution between `-limit` and `limit`
     /// generated by the default random number generator, where `limit` is
@@ -520,16 +520,17 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     ///
     /// - Parameters:
     ///   - shape: The dimensions of the tensor.
+    ///   - seed: The seed value.
     init(glorotUniform shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
         let (fanIn, fanOut) = shape.fans()
         let limit = Tensor<Scalar>(Scalar.sqrt(6 / Scalar(fanIn + fanOut)))
         self.init(randomUniform: shape, lowerBound: -limit, upperBound: limit, seed: seed)
     }
 
-    /// Creates a tensor with the specified shape by performing Glorot normal initialization.
+    /// Creates a tensor with the specified shape by performing Glorot (Xavier) normal initialization.
     ///
     /// It draws random samples from a truncated normal distribution centered on `0` with
-    /// standard deviation `sqrt(2 / (fanIn + fanOut))`generated by the default random number
+    /// standard deviation `sqrt(2 / (fanIn + fanOut))` generated by the default random number
     /// generator, where `fanIn`/`fanOut` represent the number of input and output features
     /// multiplied by the receptive field size.
     ///
@@ -538,6 +539,7 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     ///
     /// - Parameters:
     ///   - shape: The dimensions of the tensor.
+    ///   - seed: The seed value.
     init(glorotNormal shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
         let (fanIn, fanOut) = shape.fans()
         var standardDeviation = Tensor<Scalar>(Scalar.sqrt(2 / Scalar(fanIn + fanOut)))
@@ -552,6 +554,98 @@ public extension Tensor where Scalar: TensorFlowFloatingPoint {
     }
 }
 
+public extension Tensor where Scalar: TensorFlowFloatingPoint {
+    /// Creates a tensor with the specified shape by performing He (Kaiming) uniform initialization.
+    ///
+    /// It draws random samples from a uniform distribution between `-limit` and `limit`
+    /// generated by the default random number generator, where `limit` is
+    /// `sqrt(6 / fanIn)` and `fanIn` represents the number of input features multiplied by the
+    /// receptive field size.
+    ///
+    /// Reference: ["Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet
+    /// Classification"](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf)
+    ///
+    /// - Parameters:
+    ///   - shape: The dimensions of the tensor.
+    ///   - seed: The seed value.
+    init(heUniform shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
+        let (fanIn, _) = shape.fans()
+        let limit = Tensor<Scalar>(Scalar.sqrt(6 / Scalar(fanIn)))
+        self.init(randomUniform: shape, lowerBound: -limit, upperBound: limit, seed: seed)
+    }
+
+    /// Creates a tensor with the specified shape by performing He (Kaiming) normal initialization.
+    ///
+    /// It draws random samples from a truncated normal distribution centered on `0` with
+    /// standard deviation `sqrt(2 / fanIn)` generated by the default random number
+    /// generator, where `fanIn` represents the number of input features multiplied by the
+    /// receptive field size.
+    ///
+    /// Reference: ["Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet
+    /// Classification"](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf)
+    ///
+    /// - Parameters:
+    ///   - shape: The dimensions of the tensor.
+    ///   - seed: The seed value.
+    init(heNormal shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
+        let (fanIn, _) = shape.fans()
+        var standardDeviation = Tensor<Scalar>(Scalar.sqrt(2 / Scalar(fanIn)))
+        // Standard deviation of truncated standard normal between `-2` and `2` standard deviations.
+        let truncationDeviation = Tensor<Scalar>(0.87962566103423978)
+        standardDeviation /= truncationDeviation // Smooths the tails of the clipped normal.
+        self.init(
+            randomTruncatedNormal: shape,
+            mean: Tensor<Scalar>(0),
+            standardDeviation: standardDeviation,
+            seed: seed)
+    }
+}
+
+public extension Tensor where Scalar: TensorFlowFloatingPoint {
+    /// Creates a tensor with the specified shape by performing LeCun uniform initialization.
+    ///
+    /// It draws random samples from a uniform distribution between `-limit` and `limit`
+    /// generated by the default random number generator, where `limit` is
+    /// `sqrt(3 / fanIn)` and `fanIn` represents the number of input features multiplied
+    /// by the receptive field size.
+    ///
+    /// Reference: ["Efficient BackProp"](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
+    ///
+    /// - Parameters:
+    ///   - shape: The dimensions of the tensor.
+    ///   - seed: The seed value.
+    init(leCunUniform shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
+        let (fanIn, _) = shape.fans()
+        let limit = Tensor<Scalar>(Scalar.sqrt(3 / Scalar(fanIn)))
+        self.init(randomUniform: shape, lowerBound: -limit, upperBound: limit, seed: seed)
+    }
+
+    /// Creates a tensor with the specified shape by performing LeCun normal initialization.
+    ///
+    /// It draws random samples from a truncated normal distribution centered on `0` with
+    /// standard deviation `sqrt(1 / fanIn)` generated by the default random number
+    /// generator, where `fanIn` represents the number of input features multiplied by the
+    /// receptive field size.
+    ///
+    /// Reference: ["Efficient BackProp"](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
+    ///
+    /// - Parameters:
+    ///   - shape: The dimensions of the tensor.
+    ///   - seed: The seed value.
+    init(leCunNormal shape: TensorShape, seed: TensorFlowSeed = Context.local.randomSeed) {
+        let (fanIn, _) = shape.fans()
+        var standardDeviation = Tensor<Scalar>(Scalar.sqrt(1 / Scalar(fanIn)))
+        // Standard deviation of truncated standard normal between `-2` and `2` standard deviations.
+        let truncationDeviation = Tensor<Scalar>(0.87962566103423978)
+        standardDeviation /= truncationDeviation // Smooths the tails of the clipped normal.
+        self.init(
+            randomTruncatedNormal: shape,
+            mean: Tensor<Scalar>(0),
+            standardDeviation: standardDeviation,
+            seed: seed)
+    }
+}
+
 public extension Tensor where Scalar: TensorFlowFloatingPoint {
     /// Creates an orthogonal matrix or tensor.
     ///
diff --git a/Sources/TensorFlow/Layers/Initialization.swift b/Sources/TensorFlow/Layers/Initialization.swift
index 209481ca8..e10561aac 100644
--- a/Sources/TensorFlow/Layers/Initialization.swift
+++ b/Sources/TensorFlow/Layers/Initialization.swift
@@ -43,9 +43,9 @@ public func constantInitializer(
     }
 }
 
-/// Returns a function that creates a tensor by performing Glorot uniform initialization for the
-/// specified shape, randomly sampling scalar values from a uniform distribution between `-limit`
-/// and `limit`, generated by the default random number generator, where limit is
+/// Returns a function that creates a tensor by performing Glorot (Xavier) uniform initialization
+/// for the specified shape, randomly sampling scalar values from a uniform distribution between
+/// `-limit` and `limit`, generated by the default random number generator, where limit is
 /// `sqrt(6 / (fanIn + fanOut))`, and `fanIn`/`fanOut` represent the number of input and output
 /// features multiplied by the receptive field, if present.
 public func glorotUniform<Scalar: TensorFlowFloatingPoint>(
@@ -54,6 +54,58 @@ public func glorotUniform(
     { Tensor<Scalar>(glorotUniform: $0, seed: seed) }
 }
 
+/// Returns a function that creates a tensor by performing Glorot (Xavier) normal initialization for
+/// the specified shape, randomly sampling scalar values from a truncated normal distribution centered
+/// on `0` with standard deviation `sqrt(2 / (fanIn + fanOut))`, where `fanIn`/`fanOut` represent
+/// the number of input and output features multiplied by the receptive field size, if present.
+public func glorotNormal<Scalar: TensorFlowFloatingPoint>(
+    seed: TensorFlowSeed = Context.local.randomSeed
+) -> ParameterInitializer<Scalar> {
+    { Tensor<Scalar>(glorotNormal: $0, seed: seed) }
+}
+
+/// Returns a function that creates a tensor by performing He (Kaiming) uniform initialization for
+/// the specified shape, randomly sampling scalar values from a uniform distribution between `-limit`
+/// and `limit`, generated by the default random number generator, where limit is
+/// `sqrt(6 / fanIn)`, and `fanIn` represents the number of input features multiplied by the
+/// receptive field, if present.
+public func heUniform<Scalar: TensorFlowFloatingPoint>(
+    seed: TensorFlowSeed = Context.local.randomSeed
+) -> ParameterInitializer<Scalar> {
+    { Tensor<Scalar>(heUniform: $0, seed: seed) }
+}
+
+/// Returns a function that creates a tensor by performing He (Kaiming) normal initialization for the
+/// specified shape, randomly sampling scalar values from a truncated normal distribution centered
+/// on `0` with standard deviation `sqrt(2 / fanIn)`, where `fanIn` represents the number of input
+/// features multiplied by the receptive field size, if present.
+public func heNormal<Scalar: TensorFlowFloatingPoint>(
+    seed: TensorFlowSeed = Context.local.randomSeed
+) -> ParameterInitializer<Scalar> {
+    { Tensor<Scalar>(heNormal: $0, seed: seed) }
+}
+
+/// Returns a function that creates a tensor by performing LeCun uniform initialization for
+/// the specified shape, randomly sampling scalar values from a uniform distribution between `-limit`
+/// and `limit`, generated by the default random number generator, where limit is
+/// `sqrt(3 / fanIn)`, and `fanIn` represents the number of input features multiplied by the
+/// receptive field, if present.
+public func leCunUniform<Scalar: TensorFlowFloatingPoint>(
+    seed: TensorFlowSeed = Context.local.randomSeed
+) -> ParameterInitializer<Scalar> {
+    { Tensor<Scalar>(leCunUniform: $0, seed: seed) }
+}
+
+/// Returns a function that creates a tensor by performing LeCun normal initialization for the
+/// specified shape, randomly sampling scalar values from a truncated normal distribution centered
+/// on `0` with standard deviation `sqrt(1 / fanIn)`, where `fanIn` represents the number of input
+/// features multiplied by the receptive field size, if present.
+public func leCunNormal<Scalar: TensorFlowFloatingPoint>(
+    seed: TensorFlowSeed = Context.local.randomSeed
+) -> ParameterInitializer<Scalar> {
+    { Tensor<Scalar>(leCunNormal: $0, seed: seed) }
+}
+
 /// Returns a function that creates a tensor by initializing all its values randomly from a
 /// truncated Normal distribution. The generated values follow a Normal distribution with mean
 /// `mean` and standard deviation `standardDeviation`, except that values whose magnitude is more
diff --git a/Tests/TensorFlowTests/InitializerTests.swift b/Tests/TensorFlowTests/InitializerTests.swift
index e82e95d3a..b9ea75968 100644
--- a/Tests/TensorFlowTests/InitializerTests.swift
+++ b/Tests/TensorFlowTests/InitializerTests.swift
@@ -129,14 +129,16 @@ final class InitializerTests: XCTestCase {
             let t = Tensor<Float>(
                 randomUniform: fcShape,
                 lowerBound: Tensor<Float>(2),
-                upperBound: Tensor<Float>(3))
+                upperBound: Tensor<Float>(3),
+                seed: (0xFeed, 0xBeef))
             testDistribution(t, expectedMean: 2.5, expectedMin: 2, expectedMax: 3)
         }
         do {
             let t = Tensor<Float>(
                 randomUniform: fcShape,
                 lowerBound: Tensor<Float>(-1),
-                upperBound: Tensor<Float>(1))
+                upperBound: Tensor<Float>(1),
+                seed: (0xFeed, 0xBeef))
             testDistribution(t, expectedMean: 0, expectedMin: -1, expectedMax: 1)
         }
     }
@@ -145,17 +147,18 @@ final class InitializerTests: XCTestCase {
         let t = Tensor<Float>(
             randomNormal: convShape,
             mean: Tensor<Float>(1),
-            standardDeviation: Tensor<Float>(2))
+            standardDeviation: Tensor<Float>(2),
+            seed: (0xFeed, 0xBeef))
         testDistribution(t, expectedMean: 1, expectedStandardDeviation: 2)
     }
 
     func testRandomTruncatedNormal() {
-        let t = Tensor<Float>(randomTruncatedNormal: convShape)
+        let t = Tensor<Float>(randomTruncatedNormal: convShape, seed: (0xFeed, 0xBeef))
         testDistribution(t, expectedMean: 0, expectedMin: -2, expectedMax: 2)
     }
 
     func testGlorotUniform() {
-        let t = Tensor<Float>(glorotUniform: convShape)
+        let t = Tensor<Float>(glorotUniform: convShape, seed: (0xFeed, 0xBeef))
         let spatialSize = convShape[0..<2].contiguousSize
         let (fanIn, fanOut) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
         let stdDev = sqrt(Float(2.0) / Float(fanIn + fanOut))
@@ -163,11 +166,43 @@ final class InitializerTests: XCTestCase {
     }
 
     func testGlorotNormal() {
-        let t = Tensor<Float>(glorotNormal: convShape)
+        let t = Tensor<Float>(glorotNormal: convShape, seed: (0xFeed, 0xBeef))
         let spatialSize = convShape[0..<2].contiguousSize
         let (fanIn, fanOut) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
         let stdDev = sqrt(Float(2.0) / Float(fanIn + fanOut))
-        testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev)
+        testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev, tolerance: 1e-4)
+    }
+
+    func testHeUniform() {
+        let t = Tensor<Float>(heUniform: convShape, seed: (0xFeed, 0xBeef))
+        let spatialSize = convShape[0..<2].contiguousSize
+        let (fanIn, _) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
+        let stdDev = sqrt(Float(2.0) / Float(fanIn))
+        testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev, tolerance: 1e-4)
+    }
+
+    func testHeNormal() {
+        let t = Tensor<Float>(heNormal: convShape, seed: (0xFeed, 0xBeef))
+        let spatialSize = convShape[0..<2].contiguousSize
+        let (fanIn, _) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
+        let stdDev = sqrt(Float(2.0) / Float(fanIn))
+        testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev, tolerance: 1e-4)
+    }
+
+    func testLeCunUniform() {
+        let t = Tensor<Float>(leCunUniform: convShape, seed: (0xFeed, 0xBeef))
+        let spatialSize = convShape[0..<2].contiguousSize
+        let (fanIn, _) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
+        let stdDev = sqrt(Float(1.0) / Float(fanIn))
+        testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev, tolerance: 1e-4)
+    }
+
+    func testLeCunNormal() {
+        let t = Tensor<Float>(leCunNormal: convShape, seed: (0xFeed, 0xBeef))
+        let spatialSize = convShape[0..<2].contiguousSize
+        let (fanIn, _) = (convShape[2] * spatialSize, convShape[3] * spatialSize)
+        let stdDev = sqrt(Float(1.0) / Float(fanIn))
+        testDistribution(t, expectedMean: 0, expectedStandardDeviation: stdDev, tolerance: 1e-4)
     }
 
     func testCategoricalFromLogits() {
@@ -212,6 +247,10 @@ final class InitializerTests: XCTestCase {
         ("testRandomTruncatedNormal", testRandomTruncatedNormal),
         ("testGlorotUniform", testGlorotUniform),
         ("testGlorotNormal", testGlorotNormal),
+        ("testHeUniform", testHeUniform),
+        ("testHeNormal", testHeNormal),
+        ("testLeCunUniform", testLeCunUniform),
+        ("testLeCunNormal", testLeCunNormal),
         ("testCategoricalFromLogits", testCategoricalFromLogits),
         ("testOrthogonalShapesValues", testOrthogonalShapesValues)
     ]
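
Usage note (reviewer sketch, not part of the patch): the snippet below shows how the tensor initializers and ParameterInitializer helpers added above might be exercised. It assumes the `Dense` layer's `weightInitializer:` parameter from this repository and the standard `relu` activation; the shapes, seed values, and variable names are illustrative only.

import TensorFlow

// Direct tensor creation with the new initializers; the seed here is an arbitrary
// example value, and omitting it falls back to `Context.local.randomSeed`.
let convKernel = Tensor<Float>(heNormal: [3, 3, 16, 32], seed: (0xFeed, 0xBeef))
let embedding = Tensor<Float>(leCunUniform: [1024, 256])

// Passing one of the new ParameterInitializer helpers to a layer
// (assumes `Dense.init(inputSize:outputSize:activation:weightInitializer:)`).
let classifier = Dense<Float>(
    inputSize: 256,
    outputSize: 10,
    activation: relu,
    weightInitializer: heUniform())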