From f2f0b1080fc590562a0579d3c99de658790af357 Mon Sep 17 00:00:00 2001 From: Nicholas Hirschey Date: Tue, 29 Dec 2020 20:56:14 -0700 Subject: [PATCH 1/4] make downloadResource use filename --- Library/Datasets/DatasetUtilities.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Library/Datasets/DatasetUtilities.fs b/Library/Datasets/DatasetUtilities.fs index 0ff592b..e628d37 100644 --- a/Library/Datasets/DatasetUtilities.fs +++ b/Library/Datasets/DatasetUtilities.fs @@ -28,7 +28,7 @@ type DatasetUtilities() = let extract = defaultArg extract true if not extract then use wc = new WebClient() - wc.DownloadFile(remoteRoot, localFileName) + wc.DownloadFile(Uri(remoteRoot,filename), localFileName) else failwith "TBD" // let r = new BinaryReader(new GZipStream(File.Open(filename, FileMode.Open, FileAccess.Read, FileShare.Read), CompressionMode.Decompress)) localFileName From 5878cddbc835b1467a1bda01fe0b0bcefe86507f Mon Sep 17 00:00:00 2001 From: Nicholas Hirschey Date: Wed, 30 Dec 2020 18:35:26 -0700 Subject: [PATCH 2/4] fix BostoHousing input file parsing --- .../Datasets/BostonHousing/BostonHousing.fs | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/Library/Datasets/BostonHousing/BostonHousing.fs b/Library/Datasets/BostonHousing/BostonHousing.fs index f4bf0eb..185a6f0 100644 --- a/Library/Datasets/BostonHousing/BostonHousing.fs +++ b/Library/Datasets/BostonHousing/BostonHousing.fs @@ -32,7 +32,7 @@ type BostonHousing() = if not (File.Exists(downloadPath)) || Directory.GetFiles(downloadPath) |> Array.isEmpty then DatasetUtilities.downloadResource( - filename=downloadFile, + filename=Path.GetFileName(downloadFile), remoteRoot= remoteURL, localStorageDirectory=downloadPath, extract=false) |> ignore @@ -40,32 +40,42 @@ type BostonHousing() = File.ReadAllText(downloadFile, Encoding.UTF8) // Convert Space Separated CSV with no Header - let dataRecords = data.Split("\n") |> Array.map (fun s -> s.Split(" ") |> Array.map float) + let dataRecords = + data.Split("\n") + |> Array.filter(fun line -> line <> "") + |> Array.map (fun s -> + s.Split(" ") + |> Array.filter(fun x -> x <> "") + |> Array.map float) - let numRecords = dataRecords.Length - let numColumns = dataRecords.[0].Length + let nRecords = dataRecords.Length + let nColumns = dataRecords.[0].Length - let dataFeatures = dataRecords |> Array.map (fun arr -> arr.[0..numColumns - 2]) - let dataLabels = dataRecords |> Array.map (fun arr -> arr.[(numColumns - 1)..]) + let dataFeatures = dataRecords |> Array.map (fun arr -> arr.[0..nColumns - 2]) + let dataLabels = dataRecords |> Array.map (fun arr -> arr.[(nColumns - 1)..]) // Normalize let trainPercentage: double = 0.8 - let numTrainRecords = int(ceil(double(numRecords) * trainPercentage)) - let numTestRecords = numRecords - numTrainRecords + let nTrainRecords = int(ceil(double(nRecords) * trainPercentage)) + let nTestRecords = nRecords - nTrainRecords - let xTrainPrelim = dataFeatures.[0..numTrainRecords-1] |> Array.concat - let xTestPrelim = dataFeatures.[numTrainRecords..] |> Array.concat - let yTrainPrelim = dataLabels.[0..numTrainRecords-1] |> Array.concat - let yTestPrelim = dataLabels.[numTrainRecords..] |> Array.concat + let xTrainPrelim = dataFeatures.[0..nTrainRecords-1] |> Array.concat + let xTestPrelim = dataFeatures.[nTrainRecords..] |> Array.concat + let yTrainPrelim = dataLabels.[0..nTrainRecords-1] |> Array.concat + let yTestPrelim = dataLabels.[nTrainRecords..] |> Array.concat - let xTrainDeNorm = dsharp.tensor(xTrainPrelim, dtype=Dtype.Float32).view([numTrainRecords; numColumns - 1]) - let xTestDeNorm = dsharp.tensor(xTestPrelim, dtype=Dtype.Float32).view([numTestRecords; numColumns - 1]) + let xTrainDeNorm = dsharp.tensor(xTrainPrelim, dtype=Dtype.Float32).view([nTrainRecords; nColumns - 1]) + let xTestDeNorm = dsharp.tensor(xTestPrelim, dtype=Dtype.Float32).view([nTestRecords; nColumns - 1]) let mean = xTrainDeNorm.mean(dim=0) let std = xTrainDeNorm.stddev(dim=0) + member val numRecords = nRecords + member val numColumns = nColumns + member val numTrainRecords = nTrainRecords + member val numTestRecords = nTestRecords member val xTrain = (xTrainDeNorm - mean) / std member val xTest = (xTestDeNorm - mean) / std - member val yTrain = dsharp.tensor(yTrainPrelim, dtype=Dtype.Float32).view([numTrainRecords; 1]) - member val yTest = dsharp.tensor(yTestPrelim, dtype=Dtype.Float32).view([numTestRecords; 1]) + member val yTrain = dsharp.tensor(yTrainPrelim, dtype=Dtype.Float32).view([nTrainRecords; 1]) + member val yTest = dsharp.tensor(yTestPrelim, dtype=Dtype.Float32).view([nTestRecords; 1]) From b6a5f5ff10bbf1460933cacc0a452a94d3a0769a Mon Sep 17 00:00:00 2001 From: Nicholas Hirschey Date: Wed, 30 Dec 2020 18:47:50 -0700 Subject: [PATCH 3/4] closer to a working boston housing example --- Examples/Regression-BostonHousing/main.fsx | 52 ++++++++++++---------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/Examples/Regression-BostonHousing/main.fsx b/Examples/Regression-BostonHousing/main.fsx index 28c7b31..45bf3c3 100644 --- a/Examples/Regression-BostonHousing/main.fsx +++ b/Examples/Regression-BostonHousing/main.fsx @@ -19,6 +19,7 @@ open Datasets open DiffSharp open DiffSharp.Model +open DiffSharp.Util // open Dataset let dataset = BostonHousing() @@ -42,47 +43,50 @@ model.mode <- Mode.Train let epochCount = 500 let batchSize = 32 -let numberOfBatch = int(ceil(Double(dataset.numTrainRecords) / double(batchSize))) +let numberOfBatch = int(ceil(double(dataset.numTrainRecords) / double(batchSize))) let shuffle = true -let meanAbsoluteError(predictions=Tensor, truths: Tensor) = - abs(Tensor(predictions - truths)).mean().toScalar() +let meanAbsoluteError(predictions: Tensor, truths: Tensor) = + abs(predictions - truths).mean().toScalar() +let meanSquaredError(predicted: Tensor, expected: Tensor) = + (predicted - expected) |> fun error -> (error * error).mean() -print("Starting training..") +printfn("Starting training..") for epoch in 1..epochCount do - let epochLoss: double = 0 - let epochMAE: double = 0 - let batchCount: int = 0 - let batchArray = Array.replicate false, count: numberOfBatch) + let mutable epochLoss: double = 0.0 + let mutable epochMAE: double = 0.0 + let mutable batchCount: int = 0 + let batchArray = Array.replicate numberOfBatch false for batch in 0..numberOfBatch-1 do - let r = batch + let mutable r = batch if shuffle then - while true do - r = Int.random(0..numberOfBatch-1) + let mutable continueLooping = true + while continueLooping do + r <- Random.Integer(0,numberOfBatch-1) if not batchArray.[r] then - batchArray.[r] = true - break + batchArray.[r] <- true + continueLooping <- false let batchStart = r * batchSize - let batchEnd = min(dataset.numTrainRecords, batchStart + batchSize) + let batchEnd = min dataset.numTrainRecords (batchStart + batchSize) let (loss, grad) = valueWithGradient<| fun model -> = (model: RegressionModel) = Tensor in let logits = model(dataset.xTrain[batchStart.. Date: Wed, 30 Dec 2020 18:53:38 -0700 Subject: [PATCH 4/4] remove forward calls. --- Examples/Regression-BostonHousing/main.fsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Examples/Regression-BostonHousing/main.fsx b/Examples/Regression-BostonHousing/main.fsx index 45bf3c3..3949478 100644 --- a/Examples/Regression-BostonHousing/main.fsx +++ b/Examples/Regression-BostonHousing/main.fsx @@ -77,7 +77,7 @@ for epoch in 1..epochCount do optimizer.update(&model, along=grad) - let logits = model.forward(dataset.xTrain.[batchStart..batchEnd-1]) + let logits = model(dataset.xTrain.[batchStart..batchEnd-1]) epochMAE <- epochMAE + meanAbsoluteError(logits, dataset.yTrain.[batchStart..batchEnd-1]).toDouble() epochLoss <- epochLoss + loss.toScalar().toDouble() batchCount <- batchCount + 1 @@ -96,7 +96,7 @@ print("Evaluating model..") model.mode <- Mode.Eval -let prediction = model.forward(dataset.xTest) +let prediction = model(dataset.xTest) let evalMse = meanSquaredError(prediction, dataset.yTest).toScalar().toDouble()/double(dataset.numTestRecords) let evalMae = meanAbsoluteError(prediction, dataset.yTest).toDouble()/double(dataset.numTestRecords)