Skip to content

Some work on Boston Housing Example #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 27 additions & 23 deletions Examples/Regression-BostonHousing/main.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
open Datasets
open DiffSharp
open DiffSharp.Model
open DiffSharp.Util

// open Dataset
let dataset = BostonHousing()
Expand All @@ -42,47 +43,50 @@ model.mode <- Mode.Train

let epochCount = 500
let batchSize = 32
let numberOfBatch = int(ceil(Double(dataset.numTrainRecords) / double(batchSize)))
let numberOfBatch = int(ceil(double(dataset.numTrainRecords) / double(batchSize)))
let shuffle = true

let meanAbsoluteError(predictions=Tensor, truths: Tensor) =
abs(Tensor(predictions - truths)).mean().toScalar()
let meanAbsoluteError(predictions: Tensor, truths: Tensor) =
abs(predictions - truths).mean().toScalar()
let meanSquaredError(predicted: Tensor, expected: Tensor) =
(predicted - expected) |> fun error -> (error * error).mean()


print("Starting training..")
printfn("Starting training..")

for epoch in 1..epochCount do
let epochLoss: double = 0
let epochMAE: double = 0
let batchCount: int = 0
let batchArray = Array.replicate false, count: numberOfBatch)
let mutable epochLoss: double = 0.0
let mutable epochMAE: double = 0.0
let mutable batchCount: int = 0
let batchArray = Array.replicate numberOfBatch false
for batch in 0..numberOfBatch-1 do
let r = batch
let mutable r = batch
if shuffle then
while true do
r = Int.random(0..numberOfBatch-1)
let mutable continueLooping = true
while continueLooping do
r <- Random.Integer(0,numberOfBatch-1)
if not batchArray.[r] then
batchArray.[r] = true
break
batchArray.[r] <- true
continueLooping <- false

let batchStart = r * batchSize
let batchEnd = min(dataset.numTrainRecords, batchStart + batchSize)
let batchEnd = min dataset.numTrainRecords (batchStart + batchSize)
let (loss, grad) = valueWithGradient<| fun model -> = (model: RegressionModel) = Tensor in
let logits = model(dataset.xTrain[batchStart..<batchEnd])
meanSquaredError(predicted=logits, expected=dataset.yTrain[batchStart..<batchEnd])

optimizer.update(&model, along=grad)

let logits = model(dataset.xTrain[batchStart..<batchEnd])
epochMAE <- epochMAE + meanAbsoluteError(predictions=logits, truths: dataset.yTrain[batchStart..<batchEnd])
epochLoss <- epochLoss + loss.toScalar()
let logits = model(dataset.xTrain.[batchStart..batchEnd-1])
epochMAE <- epochMAE + meanAbsoluteError(logits, dataset.yTrain.[batchStart..batchEnd-1]).toDouble()
epochLoss <- epochLoss + loss.toScalar().toDouble()
batchCount <- batchCount + 1

epochMAE /= double(batchCount)
epochLoss /= double(batchCount)
epochMAE <- epochMAE / double(batchCount)
epochLoss <- epochLoss / double(batchCount)

if epoch = epochCount-1 then
print($"MSE: {epochLoss}, MAE: {epochMAE}, Epoch: \(epoch+1)")
printfn $"MSE: {epochLoss}, MAE: {epochMAE}, Epoch: {epoch+1}"



Expand All @@ -94,7 +98,7 @@ model.mode <- Mode.Eval

let prediction = model(dataset.xTest)

let evalMse = meanSquaredError(predicted=prediction, expected=dataset.yTest).toScalar()/double(dataset.numTestRecords)
let evalMae = meanAbsoluteError(predictions=prediction, truths: dataset.yTest)/double(dataset.numTestRecords)
let evalMse = meanSquaredError(prediction, dataset.yTest).toScalar().toDouble()/double(dataset.numTestRecords)
let evalMae = meanAbsoluteError(prediction, dataset.yTest).toDouble()/double(dataset.numTestRecords)

print($"MSE: {evalMse}, MAE: {evalMae}")
print($"MSE: {evalMse}, MAE: {evalMae}")
42 changes: 26 additions & 16 deletions Library/Datasets/BostonHousing/BostonHousing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -32,40 +32,50 @@ type BostonHousing() =

if not (File.Exists(downloadPath)) || Directory.GetFiles(downloadPath) |> Array.isEmpty then
DatasetUtilities.downloadResource(
filename=downloadFile,
filename=Path.GetFileName(downloadFile),
remoteRoot= remoteURL, localStorageDirectory=downloadPath,
extract=false)
|> ignore

File.ReadAllText(downloadFile, Encoding.UTF8)

// Convert Space Separated CSV with no Header
let dataRecords = data.Split("\n") |> Array.map (fun s -> s.Split(" ") |> Array.map float)
let dataRecords =
data.Split("\n")
|> Array.filter(fun line -> line <> "")
|> Array.map (fun s ->
s.Split(" ")
|> Array.filter(fun x -> x <> "")
|> Array.map float)

let numRecords = dataRecords.Length
let numColumns = dataRecords.[0].Length
let nRecords = dataRecords.Length
let nColumns = dataRecords.[0].Length

let dataFeatures = dataRecords |> Array.map (fun arr -> arr.[0..numColumns - 2])
let dataLabels = dataRecords |> Array.map (fun arr -> arr.[(numColumns - 1)..])
let dataFeatures = dataRecords |> Array.map (fun arr -> arr.[0..nColumns - 2])
let dataLabels = dataRecords |> Array.map (fun arr -> arr.[(nColumns - 1)..])

// Normalize
let trainPercentage: double = 0.8

let numTrainRecords = int(ceil(double(numRecords) * trainPercentage))
let numTestRecords = numRecords - numTrainRecords
let nTrainRecords = int(ceil(double(nRecords) * trainPercentage))
let nTestRecords = nRecords - nTrainRecords

let xTrainPrelim = dataFeatures.[0..numTrainRecords-1] |> Array.concat
let xTestPrelim = dataFeatures.[numTrainRecords..] |> Array.concat
let yTrainPrelim = dataLabels.[0..numTrainRecords-1] |> Array.concat
let yTestPrelim = dataLabels.[numTrainRecords..] |> Array.concat
let xTrainPrelim = dataFeatures.[0..nTrainRecords-1] |> Array.concat
let xTestPrelim = dataFeatures.[nTrainRecords..] |> Array.concat
let yTrainPrelim = dataLabels.[0..nTrainRecords-1] |> Array.concat
let yTestPrelim = dataLabels.[nTrainRecords..] |> Array.concat

let xTrainDeNorm = dsharp.tensor(xTrainPrelim, dtype=Dtype.Float32).view([numTrainRecords; numColumns - 1])
let xTestDeNorm = dsharp.tensor(xTestPrelim, dtype=Dtype.Float32).view([numTestRecords; numColumns - 1])
let xTrainDeNorm = dsharp.tensor(xTrainPrelim, dtype=Dtype.Float32).view([nTrainRecords; nColumns - 1])
let xTestDeNorm = dsharp.tensor(xTestPrelim, dtype=Dtype.Float32).view([nTestRecords; nColumns - 1])

let mean = xTrainDeNorm.mean(dim=0)
let std = xTrainDeNorm.stddev(dim=0)

member val numRecords = nRecords
member val numColumns = nColumns
member val numTrainRecords = nTrainRecords
member val numTestRecords = nTestRecords
member val xTrain = (xTrainDeNorm - mean) / std
member val xTest = (xTestDeNorm - mean) / std
member val yTrain = dsharp.tensor(yTrainPrelim, dtype=Dtype.Float32).view([numTrainRecords; 1])
member val yTest = dsharp.tensor(yTestPrelim, dtype=Dtype.Float32).view([numTestRecords; 1])
member val yTrain = dsharp.tensor(yTrainPrelim, dtype=Dtype.Float32).view([nTrainRecords; 1])
member val yTest = dsharp.tensor(yTestPrelim, dtype=Dtype.Float32).view([nTestRecords; 1])
2 changes: 1 addition & 1 deletion Library/Datasets/DatasetUtilities.fs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ type DatasetUtilities() =
let extract = defaultArg extract true
if not extract then
use wc = new WebClient()
wc.DownloadFile(remoteRoot, localFileName)
wc.DownloadFile(Uri(remoteRoot,filename), localFileName)
else
failwith "TBD" // let r = new BinaryReader(new GZipStream(File.Open(filename, FileMode.Open, FileAccess.Read, FileShare.Read), CompressionMode.Decompress))
localFileName
Expand Down