image_list <- make_image_list(psp,
                              tox_levels = c(0, 10, 30, 80),
                              forecast_steps = 1,
                              n_steps = 3,
                              minimum_gap = 4,
                              maximum_gap = 10,
                              toxins = c("gtx4", "gtx1", "dcgtx3", "gtx5", "dcgtx2",
                                         "gtx3", "gtx2", "neo", "dcstx", "stx", "c1", "c2"),
                              environmentals = c())
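If you want to sanity-check what make_image_list() returned, a quick peek at the first element is useful. This sketch assumes only that image_list is an ordinary R list with one element per sample (we know each element carries at least a year field, since we group by it below):

# Top-level structure of the first sample (names and types only)
str(image_list[[1]], max.level = 1)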
Making predictions with the forecast model
Extract the train and test years out of the configuration file
# Splits image_list by year for grouping into train/test data
years <- sapply(image_list, function(x) {return(x$year)})
image_list <- split(image_list, years)
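As a quick check that the split worked: split() returns a named list keyed by year, so counting the samples available per year is one line:

# Number of samples in each year group
sapply(image_list, length)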
# configuration
TRAIN <- c("2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023", "2024")
TEST <- c("2014")
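Here the years are typed out by hand. Since we read cfg$train_test$test later when collecting results, the configuration presumably carries the same split, so a hypothetical equivalent (assuming a matching cfg$train_test$train entry exists) would be:

# Hypothetical: pull the year split from the configuration instead
TRAIN <- cfg$train_test$train
TEST <- cfg$train_test$test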
Prepare the train and test sets by calling pool_images_and_labels()
train <- pool_images_and_labels(image_list[TRAIN],
                                cfg = cfg,
                                downsample = FALSE,
                                upsample = FALSE)

test <- pool_images_and_labels(image_list[TEST],
                               cfg = cfg,
                               upsample = FALSE)

model_input <- list(train = train,
                    test = test)
Get the dimensions of one input sample; we'll use the second dimension to tell the model how many input features to expect.
dim_test <- dim(model_input$test$image)
Now define the model architecture:
- Input layer
- One hidden layer
- Output layer
model <- keras::keras_model_sequential() |>
  keras::layer_dense(units = 32,
                     activation = "relu",
                     input_shape = dim_test[2],
                     name = "layer1") |>
  keras::layer_dropout(rate = 0.3) |>
  keras::layer_dense(units = 32,
                     activation = "relu",
                     name = "layer2") |>
  keras::layer_dropout(rate = 0.3) |>
  keras::layer_dense(units = 4,
                     activation = "softmax")
model
Model: "sequential"
________________________________________________________________________________
Layer (type) Output Shape Param #
================================================================================
layer1 (Dense) (None, 32) 1184
________________________________________________________________________________
dropout_1 (Dropout) (None, 32) 0
________________________________________________________________________________
layer2 (Dense) (None, 32) 1056
________________________________________________________________________________
dropout (Dropout) (None, 32) 0
________________________________________________________________________________
dense (Dense) (None, 4) 132
================================================================================
Total params: 2,372
Trainable params: 2,372
Non-trainable params: 0
________________________________________________________________________________
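The parameter counts in the summary are easy to verify by hand: a dense layer has (inputs × units) weights plus one bias per unit. Working backwards from layer1's 1,184 parameters also tells us each input sample has 36 features:

36 * 32 + 32      # layer1: 1184 (so dim_test[2] must be 36)
32 * 32 + 32      # layer2: 1056
32 * 4 + 4        # output layer: 132
1184 + 1056 + 132 # total trainable params: 2372

The dropout layers contribute zero parameters because they only mask activations at training time.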
Compile the model we defined
model <- model |>
  keras::compile(optimizer = "adam",
                 loss = "categorical_crossentropy",
                 metrics = "categorical_accuracy")
Fit the training data to the model (or the model to the training data?)
history <- model |>
  keras::fit(x = model_input$train$image,
             y = model_input$train$labels,
             batch_size = 32,
             epochs = 128,
             validation_split = 0.2,
             shuffle = TRUE,
             verbose = 0)
If you want to dig into what happened during training, everything is saved in the variable history.
str(history)
List of 2
$ params :List of 3
..$ verbose: int 0
..$ epochs : int 128
..$ steps : int 149
$ metrics:List of 4
..$ loss : num [1:128] 0.908 0.622 0.576 0.549 0.524 ...
..$ categorical_accuracy : num [1:128] 0.802 0.806 0.81 0.812 0.816 ...
..$ val_loss : num [1:128] 0.592 0.538 0.506 0.486 0.469 ...
..$ val_categorical_accuracy: num [1:128] 0.822 0.828 0.829 0.833 0.837 ...
- attr(*, "class")= chr "keras_training_history"
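The keras package also provides a plot() method for keras_training_history objects, so a one-liner draws the loss and accuracy curves across epochs:

# Plot training and validation metrics by epoch
plot(history)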
metrics <- model |>
  keras::evaluate(x = model_input$test$image,
                  y = model_input$test$labels)
1/42 [..............................] - ETA: 3s - loss: 0.5366 - categorical_accuracy: 0.8125
42/42 [==============================] - 0s 805us/step - loss: 0.6684 - categorical_accuracy: 0.7686
metrics[1:2]
loss categorical_accuracy
0.6684472 0.7686012
predicted_probs <- model |>
  predict(model_input$test$image)
dim(predicted_probs)
[1] 1344 4
predicted_probs[1:5,]
[,1] [,2] [,3] [,4]
[1,] 0.9893402 0.0091348179 1.221180e-03 3.038650e-04
[2,] 0.9475452 0.0437280498 7.211816e-03 1.514922e-03
[3,] 0.9789481 0.0187224559 1.998076e-03 3.313504e-04
[4,] 0.9989892 0.0009272412 6.850111e-05 1.510272e-05
[5,] 0.9915884 0.0074802847 7.771981e-04 1.541969e-04
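Each row of predicted_probs sums to 1 across the four toxicity classes. A minimal sketch of how a predicted class can be derived from these probabilities (presumably what make_forecast_list() does for the predicted_class column below) is to take the most probable bin, remembering that the classes are 0-indexed:

# Index of the most probable class per row, shifted to 0-based class labels
predicted_class <- apply(predicted_probs, 1, which.max) - 1
head(predicted_class)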
test <- list(metrics = metrics,
             year = cfg$train_test$test,
             dates = model_input$test$dates,
             locations = model_input$test$locations,
             test_classifications = model_input$test$classifications,
             test_toxicity = model_input$test$toxicity,
             predicted_probs = predicted_probs)
Using the model output, we can make a nice forecast table. In this example we're hindcasting, so we add both the prediction and the observed result so they can be compared.
f <- make_forecast_list(cfg, test)

glimpse(f)
Rows: 1,344
Columns: 16
Rowwise:
$ version <chr> "test", "test", "test", "test", "test", "test", "t…
$ location <chr> "PSP24.13", "PSP12.03", "PSP27.02", "PSP12.34", "P…
$ date <date> 2014-09-09, 2014-04-28, 2014-08-18, 2014-08-12, 2…
$ name <chr> "Moose Neck", "Potts Pt.", "Johnsons Bay", "Head B…
$ lat <dbl> 44.49714, 43.73064, 44.85190, 43.71711, 43.78556, …
$ lon <dbl> -67.71252, -70.02556, -66.99979, -69.84999, -69.87…
$ class_bins <chr> "0,10,30,80", "0,10,30,80", "0,10,30,80", "0,10,30…
$ forecast_start_date <date> 2014-09-13, 2014-05-02, 2014-08-22, 2014-08-16, 2…
$ forecast_end_date <date> 2014-09-19, 2014-05-08, 2014-08-28, 2014-08-22, 2…
$ actual_class <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 2, 0, 0, 1,…
$ actual_toxicity <dbl> 0.0000000, 0.6160263, 5.8335250, 3.3931480, 0.9289…
$ prob_0 <dbl> 98.934025, 94.754517, 97.894806, 99.898916, 99.158…
$ prob_1 <dbl> 0.91348179, 4.37280498, 1.87224559, 0.09272412, 0.…
$ prob_2 <dbl> 0.122117985, 0.721181603, 0.199807575, 0.006850111…
$ prob_3 <dbl> 0.030386497, 0.151492166, 0.033135043, 0.001510272…
$ predicted_class <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0,…
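Before building the confusion matrix, a single line gives the overall hindcast accuracy straight from the forecast table; it should agree with the categorical_accuracy that evaluate() reported (about 0.769):

# Fraction of test samples where the predicted class matches the actual class
mean(f$predicted_class == f$actual_class)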
We can also use a confusion matrix to assess, class by class, the skill of the model we built.
make_confusion_matrix(cfg, f)
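If you prefer a quick base-R approximation of the same cross-tabulation, table() works directly on the forecast table:

# Rows: observed class, columns: predicted class
table(actual = f$actual_class, predicted = f$predicted_class)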