# Load the PSP records, add a `year` column (a character string produced by
# format()) derived from each record's date, and keep only the rows whose
# species is "mytilus".
psp <- read_psp_data(fix_species = TRUE) |>
  mutate(year = format(date, format = "%Y")) |>
  filter(species == "mytilus")
Preparing Model Input Data
To get started, let’s call make_image_list() to carry out all of the steps described in Building training samples.
# Build the list of training samples from `psp`. Each element is later read
# through its $image, $classification, $location_id, $date and $toxicity
# fields.
# NOTE(review): the four tox_levels presumably delimit the four classes used
# later (num_classes = 4) -- confirm against make_image_list().
image_list <- make_image_list(psp,
                              tox_levels = c(0, 10, 30, 80),
                              forecast_steps = 1,
                              n_steps = 2,
                              minimum_gap = 4,
                              maximum_gap = 10,
                              toxins = c("gtx4", "gtx1", "dcgtx3", "gtx5",
                                         "dcgtx2", "gtx3", "gtx2", "neo",
                                         "dcstx", "stx", "c1", "c2"),
                              environmentals = c())
A look inside pool_images_and_labels()
Steps:
Find the dimensions of a single image
# Every sample is reshaped with these dimensions, so read them once from the
# first image.
dim_image <- dim(image_list[[1]]$image)
dim_image
[1] 2 12
Bring all images into a single object
1. List of 2D image matrices ->
2. 3D matrix ->
3. 2D matrix of all images (dimensions: n images (rows), n weeks x n predictor variables (columns))
Use keras::array_reshape()
# Pull the 2D image matrix out of every sample.
images <- lapply(image_list, function(x) x$image)

# Stack the matrices along a third axis, move that sample axis to the front,
# then flatten so each sample becomes one row of length prod(dim_image).
image <- abind::abind(images, along = 3) |>
  aperm(c(3, 1, 2)) |>
  keras::array_reshape(c(length(image_list), prod(dim_image)))

dim(image)
[1] 8241 24
Replace any NA values
Change the label to a categorical encoding (keras::to_categorical())
Our model will predict probabilities for each of the possible classes, so the label for each image will be a vector containing 0s for the incorrect classes and a 1 for the correct one
# Collect each sample's class and expand it into a 4-column indicator matrix
# (one column per class), matching the per-class probabilities the model
# predicts.
labels <- sapply(image_list, function(x) x$classification) |>
  keras::to_categorical(num_classes = 4)
Return a list containing the image input with labels and all of the metadata for each of the samples (location, date, actual toxicity, etc)
# Gather the per-sample metadata into plain vectors. The names that sapply()
# carries over from `image_list` are dropped from each vector.
classifications <- sapply(image_list, function(x) x$classification)
attr(classifications, "names") <- NULL

locations <- sapply(image_list, function(x) x$location_id)
attr(locations, "names") <- NULL

# NOTE(review): dates print as bare numbers (e.g. 16216) after sapply();
# presumably days since 1970-01-01 -- confirm before converting back.
dates <- sapply(image_list, function(x) x$date)
attr(dates, "names") <- NULL

toxicity <- sapply(image_list, function(x) x$toxicity)
attr(toxicity, "names") <- NULL

# Bundle the model input, its labels, and the metadata for every sample.
r <- list(labels = labels,
          image = image,
          classifications = classifications,
          toxicity = toxicity,
          locations = locations,
          dates = dates)
Here’s an example of one label
r$labels[1, ]
[1] 1 0 0 0
The dimensions of our list of labels should be the length of the test set x the number of possible classes
# One row per pooled sample, one indicator column per class.
dim(r$labels)
[1] 8241 4
r$toxicity[1]
[1] 7.637664
r$locations[1]
[1] "PSP10.011"
r$dates[1]
[1] 16216