Preparing Model Input Data

# Settings passed to make_image_list() below when building the samples.
cfg <- list(
  image_list = list(
    # Toxicity bin edges; reused further down to classify each measurement.
    tox_levels = c(0, 10, 30, 80),
    forecast_steps = 1,
    n_steps = 3,
    minimum_gap = 4,
    maximum_gap = 10,
    multisample_weeks = "last",
    # The 12 toxin names -- presumably the predictor columns of each image;
    # TODO confirm against make_image_list().
    toxins = c("gtx4", "gtx1", "dcgtx3", "gtx5", "dcgtx2", "gtx3",
               "gtx2", "neo", "dcstx", "stx", "c1", "c2")
  )
)

# Read the model-ready data, classify total toxicity with the SAME bin edges
# as the configuration (instead of a second hard-coded copy that could drift
# out of sync), and keep only the mytilus records.
psp <- read_psp_data(model_ready = TRUE) |>
  mutate(
    classification = psptools::recode_classification(
      .data$total_toxicity,
      cfg$image_list$tox_levels
    )
  ) |>
  filter(species == "mytilus")

To get started, let’s call make_image_list() to carry out all the steps from “Building training samples”.

# Build the list of samples from the filtered data and the configuration.
# The code further down reads $image, $classification, $location_id, $date,
# $toxicity and $species from each element.
image_list <- make_image_list(psp, cfg)

A look inside pool_images_and_labels()

Steps:

Find the dimensions of a single image

# Dimensions (rows, columns) of the first sample's image matrix. The reshape
# later on assumes every image shares this shape -- TODO confirm.
dim_image <- image_list[[1]]$image |>
  dim()

dim_image
[1]  3 12

Bring all images into a single object

1. List of 2D image matrices -\> 

2. 3D matrix -\> 

3. 2D matrix of all images (dimensions: n images (rows), n weeks x n predictor variables (columns))

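Use abind::abind() to stack the images into a 3D array, aperm() to move the image index to the first dimension, and keras::array_reshape() to flatten each image into a single row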

# Pull the 2D image matrix out of every sample.
images <- lapply(image_list, function(sample) sample$image)

# Stack the matrices along a new third axis, move that per-sample axis to the
# front, then flatten so the result has one row per sample and
# prod(dim_image) columns.
image <- keras::array_reshape(
  aperm(abind::abind(images, along = 3), perm = c(3, 1, 2)),
  dim = c(length(image_list), prod(dim_image))
)
dim(image)
[1] 7606   36

Replace any NA values

Change label to a categorical (keras::to_categorical())

Our model will predict probabilities for each of the possible classes, so the label for each image will be a one-hot vector: 0s for the incorrect classes and a 1 for the correct one.

# Gather each sample's class index, then expand the vector into a matrix with
# one row per sample and one column per class (4 classes).
labels <- keras::to_categorical(
  sapply(image_list, function(sample) sample$classification),
  num_classes = 4
)

Return a list containing the image input with labels and all of the metadata for each of the samples (location, date, actual toxicity, etc.).

# Collect the per-sample metadata, one vector per field, dropping any names
# that sapply() carried over from image_list.
classifications <- sapply(image_list, function(sample) sample$classification)
names(classifications) <- NULL

locations <- sapply(image_list, function(sample) sample$location_id)
names(locations) <- NULL

# NOTE(review): sapply() simplifies to a bare vector, so if x$date is a Date
# its class is dropped and the values come back as plain numbers -- confirm
# that downstream code expects this.
dates <- sapply(image_list, function(sample) sample$date)
names(dates) <- NULL

toxicity <- sapply(image_list, function(sample) sample$toxicity)
names(toxicity) <- NULL

species <- sapply(image_list, function(sample) sample$species)
names(species) <- NULL

# Bundle the model input (image), its one-hot labels and the metadata into a
# single list; element i of every entry refers to the same sample.
r <- list(
  labels = labels,
  image = image,
  classifications = classifications,
  toxicity = toxicity,
  species = species,
  locations = locations,
  dates = dates
)

Here’s an example of one label

# First row of the label matrix, i.e. the one-hot label of the first sample.
r$labels[1, ]
[1] 0 1 0 0

The dimensions of our label matrix should be the number of samples x the number of possible classes

# Shape of the label matrix: one row per sample, one column per class
# (to_categorical() was called with num_classes = 4).
dim(r$labels)
[1] 7606    4
# Metadata of the first sample, taken from the matching element of image_list.
r$toxicity[1]
[1] 12.69954
r$locations[1]
[1] "PSP10.011"
# Printed as a bare number because sapply() returned a plain vector --
# presumably days since 1970-01-01 if the source field is a Date; TODO confirm.
r$dates[1]
[1] 16237
r$species[1]
[1] "mytilus"