# Run the project's setup script; echo = FALSE keeps the sourced code from
# being printed.
source("setup.R", echo = FALSE)
suppressPackageStartupMessages(library(tidysdm))

# Bounding box requested in polygon form.
# NOTE(review): get_bb() and read_obis() are not defined in this file;
# presumably they are provided by setup.R - confirm.
bb <- get_bb(form = "polygon")

# Large-scale Natural Earth coastline, reduced to its bare geometry (used
# below only as a plotting overlay).
coast <- rnaturalearth::ne_coastline(scale = "large", returnclass = "sf") |>
  sf::st_geometry()

# Observations, keeping only records dated 2000-01-01 or later.
obs <- read_obis() |>
  dplyr::filter(date >= as.Date("2000-01-01"))
Data thinning
1 Thinning the observations
Here we thin the data. The idea of thinning is to have one presence point per cell of the target raster output. For this purpose we’ll need not just the observation data, but also a raster of the desired extent and resolution. We have two rasterized covariate datasets we can load in, and then use one or the other as our template.
First, the observations…
And now the covariates…
# Sea-surface-temperature database, ordered by date.
sst_path <- "data/oisst"
sst_db <- oisster::read_database(sst_path) |>
  dplyr::arrange(date)

# Wind database, ordered by date.
wind_path <- "data/nbs"
wind_db <- nbs::read_database(wind_path) |>
  dplyr::arrange(date)

# Read the predictor rasters: the SST database as-is, and the wind database
# split into one subset per parameter.
# NOTE(review): read_predictors() is not defined in this file; presumably it
# is provided by setup.R - confirm.
preds <- read_predictors(
  sst_db = sst_db,
  windspeed_db = wind_db |> dplyr::filter(param == "windspeed"),
  u_wind_db = wind_db |> dplyr::filter(param == "u_wind"),
  v_wind_db = wind_db |> dplyr::filter(param == "v_wind")
)
We actually provide a simpler method for loading the predictor variables as rasters. In the future you may see this: preds = read_predictors(quick = TRUE).
We’ll take the first slice of sst as a template and convert it into a mask. We’ll also save the mask for later use.
# Build a template mask from the first time slice of the sst layer and save
# it to disk for later use.
mask <- dplyr::slice(preds["sst"], "time", 1) |>
  rlang::set_names("mask") |>
  # is.na(mask) + 1 is 1 for cells with data and 2 for missing cells, so the
  # lookup yields "mask" where data exist and NA elsewhere; the result is a
  # single-level factor.
  dplyr::mutate(
    mask = factor(
      c("mask", NA_character_)[as.numeric(is.na(mask) + 1)],
      levels = "mask"
    )
  ) |>
  stars::write_stars("data/mask/mask_factor.tif")

# Draw the mask, then overlay every observation and the coastline.
# reset = FALSE leaves the plot open so the add = TRUE layers land on it.
plot(mask, breaks = "equal", axes = TRUE, reset = FALSE)
plot(sf::st_geometry(obs), pch = "+", add = TRUE)
plot(coast, col = "orange", add = TRUE)
Now we can thin using thin_by_cell(). You can see the number of observations is greatly winnowed.
# Fix the RNG seed before thinning so the result is repeatable.
# NOTE(review): presumably thin_by_cell() picks the retained point per cell
# at random - confirm against the tidysdm documentation.
set.seed(1234)
thinned_obs <- tidysdm::thin_by_cell(obs, raster = mask)

# Draw the mask, then overlay the thinned observations and the coastline.
plot(mask, breaks = "equal", axes = TRUE, reset = FALSE)
plot(sf::st_geometry(thinned_obs), pch = "+", add = TRUE)
plot(coast, col = "orange", add = TRUE)
Next we thin again, this time by separation distance. Note that this removes even more observation points. We’ll also save these for later reuse.
# Thin the cell-thinned observations again by minimum separation distance,
# then write the result to a GeoPackage for later reuse.
# NOTE(review): km2m() is not defined in this file; presumably it converts
# kilometres to metres (i.e. a 20 km minimum distance) - confirm.
thinned_obs <- tidysdm::thin_by_dist(thinned_obs, dist_min = km2m(20)) |>
  sf::write_sf("data/obs/thinned_obs.gpkg")

# Draw the mask, then overlay the remaining observations and the coastline.
plot(mask, breaks = "equal", axes = TRUE, reset = FALSE)
plot(sf::st_geometry(thinned_obs), pch = "+", add = TRUE)
plot(coast, col = "orange", add = TRUE)