Intersect a vector data file of spatial zones (ecoregions, provisional
seed transfer zones, or other spatial partitions) with the range of a focal taxon
and select n zones to sample. If fewer than n zones exist, extra samples are
allocated using the specified method. If more than n zones exist, zones are selected
using the specified method.
Arguments
- x
sf object. Species range as a simple feature.
- zones
sf object. Spatial zones vector data (ecoregions, PSTZs, etc.).
- zone_key
Character. Column name identifying unique zones (required).
- n
Numeric. Desired total number of samples. Default = 20.
- decrease_method
Character. Method when n < number of zones. One of: "Largest", "Smallest", "Most", "Assist-warm", "Assist-drier". Default = "Largest".
- increase_method
Character. Method when n > number of zones. One of: "Largest", "Smallest", "Most", "Assist-warm", "Assist-drier". Default = "Largest".
- warmest_col
Character. Column name for warmest temperature metric (required for "Assist-warm" method). Higher values in this column assumed warmer.
- precip_col
Character. Column name for precipitation metric (required for "Assist-drier" method). Lower values in this column assumed drier.
Value
sf object with selected zones/polygons and an allocation column
indicating number of samples per polygon.
Details
Simple features can store polygon data as 'MULTIPOLYGON' (all polygons of a class stored collectively) or 'POLYGON' (each individual polygon is a unique entry). This function will cast MULTIPOLYGONS to POLYGONS as needed, but pre-casting will improve performance.
Available methods for zone selection:
Largest: Select zones by total area (descending)
Smallest: Select zones by total area (ascending)
Most: Select zones with most polygons (highest fragmentation)
Assist-warm: Select warmest zones (requires
warmest_col)Assist-drier: Select driest zones (requires
precip_col)
Examples
if (FALSE) { # \dontrun{
library(tidyverse)
sr_mat <- rbind(
c(0,0), c(10,0), c(10,10), c(0,10), c(0,0)
)
sr_poly <- sf::st_polygon(list(sr_mat))
x <- sf::st_sf(id = 1, geometry = sf::st_sfc(sr_poly))
rm(sr_mat, sr_poly)
zone_polys = data.frame(
## randomly generate some points in XY space.
x = runif(10, min = -2, max = 12),
y = runif(10, min = -2, max = 12)
) |>
# conver to spatial points
sf::st_as_sf(coords = c('x', 'y')) |>
## allocate XY space to it's nearest point
sf::st_union() |>
sf::st_voronoi() |>
## extract the contiguous pieces of XY space around points
sf::st_collection_extract('POLYGON') |>
sf::st_as_sf() |>
## make up seed zones on the fly, assign multiple polygons to some zones.
dplyr::mutate(pstz_key = sample(LETTERS[1:7], size = 10, replace = T)) |>
dplyr::rename('geometry' = x) |>
sf::st_crop(x)
bp <- ggplot2::ggplot(x) +
ggplot2::geom_sf(fill = NA, lwd = 2) +
ggplot2::geom_sf(data = zone_polys, ggplot2::aes(fill = pstz_key))
bp +
ggplot2::geom_sf_label(data = zone_polys, ggplot2::aes(label = pstz_key))
######################################################################
# example #1: request same numer of samples as zones - all zones returned.
res1 <- PolygonBasedSample(
x = x,
n = length(unique(zone_polys[['pstz_key']])),
zones = zone_polys,
zone_key = "pstz_key",
increase_method = "Most"
)
bp +
geom_sf(data = res1, alpha = 0.9) +
geom_sf_label(data = pstz,aes(label = pstz_key))
## note that we get the largest polygon from EACH group to sample from.
#####################################################################
# Example #2: request fewer samples than zones -> subset by method - choosing largest by area
res2 <- PolygonBasedSample(
x = x, n = 3, zones = zone_polys, zone_key = "pstz_key", increase_method = "Largest"
)
res2 |>
group_by(pstz_key) |>
mutate(total_area = sum(poly_area)) |>
sf::st_drop_geometry() |>
arrange(-total_area)|>
knitr::kable()
bp + # picks, the three largest
geom_sf(data = res2, alpha = 0.9) +
geom_sf_label(data = zone_polys, aes(label = pstz_key))
#######################################################################
# Example #3: request fewer samples than zones -> subset by method - choosing smallest by area
res3 <- PolygonBasedSample(
x = x, n = 3, zones = zone_polys, zone_key = "pstz_key", increase_method = "Smallest")
res3 |>
group_by(pstz_key) |>
mutate(total_area = sum(poly_area)) |>
sf::st_drop_geometry() |>
arrange(total_area) |>
knitr::kable()
## returns the largest polygon (poly_area) within the `pstz_key` group, ranked by (total_area)
bp + # picks, the n smallest - too small to see sometimes
geom_sf(data = filter(res3, allocation == 0), alpha = 0.9) +
geom_sf_label(data = zone_polys, aes(label = pstz_key))
####################################################################
# Example #4: request more samples than zones -> allocate extras to Largest pSTZs
## note that is really a rounding rule - 'Largest' favors giving extra collections to the largest
## polygons while 'smallest' favors giving them smaller polygons. It is really mostly for edge cases
## and the two will generally behave similarly on contrived examples.
res4 <- PolygonBasedSample(
x = x, n = 12, zones = zone_polys, zone_key = "pstz_key", increase_method = "Largest")
res4 |>
group_by(pstz_key) |>
summarize(total_area = sum(poly_area), Total_Allocation = sum(allocation)) |>
sf::st_drop_geometry() |>
arrange(-total_area) |>
knitr::kable()
bp +
theme(legend.position = 'none') +
geom_sf(data = res4, aes(fill = as.factor(allocation))) +
geom_sf_label(data = res4, aes(label = allocation))
####################################################################
# Example #5: request more samples than zones -> allocate extras to pSTZs with most polygons
res5 <- PolygonBasedSample(
x = x, n = 14, zones = zone_polys, zone_key = "pstz_key", increase_method = "Most")
res5 |>
group_by(pstz_key) |>
summarize(Count = n(), Total_Allocation = sum(allocation)) |>
sf::st_drop_geometry() |>
arrange(-Count) |>
knitr::kable()
} # }
