H. Sherry Zhang and Prof Di Cook
2022-12-06
Introduce the S3 class cubble to arrange spatio-temporal data
An example of importing COVID 19 data into cubble
Operations on cubble in space and time
Making glyph map to visualise Australian weather data
(cb <- as_cubble(
list(spatial = stations_sf, temporal = ts),
key = id, index = date, coords = c(long, lat)
))
# cubble: id [30]: nested form [sf]
# bbox: [114.09, -41.88, 152.87, -11.65]
# temporal: date [date], prcp [dbl], tmax [dbl], tmin [dbl]
id lat long elev name wmo_id geometry ts
<chr> <dbl> <dbl> <dbl> <chr> <dbl> <POINT [°]> <list>
1 ASN00003057 -16.5 123. 7 cygne… 94201 (123.0086 -16.4514) <tibble>
2 ASN00005007 -22.2 114. 5 learm… 94302 (114.0967 -22.2406) <tibble>
3 ASN00005084 -21.5 115. 5 theve… 94303 (115.0197 -21.4606) <tibble>
4 ASN00010515 -32.1 117. 199 bever… 95615 (116.9247 -32.1083) <tibble>
5 ASN00012314 -27.8 121. 497 leins… 95448 (120.7031 -27.8386) <tibble>
# … with 25 more rows
long form
# cubble: date, id [30]: long form
# bbox: [114.09, -41.88, 152.87, -11.65]
# spatial: lat [dbl], long [dbl], elev [dbl],
# name [chr], wmo_id [dbl], geometry [POINT
# [°]]
id date prcp tmax tmin
<chr> <date> <dbl> <dbl> <dbl>
1 ASN00003057 2020-01-01 0 36.7 26.9
2 ASN00003057 2020-01-02 41 34.2 24
3 ASN00003057 2020-01-03 0 35 25.4
4 ASN00003057 2020-01-04 40 29.1 25.4
5 ASN00003057 2020-01-05 1640 27.3 24.3
# … with 10,627 more rows
back to the nested form:
# cubble: id [30]: nested form [sf]
# bbox: [114.09, -41.88, 152.87, -11.65]
# temporal: date [date], prcp [dbl], tmax [dbl],
# tmin [dbl]
id lat long elev name wmo_id
<chr> <dbl> <dbl> <dbl> <chr> <dbl>
1 ASN00003057 -16.5 123. 7 cygnet bay 94201
2 ASN00005007 -22.2 114. 5 learmonth … 94302
3 ASN00005084 -21.5 115. 5 thevenard … 94303
4 ASN00010515 -32.1 117. 199 beverley 95615
5 ASN00012314 -27.8 121. 497 leinster a… 95448
# … with 25 more rows, and 2 more variables:
# geometry <POINT [°]>, ts <list>
[1] TRUE
# A tsibble: 6 x 5 [1D]
# Key: lga [1]
# Groups: lga, source [1]
date lga source n roll_mean
<date> <chr> <chr> <int> <dbl>
1 2022-01-01 Alpine (S) Contact with a confirmed case 1 NA
2 2022-01-02 Alpine (S) Contact with a confirmed case 2 NA
3 2022-01-03 Alpine (S) Contact with a confirmed case 4 NA
4 2022-01-04 Alpine (S) Contact with a confirmed case 4 NA
5 2022-01-05 Alpine (S) Contact with a confirmed case 2 NA
# … with 1 more row
#install from remotes::install_github("runapp-aus/strayr")
lga <- strayr::read_absmap("lga2018") |>
rename(lga = lga_name_2018) |>
filter(state_name_2016 == "Victoria")
head(lga)
Simple feature collection with 6 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 142.3535 ymin: -38.67876 xmax: 147.3909 ymax: -36.39269
Geodetic CRS: WGS 84
lga_code_2018 lga state_code_2016 state_name_2016 areasqkm_2018 cent_long cent_lat
132 20110 Alpine (S) 2 Victoria 4788.1568 146.9742 -36.85357
133 20260 Ararat (RC) 2 Victoria 4211.1171 142.8432 -37.47271
134 20570 Ballarat (C) 2 Victoria 739.0321 143.7815 -37.49286
135 20660 Banyule (C) 2 Victoria 62.5402 145.0851 -37.73043
136 20740 Bass Coast (S) 2 Victoria 865.8095 145.5581 -38.50730
137 20830 Baw Baw (S) 2 Victoria 4027.6287 146.1315 -37.98399
geometry
132 MULTIPOLYGON (((146.7258 -3...
133 MULTIPOLYGON (((143.1807 -3...
134 MULTIPOLYGON (((143.6622 -3...
135 MULTIPOLYGON (((145.1357 -3...
136 MULTIPOLYGON (((145.5207 -3...
137 MULTIPOLYGON (((145.765 -37...
(cb <- as_cubble(
list(spatial = lga, temporal = covid),
key = lga, index = date, coords = c(cent_long, cent_lat)))
! Some sites in the temporal table don't have spatial information
! Some sites in the spatial table don't have temporal information
! Use argument `output = "unmatch"` to check on the unmatched key
# cubble: lga [78]: nested form [sf]
# bbox: [141.27, -38.63, 148.3, -34.86]
# temporal: date [date], source [chr], n [int], roll_mean [dbl]
lga_code_2018 lga state…¹ state…² areas…³ cent_…⁴ cent_…⁵ ts geometry
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <list> <MULTIPOLYGON [°]>
1 20110 Alpine… 2 Victor… 4788. 147. -36.9 <gropd_ts> (((146.7258 -36.45922, 1…
2 20260 Ararat… 2 Victor… 4211. 143. -37.5 <gropd_ts> (((143.1807 -37.73152, 1…
3 20570 Ballar… 2 Victor… 739. 144. -37.5 <gropd_ts> (((143.6622 -37.57241, 1…
4 20660 Banyul… 2 Victor… 62.5 145. -37.7 <gropd_ts> (((145.1357 -37.74091, 1…
5 20740 Bass C… 2 Victor… 866. 146. -38.5 <gropd_ts> (((145.5207 -38.30667, 1…
# … with 73 more rows, and abbreviated variable names ¹state_code_2016, ²state_name_2016,
# ³areasqkm_2018, ⁴cent_long, ⁵cent_lat
(pair <- as_cubble(
list(spatial = lga, temporal = covid),
key = lga, index = date, coords = c(cent_long, cent_lat),
output = "unmatch"))
$paired
# A tibble: 80 × 2
spatial temporal
<chr> <chr>
1 Alpine (S) Alpine (S)
2 Ararat (RC) Ararat (RC)
3 Ballarat (C) Ballarat (C)
4 Banyule (C) Banyule (C)
5 Bass Coast (S) Bass Coast (S)
# … with 75 more rows
$others
$others$temporal
[1] "Interstate" "Overseas" "Unknown"
$others$spatial
[1] "No usual address (Vic.)" "Migratory - Offshore - Shipping (Vic.)"
# A tibble: 6 × 2
spatial temporal
<chr> <chr>
1 Wyndham (C) Wyndham (C)
2 Yarra (C) Yarra (C)
3 Yarra Ranges (S) Yarra Ranges (S)
4 Yarriambiack (S) Yarriambiack (S)
5 Kingston (C) (Vic.) Kingston (C)
# … with 1 more row
lga <- lga |>
mutate(lga = ifelse(lga == "Kingston (C) (Vic.)", "Kingston (C)", lga),
lga = ifelse(lga == "Latrobe (C) (Vic.)", "Latrobe (C)", lga)) |>
filter(!lga %in% pair$others$spatial)
covid <- covid |> filter(!lga %in% pair$others$temporal)
(cb <- as_cubble(data = list(spatial = lga, temporal = covid),
key = lga, index = date, coords = c(cent_long, cent_lat)))
# cubble: lga [80]: nested form [sf]
# bbox: [141.27, -38.63, 148.3, -34.86]
# temporal: date [date], source [chr], n [int], roll_mean [dbl]
lga_code_2018 lga state…¹ state…² areas…³ cent_…⁴ cent_…⁵ ts geometry
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <list> <MULTIPOLYGON [°]>
1 20110 Alpine… 2 Victor… 4788. 147. -36.9 <gropd_ts> (((146.7258 -36.45922, 1…
2 20260 Ararat… 2 Victor… 4211. 143. -37.5 <gropd_ts> (((143.1807 -37.73152, 1…
3 20570 Ballar… 2 Victor… 739. 144. -37.5 <gropd_ts> (((143.6622 -37.57241, 1…
4 20660 Banyul… 2 Victor… 62.5 145. -37.7 <gropd_ts> (((145.1357 -37.74091, 1…
5 20740 Bass C… 2 Victor… 866. 146. -38.5 <gropd_ts> (((145.5207 -38.30667, 1…
# … with 75 more rows, and abbreviated variable names ¹state_code_2016, ²state_name_2016,
# ³areasqkm_2018, ⁴cent_long, ⁵cent_lat
(cb_tm <- cb_space %>%
face_temporal() %>%
mutate(month = lubridate::month(date)) %>%
group_by(month) %>%
summarise(tmax = mean(tmax, na.rm = TRUE))
)
# cubble: month, id [20]: long form
# bbox: [114.09, -41.65, 152.72, -11.65]
# spatial: lat [dbl], long [dbl], elev [dbl], name [chr], wmo_id [dbl], geometry [POINT [°]]
id month tmax
<chr> <dbl> <dbl>
1 ASN00005007 1 38.8
2 ASN00005007 2 37.5
3 ASN00005007 3 38.1
4 ASN00005007 4 35.8
5 ASN00005007 5 28.6
# … with 235 more rows
Reference temporal variables with $
Simple feature collection with 30 features and 8 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 114.0967 ymin: -41.8739 xmax: 152.8655 ymax: -11.6502
Geodetic CRS: GDA94
# cubble: id [30]: nested form [sf]
# bbox: [114.09, -41.88, 152.87, -11.65]
# temporal: date [date], prcp [dbl], tmax [dbl], tmin [dbl]
id lat long elev name wmo_id geometry ts avg_tmax
* <chr> <dbl> <dbl> <dbl> <chr> <dbl> <POINT [°]> <list> <dbl>
1 ASN00003057 -16.5 123. 7 cygnet bay 94201 (123.0086 -16.4514) <tibble> 32.4
2 ASN00005007 -22.2 114. 5 learmonth airport 94302 (114.0967 -22.2406) <tibble> 33.2
3 ASN00005084 -21.5 115. 5 thevenard island 94303 (115.0197 -21.4606) <tibble> 30.7
4 ASN00010515 -32.1 117. 199 beverley 95615 (116.9247 -32.1083) <tibble> 26.4
5 ASN00012314 -27.8 121. 497 leinster aero 95448 (120.7031 -27.8386) <tibble> 29.6
# … with 25 more rows
Move spatial variables into the long form
# cubble: date, id [30]: long form
# bbox: [114.09, -41.88, 152.87, -11.65]
# spatial: lat [dbl], long [dbl], elev [dbl], name [chr], wmo_id [dbl], geometry [POINT [°]]
id date prcp tmax tmin long lat
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ASN00003057 2020-01-01 0 36.7 26.9 123. -16.5
2 ASN00003057 2020-01-02 41 34.2 24 123. -16.5
3 ASN00003057 2020-01-03 0 35 25.4 123. -16.5
4 ASN00003057 2020-01-04 40 29.1 25.4 123. -16.5
5 ASN00003057 2020-01-05 1640 27.3 24.3 123. -16.5
# … with 10,627 more rows
cb <- as_cubble(
list(spatial = stations_sf, temporal = ts),
key = id, index = date, coords = c(long, lat)
)
set.seed(0927)
cb_glyph <- cb %>%
slice_sample(n = 20) %>%
face_temporal() %>%
mutate(month = lubridate::month(date)) %>%
group_by(month) %>%
summarise(tmax = mean(tmax, na.rm = TRUE)) %>%
unfold(long, lat)
ggplot() +
geom_sf(data = oz_simp,
fill = "grey95",
color = "white") +
geom_glyph(
data = cb_glyph,
aes(x_major = long, x_minor = month,
y_major = lat, y_minor = tmax),
width = 2, height = 0.7) +
ggthemes::theme_map()