Packages

library(dplyr)
library(sf)
library(purrr)
library(magrittr)
library(lubridate)
library(tidyr)

Selecting the climatic stations

Downloading the polygons of the provinces of Lao PDR from GADM:

if (!dir.exists("data-raw")) dir.create("data-raw")
file <- "data-raw/gadm36_LAO_1_sf.rds"
if (!file.exists(file))
  download.file("https://biogeo.ucdavis.edu/data/gadm3.6/Rsf/gadm36_LAO_1_sf.rds", file)

Loading the polygons of the provinces of Lao PDR:

laos <- readRDS("data-raw/gadm36_LAO_1_sf.rds")

Loading the climatic stations:

stations <- readxl::read_excel("/Users/choisy/Dropbox/aaa/R_packages/tutiempo/data-raw/climatic stations.xlsx")

Transforming the data frame into an sf object:

stations_sf <- stations %>% 
  select(-elevation, -from) %>% 
  na.exclude() %>% 
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

plotting:

plot(st_geometry(laos))
plot(st_geometry(stations_sf), add = TRUE)
axis(1)
axis(2)
abline(h = 17.5)
abline(h = 18.5)
abline(v = 102)
abline(v = 104)

Filtering:

selected_stations <- stations %>% 
  filter(102 < longitude, longitude < 104, 17.5 < latitude, latitude < 18.5) %>% 
  select(-from)

Downloading the data

The website TuTiempo.net contains meteorological and climatic data from many climatic stations around the world. Here we download all the daily data from the climatic station of Vientiane from 2010 to today. For that we need a number of utilitary functions that we start by defining.

The following function removes the last 2 lines of a matrix m:

rm_summaries <- function(m) {
  n <- nrow(m)
  m[-((n - 1):n), ]
}

The following function coerces a matrix m to a data frame using the first line for the variable names:

as.data.frame2 <- function(m) {
  setNames(as.data.frame(m, as.is = TRUE), m[1, ])[-1, ]
}

The following function downloads data from the URL url and organizes it into a data frame:

get_page <- function(url) {
  require(magrittr) # for the " %>% " operator
  print(url)
  url %>%
    xml2::read_html() %>%
    rvest::html_nodes(".mensuales td , th") %>%
    rvest::html_text() %>%
    matrix(ncol = 15, byrow = TRUE) %>%
    rm_summaries() %>%
    as.data.frame2()
}

A safe version of the get_page() function, trying the URL again and again if internet is interrupted and handling specific errors (e.g. 404):

safe_get_page <- function(..., error) {
  repeat {
    out <- purrr::safely(get_page)(...)
    if(is.null(out$error) || grepl(error, out$error)) return(out)
  }
}

The following function pads 1-digit numbers to 2-digit ones with zeros on the left:

pad <- function(x) {
  stringr::str_pad(as.character(x), 2, pad = "0")
}

The following function builds an URL from a year, a month and a station:

make_url <- function(year, month, station) {
  paste0("http://en.tutiempo.net/climate/",
         pad(month), "-", year, "/ws-", station, ".html")
}

Here is the main function that downloads the data for the station station:

download_data <- function(station, years, months = 1:12, error = "HTTP error 404") {
  require(magrittr) # for the " %>% " operator
  require(zeallot) # for the " %<-% " operator
  c(months, years) %<-% expand.grid(months, years)
  out <- purrr::map2(years, months, make_url, station = station) %>%
    purrr::map(safe_get_page, error = error) %>%
    purrr::transpose()
  out <- out$result %>%
    setNames(paste(years, pad(months), sep = "-")) %>%
    `[`(sapply(out$error, is.null)) %>%
    dplyr::bind_rows(.id = "ym") %>%
    dplyr::mutate(day = lubridate::ymd(paste(ym, pad(Day), sep = "-"))) %>%
    dplyr::select(-ym, -Day) %>%
    dplyr::select(day, dplyr::everything()) %>%
    dplyr::mutate_if(is.factor, as.character) %>%
    dplyr::mutate_at(dplyr::vars(T, TM, Tm, SLP, PP, VV, V, VM, VG), as.numeric) %>%
    dplyr::mutate_at(dplyr::vars(H), as.integer) %>%
    dplyr::mutate_at(dplyr::vars(RA, SN, TS, FG), function(x) x == "o")
  names(out) %<>%
    sub("^T$", "ta", .) %>%
    sub("TM", "tx", .) %>%
    sub("Tm", "tn", .) %>%
    tolower()
  out
}

The following funtion trim errors from a list return from a safe call:

trim_errors <- function(x) {
  lapply(x[which(sapply(x, function(y) is.null(y$error)))], function(y) y$result)
}

This function safely() calls the above-defined download_data() on a dataframe of stations names, year and month values:

make_meteo_data <- function(grid) {
  setNames(with(grid, Map(safely(download_data), station, year, month)),
           apply(grid, 1, paste, collapse = "-")) %>% 
  trim_errors() %>% 
  bind_rows(.id = "station") %>%
  mutate(station = sub("-\\d*-\\d*$", "", station)) %>%
  split(., .$station) %>% 
  lapply(left_join, x = data.frame(day = seq(ymd("2010-01-01"), ymd("2019-05-31"), 1)), by = "day") %>% 
  bind_rows() %>% 
  mutate_at(station, as.integer) %>% 
  select(station, day, everything(), vg) %>% 
  arrange(station, day) %>% 
  filter(! is.na(station)) %>% 
}

Downloading the data and writing them to disk:

if (!dir.exists("data")) dir.create("data")
bind_rows(expand.grid(selected_stations$station, 2010:2018, 1:12),
          expand.grid(selected_stations$station, 2019,      1:5)) %>% 
  setNames(c("station", "year", "month")) %>% 
  make_meteo_data() %>% 
  write.csv("data/meteo.csv", quote = FALSE, row.names = FALSE)

Writing the stations definitions to disk:

write.csv(selected_stations, "data/stations.csv", quote = FALSE, row.names = FALSE)