Vector addition is also frustrating. Say you write x=1:10
, i.e., make the vector
> x
[1] 1 2 3 4 5 6 7 8 9 10
Then x+1
gives
> x+1
[1] 2 3 4 5 6 7 8 9 10 11
i.e., adds 1 to all entries in the vector
Beware of this in particular when addressing sets of indices in lists, vectors or matrices
end
to access the last entry in a matrix/vector/list..length
(lists or vectors), nchar
(character chains), dim
(matrices.. careful, of course returns 2 values)if (condition is true) {
list of stuff to do
}
Even if list of stuff to do
is a single instruction, best to use curly braces
if (condition is true) {
list of stuff to do
} else if (another condition) {
...
} else {
...
}
for
applies to lists or vectors
for (i in 1:10) {
something using integer i
}
for (j in c(1,3,4)) {
something using integer j
}
for (n in c("truc", "muche", "chose")) {
something using string n
}
for (m in list("truc", "muche", "chose", 1, 2)) {
something using string n or integer n, depending
}
Applies a function to one of the dimensions of a matrix
M = matrix(data = runif(50), nr = 10)
> apply(M, 1, max)
[1] 0.9943167 0.9219764 0.3220949 0.9752947 0.7750777 0.7490136 0.9628084 0.9850463
[9] 0.9098879 0.9764303
> apply(M, 2, max)
[1] 0.9943167 0.9764303 0.9850463 0.7528144 0.9628084
If you need a funkier function, just make it up on the fly or define it (if it is too complex)
apply(M, 1, function(x) sum(x))
[1] 2.0285250 2.8906874 0.8427094 1.7375687 2.6387124 1.6453108 2.7476479 2.5075278
[9] 2.1175915 1.9980510
> apply(M, 2, function(x) sum(x))
[1] 3.396630 6.485475 3.990447 3.969723 3.312057
(There are functions rowSums
and colSums
, but you get the point)
Very useful function (a few others in the same spirit: sapply
, vapply
, mapply
)
Applies a function to each entry in a list/vector/matrix. Because there is a parallel version (parLapply
) that we will see later, worth learning
l = list()
for (i in 1:10) {
l[[i]] = runif(i)
}
lapply(X = l, FUN = mean)
or, to make a vector
unlist(lapply(X = l, FUN = mean))
or
sapply(X = l, FUN = mean)
Can "pick up" nontrivial list entries
l = list()
for (i in 1:10) {
l[[i]] = list()
l[[i]]$a = runif(i)
l[[i]]$b = runif(2*i)
}
sapply(X = l, FUN = function(x) length(x$b))
gives
[1] 2 4 6 8 10 12 14 16 18 20
Just recall: the argument to the function you define is a list entry (l[[1]]
, l[[2]]
, etc., here)
# Suppose we want to vary 3 parameters
variations = list(
p1 = seq(1, 10, length.out = 10),
p2 = seq(0, 1, length.out = 10),
p3 = seq(-1, 1, length.out = 10)
)
# Create the list
tmp = expand.grid(variations)
PARAMS = list()
for (i in 1:dim(tmp)[1]) {
PARAMS[[i]] = list()
for (k in 1:length(variations)) {
PARAMS[[i]][[names(variations)[k]]] = tmp[i, k]
}
}
There is still a loop, but you can split this list, use it on different machines, etc. And can use parLapply
Recent movement (5-10 years): governments (local or higher) create portals where data are centralised and published
library(wbstats)
pop_data_CTRY <- wb_data(country = "ZAF", indicator = "SP.POP.TOTL",
mrv = 100, return_wide = FALSE)
y_range = range(pop_data_CTRY$value)
y_axis <- make_y_axis(y_range)
png(file = "pop_ZAF.png",
width = 800, height = 400)
plot(pop_data_CTRY$date, pop_data_CTRY$value * y_axis$factor,
xlab = "Year", ylab = "Population", type = "b", lwd = 2,
yaxt = "n")
axis(2, at = y_axis$ticks, labels = y_axis$labels, las = 1)
dev.off()
crop_figure("pop_ZAF.png")
allTrees = read.csv("https://data.winnipeg.ca/api/views/hfwk-jp4h/ro
After this,
dim(allTrees)
## [1] 300846
15
elms_idx = grep("American Elm", allTrees$Common.Name, ignore.case = TRUE)
elms = allTrees[elms_idx, ]
We are left with 54,036 American elms
(Needs a relatively large machine here - about 50GB RAM)
elms_xy = cbind(elms$X, elms$Y)
D = dist(elms_xy)
idx_D = which(D<50)
indices_LT
is a large indices_LT[idx_D]
are the pairs under consideration
Keep a little more..
indices_LT_kept = as.data.frame(cbind(indices_LT[idx_D,],
D[idx_D]))
colnames(indices_LT_kept) = c("i","j","dist")
tree_locs_orig = cbind(elms_latlon$lon[indices_LT_kept$i],
elms_latlon$lat[indices_LT_kept$i])
tree_locs_dest = cbind(elms_latlon$lon[indices_LT_kept$j],
elms_latlon$lat[indices_LT_kept$j])
tree_pairs = do.call(
sf::st_sfc,
lapply(
1:nrow(tree_locs_orig),
function(i){
sf::st_linestring(
matrix(
c(tree_locs_orig[i,],
tree_locs_dest[i,]),
ncol=2,
byrow=TRUE)
)
}
)
)
library(tidyverse)
# Get bounding polygon for Winnipeg
bb_poly = osmdata::getbb(place_name = "winnipeg",
format_out = "polygon")
# Get roads
roads <- osmdata::opq(bbox = bb_poly) %>%
osmdata::add_osm_feature(key = 'highway',
value = 'residential') %>%
osmdata::osmdata_sf () %>%
osmdata::trim_osmdata (bb_poly)
# Get rivers
rivers <- osmdata::opq(bbox = bb_poly) %>%
osmdata::add_osm_feature(key = 'waterway',
value = "river") %>%
osmdata::osmdata_sf () %>%
osmdata::trim_osmdata (bb_poly)
st_crs(tree_pairs) = sf::st_crs(roads$osm_lines$geometry)
iroads = sf::st_intersects(x = roads$osm_lines$geometry,
y = tree_pairs)
irivers = sf::st_intersects(x = rivers$osm_lines$geometry,
y = tree_pairs)
tree_pairs_roads_intersect = c()
for (i in 1:length(iroads)) {
if (length(iroads[[i]])>0) {
tree_pairs_roads_intersect = c(tree_pairs_roads_intersect,
iroads[[i]])
}
}
tree_pairs_roads_intersect = sort(tree_pairs_roads_intersect)
to_keep = 1:dim(tree_locs_orig)[1]
to_keep = setdiff(to_keep,tree_pairs_roads_intersect)
dplyr
vs sqldf
dplyr
is part of the tidyverse
set of libraries. Load magrittr
and its pipe %>%
sqldf
allows to use SQL on dataframes.. interesting alternative if you know SQL
library(sqldf)
library(dplyr)
SARS = read.csv("../DATA/SARS-CoV-1_data.csv")
## Three ways to keep only the data for one country
ctry = "Canada"
# The basic one
idx = which(SARS$country == ctry)
SARS_selected = SARS[idx,]
# The sqldf way
SARS_selected = sqldf(paste0("SELECT * FROM SARS WHERE country = '",
ctry, "'"))
# The dplyr way
SARS_selected = SARS %>%
filter(country == ctry)
# Create incidence for the selected country. diff does difference one by one,
# so one less entry than the vector on which it is being used, thus we pad with a 0.
SARS_selected$incidence = c(0, diff(SARS_selected$totalNumberCases))
# Keep only positive incidences (discard 0 or negative adjustments)
SARS_selected = SARS_selected %>%
filter(incidence > 0)
# Plot the result
# Before plotting, we need to make the dates column we will use be actual dates..
SARS_selected$toDate = lubridate::ymd(SARS_selected$toDate)
EpiCurve(SARS_selected,
date = "toDate", period = "day",
freq = "incidence",
title = "SARS-CoV-1 incidence in Canada in 2003")
deSolve
:The functions provide an interface to the FORTRAN functions 'lsoda', 'lsodar', 'lsode', 'lsodes' of the 'ODEPACK' collection, to the FORTRAN functions 'dvode', 'zvode' and 'daspk' and a C-implementation of solvers of the 'Runge-Kutta' family with fixed or variable time steps
So you are benefiting from years and year of experience: ODEPACK is a set of Fortran (77!) solvers developed at Lawrence Livermore National Laboratory (LLNL) starting in the late 70s
Other good solvers are also included, those written in C
Refer to the package help for details
As with more numerical solvers, you need to write a function returning the value of the right hand side of your equation (the vector field) at a given point in phase space, then call this function from the solver
library(deSolve)
rhs_logistic <- function(t, x, p) {
with(as.list(x), {
dN <- p$r * N *(1-N/p$K)
return(list(dN))
})
}
params = list(r = 0.1, K = 100)
IC = c(N = 50)
times = seq(0, 100, 1)
sol <- ode(IC, times, rhs_logistic, params)
This also works: add p
to arguments of as.list
and thus use without p$
prefix
library(deSolve)
rhs_logistic <- function(t, x, p) {
with(as.list(c(x, p)), {
dN <- r * N *(1-N/K)
return(list(dN))
})
}
params = list(r = 0.1, K = 100)
IC = c(N = 50)
times = seq(0, 100, 1)
sol <- ode(IC, times, rhs_logistic, params)
In this case, beware of not having a variable and a parameter with the same name..
lsoda
lsoda
switches automatically between stiff and nonstiff methods
You can also specify other methods: "lsode", "lsodes", "lsodar", "vode", "daspk", "euler", "rk4", "ode23", "ode45", "radau", "bdf", "bdf_d", "adams", "impAdams" or "impAdams_d" ,"iteration" (the latter for discrete-time systems)
ode(y, times, func, parms,
method = "ode45")