Suppose we obseve a dynamical system and we wish to estimate its parameters. One way of doing this is to use a Monte Carlo method. But if the system is chaotic then this approach is highly unlikely to work as the small changes in the proposals for the parameters will result in large changes in the path.
Here's a well-known chaotic dynamical system:
\begin{align} \frac{\mathrm{d}x}{\mathrm{d}t} &= \alpha (y - x), \\ \frac{\mathrm{d}y}{\mathrm{d}t} &= x (\rho - z) - y, \\ \frac{\mathrm{d}z}{\mathrm{d}t} &= x y - \beta z. \end{align}
And here is its representation in libbi together with some instructions to generate noisy values of the $x$ variable (we use the noisy values later).
library(readr)
model_file_name <- "LorenzGenerate.bi"
writeLines(read_file(model_file_name))
library('rbi')
library(ggplot2)
Lorenz <- bi_model(model_file_name)
T <- 10.0
nObs <- 100
init_parameters <- list(X = 1, Y = 1, Z = 1)
synthetic_dataset <- bi_generate_dataset(end_time=T, model=Lorenz,
init=init_parameters,
noutputs = nObs)
synthetic_data <- bi_read(synthetic_dataset)
synthetic_df <- as.data.frame(synthetic_data)
tail(synthetic_df)
p <- ggplot(synthetic_df, aes(X.time)) +
geom_path(aes(y = X.value, colour="alpha 16.0")) +
theme(legend.position="bottom") +
ggtitle("Lorenz") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("Time") +
ylab("X Value")
ggsave(filename = "diagrams/xpath.svg", plot = p)
We can check that the system becomes chaotic, in the sense that small changes in initial conditions lead to qualitatively different behaviour.
path0 <- ggplot() +
theme(legend.position="bottom") +
ggtitle("Lorenz") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("Time") +
ylab("Value")
set.seed(42)
T <- 20.0
for (i in c("red", "blue", "green")) {
init_parameters <- list(X = 1 + rnorm(1,0.0,0.01),
Y = 1 + rnorm(1,0.0,0.01),
Z = 1 + rnorm(1,0.0,0.01))
synthetic_dataset <- bi_generate_dataset(end_time=T, model=Lorenz,
init=init_parameters,
noutputs = nObs)
synthetic_data <- bi_read(synthetic_dataset)
synthetic_df <- as.data.frame(synthetic_data)
path0 <- path0 +
geom_line(data = synthetic_df, aes(x = X.time, y = X.value), color = i)
}
ggsave(filename = "diagrams/xpath4.svg", plot = path0)
Alternatively we can model the parameter as part of the state and assume that it undergoes Brownian Motion. This seems reasonable: the further we go into the future, the less certain we are about its value. An improvement might be to model it as an Ornstein-Uhlenbeck process which is mean-reverting - after all we don't expect the parameter to take on arbitrarily large or small values but let's learn to walk before we learn to run.
Since we expect our parameter to positive let's model it as geometric Brownian Motion.
\begin{equation} \mathrm{d}\alpha = \alpha\sigma\mathrm{d}W_t \end{equation}
By Itô's lemma, we have
\begin{equation} \mathrm{d}(\log \alpha) = -\frac{\sigma^2}{2}\mathrm{d}t + \sigma\mathrm{d}W_t \end{equation}
We can model this in libbi as:
model_file_name <- "LorenzState.bi"
writeLines(read_file(model_file_name))
We can then run the model.
LorenzState <- bi_model(model_file_name)
bi_state_model <- libbi(model=LorenzState)
bi_state <- filter(bi_state_model,
nparticles = 8192,
nthreads = 1,
end_time = T,
obs = synthetic_dataset,
init = init_parameters,
ess_rel = 1,
sample_obs = TRUE)
bi_file_summary(bi_state$output_file_name)
bi_state
summary(bi_state)
And then after some manipulation, draw the path of the "state" parameter.
output <- bi_read(bi_state)
logw <- xtabs(value ~ time + np, data = output$logweight, addNA = TRUE)
X <- output$X$value
Y <- output$Y$value
Z <- output$Z$value
A <- output$ln_alpha$value
log2normw <- function(lw){
w <- exp(lw - max(lw))
return(w / sum(w))
}
w = t(apply(X=logw, MARGIN=1, FUN=log2normw))
Xmeans = apply(X = X*w, MARGIN=1, FUN=sum)
Ymeans = apply(X = X*w, MARGIN=1, FUN=sum)
Zmeans = apply(X = Z*w, MARGIN=1, FUN=sum)
Ameans = apply(X = A*w, MARGIN=1, FUN=sum)
synthetic_data <- bi_read(synthetic_dataset)
X_original <- synthetic_data$X$value
Y_original <- synthetic_data$Y$value
Z_original <- synthetic_data$Z$value
synthetic_df <- as.data.frame(synthetic_data)
synthetic_df$Xmeans <- Xmeans
synthetic_df$Ymeans <- Ymeans
synthetic_df$Zmeans <- Zmeans
synthetic_df$Ameans <- Ameans
pAmeans <- ggplot(synthetic_df, aes(X.time)) +
geom_path(aes(y = exp(Ameans), colour="Ameans")) +
theme(legend.position="bottom") +
ggtitle("Lorenz") +
theme(plot.title = element_text(hjust = 0.5)) +
ylim(0.0, max(exp(Ameans))) +
xlab("Time") +
ylab("Value")
ggsave(filename = "diagrams/xpath5.svg", plot = pAmeans)
We can try inferring the parameter for the different chaotic solutions.
dataset_list <- list()
parameters_list <- list()
for (i in c(1,2,3)) {
init_parameters <- list(X = 1 + rnorm(1,0.0,0.01),
Y = 1 + rnorm(1,0.0,0.01),
Z = 1 + rnorm(1,0.0,0.01))
parameters_list[[i]] <- init_parameters
synthetic_dataset <- bi_generate_dataset(end_time=T, model=Lorenz,
init=init_parameters,
noutputs = nObs)
dataset_list[[i]] <- synthetic_dataset
}
X_list <- list()
Y_list <- list()
Z_list <- list()
A_list <- list()
for (i in c(1,2,3)) {
bi_state <- filter(bi_state_model, nparticles = 8192, nthreads = 1, end_time = T, obs = dataset_list[[i]], init = parameters_list[[i]], ess_rel = 1, sample_obs = TRUE)
output <- bi_read(bi_state)
logw <- xtabs(value ~ time + np, data = output$logweight, addNA = TRUE)
w = t(apply(X=logw, MARGIN=1, FUN=log2normw))
X <- output$X$value
Y <- output$Y$value
Z <- output$Z$value
A <- output$ln_alpha$value
X_list[[i]] = apply(X = X*w, MARGIN=1, FUN=sum)
Y_list[[i]] = apply(X = X*w, MARGIN=1, FUN=sum)
Z_list[[i]] = apply(X = Z*w, MARGIN=1, FUN=sum)
A_list[[i]] = apply(X = A*w, MARGIN=1, FUN=sum)
}
path2 <- ggplot() +
theme(legend.position="bottom") +
ggtitle("Lorenz") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("Time") +
ylab("Value")
for (i in c(1,2,3)) {
synthetic_data <- bi_read(dataset_list[[i]])
synthetic_df <- as.data.frame(synthetic_data)
synthetic_df$Ameans <- exp(A_list[[i]])
path2 <- path2 + geom_line(data = synthetic_df,
aes(x = X.time, y = Ameans), color = "blue")
}
ggsave(filename = "diagrams/xpath7.svg", plot = path2)
And if we take the mean of the tails then we get pretty decent estimates of the parameter.
x <- list()
for (i in c(1:3)) {
x[[i]] <- tail(exp(A_list[[i]]), n = 50)
}
for (i in 1:3) print(mean(x[[i]]))
for (i in 1:3) print(sd(x[[i]]))
This notebook can be downloaded from here