R: parallel execution using foreach and %dopar%

This post provides a quick introduction to parallel execution in R using foreach and %dopar%.




foreach and %dopar%


R offers foreach and %dopar% as one of the parallel execution. This post explores whether there's a time-saving advantage computationally.

In comparing a parallelized loop to a typical loop, I examine the grid search for a pair of decay parameters within the Nelson-Siegel-Svensson (NSS) model.

At first, extensive combinations of decay parameter pairs are generated, ensuring a minimum separation between parameters to address a multicollinearity problem.

Following that, the optimal parameter pair is found by minimizing the residual sum of squares (RSS) through two grid search methods: a conventional approach and a parallelized one.


Data, functions and combinations of decay parameters


The R code below reads data, defines functions associated with the NSS model, and generates comprehensive grids for two decay parameters:

#========================================================#
# R, Python, Financial Econometrics & Derivatives 
# Machine/Deep Learning, Tensorflow by Sang-Heon Lee 
#
# https://shleeai.blogspot.com
#--------------------------------------------------------#
# Parallel Grid search
# Period-by-Period OLS estimations of the NSS Model 
# with many combinations of two decay parameters
#========================================================#
 
graphics.off(); rm(list = ls())
setwd('your folder')
 
# data is located in the following address
# 'https://github.com/Financial-Times/yield-curve-sonification/blob/7d131970377380f118fb9a0e1626f5ed1ecd35ca/yield-curve-monthly-data.csv'
 
<- as.matrix(read.csv('yield-curve-monthly-data.csv')[,-1])
<- c(13612,  24366084120240360)
 
#=========================================== 
# NSS functions
#=========================================== 
 
# Svensson loadings
sv_factor_loading <-function(la, m) {
    return(cbind(rep(1,length(m)), 
        (1-exp(-la[1]*m))/(la[1]*m), 
        (1-exp(-la[1]*m))/(la[1]*m)-exp(-la[1]*m),
        (1-exp(-la[2]*m))/(la[2]*m)-exp(-la[2]*m)))
}
# OLS beta given lambda
est_sv_beta_reg <- function(la, y, m) {
    C    <- sv_factor_loading(la, m)
    beta <- solve(t(C)%*%C)%*%t(C)%*%y; 
    rss  <- sum((y - sv_factor_loading(la, m)%*%beta)^2)
    return(list(beta=beta, la=la, rss=rss))
}
 
# period-by-period Estimation
est_sv_beta_reg_ts <- function(la, y_all, m) {
    nobs <- nrow(y_all); nmat <- length(m)
    rss  <- rep(NA,nobs); beta <- matrix(NA, nobs, 4)
    for(t in 1:nobs) {
        lt <- est_sv_beta_reg(la,y_all[t,],m)
        beta[t,] <- lt$beta; rss[t] <- lt$rss}
    return(list(beta=beta, la=la, rss=rss))
}
 
#=========================================== 
# Two decay parameters for grid search
#=========================================== 
 
# Generate combinations of two decay parameters
comb <- expand.grid(de1 = 1:60, de2 = 1:120)
 
# a minimum difference of 24 months
comb <- comb[comb$de1 + 24 < comb$de2, ]
 
ncomb <- nrow(comb)
vrss  <- rep(NA, ncomb)
 
#-------------------------------------------
# function to be executed in a loop
#-------------------------------------------
func_in_loop <- function(i) {
    out_sv <- est_sv_beta_reg_ts(unlist(1.8/comb[i,]), y, m)
    return(sum(out_sv$rss))
}
 
cs



Conventional Grid search


The R code below finds the optimal pair of decay parameters using a typical grid search method, which took approximately 1 minute:

#=========================================== 
# Conventional grid search
#=========================================== 
 
start_time <- Sys.time()
 
# Iterate through each combination
for (i in 1:ncomb) {
    vrss[i] <- func_in_loop(i)
}
 
end_time <- Sys.time()
elased_time <- end_time - start_time
 
# Find the lambda pair that attains the lowest RSS
min_i  <- which.min(vrss); min_i
min_la <- unlist(1.8/comb[min_i,]); min_la
out_sv <- est_sv_beta_reg_ts(min_la, y, m)
 
# plot
x11(width = 16/2, height = 9/2.3)
matplot(out_sv$beta, type="l", lty=1, lwd=3,
        main = paste0("elased time = ", round(elased_time,2), " sec"))
legend("bottomright", pch = 16, col = 1:4, cex = 0.9,
       bty = "n", horiz=TRUE, legend=c("L","S","C1","C2"))
 
cs



Parallel Grid search


The R code below finds the optimal pair of decay parameters using foreach with %dopar%, one of the parallelized methods in R. It took less than half the time of the previous typical grid search.

#=========================================== 
# Parallel grid search
#=========================================== 
 
library(foreach)
library(doParallel)
 
#-------------------------------------------
# 1) number of cores to be used
#-------------------------------------------
numCores <- parallel::detectCores() - 1
#-------------------------------------------
# 2) Register parallel backend
#-------------------------------------------
myCluster <- parallel::makeCluster(numCores)
registerDoParallel(myCluster)
#-------------------------------------------
 
start_time <- Sys.time()
 
#------------------------------------------------
# 3) foreach iteration through each combination
#------------------------------------------------
vrss <- foreach (i=1:ncomb, .combine = c) %dopar% {
    func_in_loop(i)
}
 
#-------------------------------------------
# 4) Stop parallel backend
#-------------------------------------------
parallel::stopCluster(myCluster)
#-------------------------------------------
 
end_time <- Sys.time()
elased_time <- end_time - start_time
 
# Find the lambda pair that attains the lowest RSS
min_i  <- which.min(vrss); min_i
min_la <- unlist(1.8/comb[min_i,]); min_la
out_sv <- est_sv_beta_reg_ts(min_la, y, m)
 
# plot
x11(width = 16/2, height = 9/2.3)
matplot(out_sv$beta, type="l", lty=1, lwd=3,
        main = paste0("elased time (foreach with %dopar%)= "
                      round(elased_time,2), " sec"))
legend("bottomright", pch = 16, col = 1:4, cex = 0.9,
       bty = "n", horiz=TRUE, legend=c("L","S","C1","C2"))
 
cs


The optimal decay parameters are 0.1125 and 0.0439, which correspond to the 136th element of the combinations in both cases.

Indeed, while the grid search demonstrated in this post stands as a typical instance of independent execution, for your research, it would be worthwhile to attempt its application to different independent problems as well.



No comments:

Post a Comment