Event returns, R, and PostgreSQL

Here is another nice illustration of the performance of R+PostgreSQL. The routine at the bottom of this post calculates event returns (raw, size-adjusted, and relative to value-weighted CRSP, and allowing for delisting) from a list of events (PERMNOs and dates). I seem to recall that doing this in SAS took somewhat longer.

library(RPostgreSQL)
drv <- dbDriver("PostgreSQL")
pg <- dbConnect(drv, db="crsp")
# Pull a random sample of 5,000 "events" from CRSP daily file
source("~/Dropbox/research/activism/code/getEventReturnsDaily.R")
events <- dbGetQuery(pg,"
    SET work_mem='10GB';

    SELECT permno, date 
    FROM crsp.dsf 
    WHERE date BETWEEN '2001-01-01' AND '2010-09-30'
    ORDER BY RANDOM()
    LIMIT 5000")
system.time(ret.data <- getEventReturns(events$permno, events$date,
                                        days.before=-3, days.after=3))

Here is output from the code above:

Initializing database connection...                   0.01 seconds
Preparing data for upload to database...              0.00 seconds
Writing list of PERMNOs and event dates to database...0.32 seconds
Identifying relevant trading dates...                 0.19 seconds
Compounding raw returns...                            3.38 seconds
Compounding value-weighted market returns...          0.16 seconds
Compounding size-decile market returns...             0.15 seconds
   user  system elapsed 
  0.210   0.020   4.295 
> head(ret.data)
  permno event_date          ret        ret_sz      ret_mkt
1  85889 2007-11-21 -0.011986030  0.0230668629  0.005780422
2  10937 2005-03-11 -0.029125053  0.0007138083 -0.001866695
3  90058 2007-02-23  0.028571053  0.0391829777  0.059063347
4  90592 2006-07-05 -0.007624306 -0.0299885204 -0.026608077
5  85629 2003-08-08  0.013635199  0.0065366380  0.009632534
6  88741 2001-03-29 -0.024055291            NA  0.013400009

# Load up the SQLite driver, create a connection to the database
library(RPostgreSQL)

# The following function takes a list of permnos and event dates, then for each
# calls the function above to get event returns for each PERMNO-event date
# combination.
getEventReturns <- function(permno, event.date, days.before=0, days.after=0,
	event.date.begin=event.date, event.date.end=event.date, label="ret") {

	permno <- as.integer(permno)

	cat("Initializing database connection...                   ")
	timer <- Sys.time()
	drv <- dbDriver("PostgreSQL")
	crsp <- dbConnect(drv, dbname = "crsp")

 	# This is some code to handle the case in which beginning AND ending event dates
	# are supplied. The default case (when just "" is specified) is a
	# single event date.
	# If two dates are supplied, then the return is calculated from
	# event.date.begin + days.before until event.date.end + days.after.
	# To give effect to this, I calculate days.after for the underlying function so
	# as to have the return window run through to the desired point, then call the
	# basic function below.
	# rs <- dbGetQuery(crsp,"SET search_path TO temp")
	diff <- as.numeric(Sys.time() - timer); timer <- Sys.time();
	cat(sprintf("%0.2f seconds\nPreparing data for upload to database...              ",diff))
	event.date.end <- as.Date(event.date.end)
	event.date.begin <- as.Date(event.date.begin)

	if (!identical(event.date.begin,event.date.end)) {
		event.date <- event.date.begin
		has.days.after.vector <- TRUE
	} else {
		has.days.after.vector <- FALSE
		event.date <- as.Date(event.date)
	}

	temp <- data.frame(permno, event_date=event.date)
	temp <- cbind(temp, days_before=days.before, days_after=days.after,
                  event.date.begin,event.date.end)

	# dbGetQuery(crsp,"DROP TABLE IF EXISTS permnos")
	diff <- as.numeric(Sys.time() - timer); timer <- Sys.time();
	cat(sprintf("%0.2f seconds\nWriting list of PERMNOs and event dates to database...",diff))
	dbWriteTable(crsp,"permnos",
                 subset(temp, subset=!is.na(permno) & !is.na(event.date.begin)),
                 row.names=TRUE,overwrite=TRUE)

	dbGetQuery(crsp,"ALTER TABLE permnos ALTER permno TYPE bigint")
	dbGetQuery(crsp,"ALTER TABLE permnos ADD COLUMN begin_td bigint")
	dbGetQuery(crsp,"ALTER TABLE permnos ADD COLUMN end_td bigint")
	dbGetQuery(crsp,"ALTER TABLE permnos ADD COLUMN begin_date DATE")
	dbGetQuery(crsp,"ALTER TABLE permnos ADD COLUMN end_date DATE")
	dbGetQuery(crsp,"ALTER TABLE permnos ADD COLUMN ret double precision")
	dbGetQuery(crsp,"ALTER TABLE permnos ADD COLUMN vwretd double precision")
	dbGetQuery(crsp,"ALTER TABLE permnos ADD COLUMN decret double precision")

	diff <- as.numeric(Sys.time() - timer); timer <- Sys.time();
	cat(sprintf("%0.2f seconds\nIdentifying relevant trading dates...                 ",diff))

	sql <- paste("UPDATE permnos AS a",
		"SET begin_td = ((SELECT td FROM crsp.anncdates AS c",
		"WHERE c.anncdate=a.event_date_begin)+a.days_before)")
	dbGetQuery(crsp,sql)
	sql <- paste("UPDATE permnos AS a",
		"SET end_td = ((SELECT td FROM crsp.anncdates AS c",
		"WHERE c.anncdate=a.event_date_end)+a.days_after)")
	dbGetQuery(crsp,sql)

	sql <- paste("UPDATE permnos AS a",
		"SET begin_date = (SELECT date FROM crsp.trading_dates AS c",
		"WHERE c.td=a.begin_td)")
	dbGetQuery(crsp,sql)
	sql <- paste("UPDATE permnos AS a",
		"SET end_date = (SELECT date FROM crsp.trading_dates AS c",
		"WHERE c.td=a.end_td)")
	dbGetQuery(crsp,sql)

	diff <- as.numeric(Sys.time() - timer); timer <- Sys.time();
	cat(sprintf("%0.2f seconds\nCompounding raw returns...                            ",diff))

	# Compound returns
	sql <- paste(
		"UPDATE permnos AS a",
		"SET ret = (SELECT exp(sum(ln(1+b.ret)))-1 FROM crsp.rets AS b",
		"WHERE a.permno=b.permno AND b.date BETWEEN a.begin_date AND a.end_date",
		"AND a.begin_date IS NOT NULL AND a.end_date IS NOT NULL)")
	dbGetQuery(crsp,sql)

	# Compound value-weighted CRSP returns
	diff <- as.numeric(Sys.time() - timer); timer <- Sys.time();
	cat(sprintf("%0.2f seconds\nCompounding value-weighted market returns...          ",diff))
	sql <- paste(
		"UPDATE permnos AS a",
		"SET vwretd = (SELECT exp(sum(ln(1+b.vwretd)))-1 FROM crsp.rets AS b",
		"WHERE a.permno=b.permno AND b.date BETWEEN a.begin_date AND a.end_date",
		"AND a.begin_date IS NOT NULL AND a.end_date IS NOT NULL)")
	dbGetQuery(crsp,sql)

	# Compound size-decile returns
	diff <- as.numeric(Sys.time() - timer); timer <- Sys.time();
	cat(sprintf("%0.2f seconds\nCompounding size-decile market returns...             ",diff))

	sql <- paste(
		"UPDATE permnos AS a",
		"SET decret = (SELECT exp(sum(ln(1+b.decret)))-1  FROM crsp.rets AS b",
		"WHERE a.permno=b.permno AND b.date BETWEEN a.begin_date AND a.end_date",
		"AND a.begin_date IS NOT NULL AND a.end_date IS NOT NULL)")
	dbGetQuery(crsp,sql)

	diff <- as.numeric(Sys.time() - timer); timer <- Sys.time();
	cat(sprintf("%0.2f seconds\n",diff))
	ret.data <- dbGetQuery(crsp,paste(
		"SELECT row_names, permno, event_date, ret, ret - decret AS ret_sz,",
			"ret - vwretd AS ret_mkt",
		"FROM permnos",
		"ORDER BY row_names"))
	dbDisconnect(crsp)

	# Label variables using label given appended to suffixes
	suffixes <- c("","_sz","_mkt")
	new.names <- paste(label,suffixes,sep="")
	ret.data <- subset(ret.data,select=-row_names)
	names(ret.data) <- sub("^ret",label,names(ret.data),perl=TRUE)
	return(ret.data)
}

After initially posting this, I realized that I "cheated" a little. You may have wondered what "crsp.rets" referred to. This table was built as follows:

-- Create a table that integrates basic returns with delisting returns
CREATE TABLE crsp.rets AS
    SELECT coalesce(a.permno,b.permno) as permno, coalesce(a.date,b.dlstdt) as date,
    	coalesce(1+a.ret,1)* coalesce(1+b.dlret,1)-1 AS ret
    FROM crsp.dsf AS a
    FULL OUTER JOIN crsp.dsedelist AS b
    ON a.permno=b.permno and a.date = b.dlstdt
    WHERE a.ret IS NOT NULL OR b.dlret IS NOT NULL;

-- Add column for VWRETD
ALTER TABLE crsp.rets ADD COLUMN vwretd double precision;

-- Add column for DECRET
ALTER TABLE crsp.rets ADD COLUMN decret double precision;

-- Bring in data for DECRET
UPDATE crsp.rets AS a
	SET decret = (SELECT decret FROM crsp.erdport1 AS b
                  WHERE a.permno=b.permno AND a.date=b.date);

-- Bring in data for VWRETD
UPDATE crsp.rets AS a
	SET vwretd = (SELECT vwretd FROM crsp.dsi AS b WHERE a.date=b.date);

-- Create an index/key on PERMNO, DATE
CREATE INDEX rets_idx ON crsp.rets (permno, date);

This SQL code took 15 minutes (913733 ms to be precise) on my hardware, but it's something you'd only run each time you updated CRSP (i.e., no more than once a quarter).

Advertisements
This entry was posted in Uncategorized and tagged , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s