dbWriteTable.fast.R

# Function to get data over to PostgreSQL quickly and with minimal loss
# of information due to data type conversions.
# Note that I have had cases where the pipe() call returned an error due
# to lack of memory. So care should be taken with this function
dbWriteTable.fast <- function (conn,name,value,row.names=FALSE,
                                overwrite=TRUE,fix.names=TRUE) {

  # Conform names to PostgreSQL requirements
  pg.names <- make.db.names(conn,names(value))
    
    # Convert variable names to lower case, if requested to do so
    if (fix.names) {
        pg.names <- tolower(pg.names)
  }  

  # Drop the table
  if (overwrite) {
    dbGetQuery(conn,paste("DROP TABLE IF EXISTS",name))
  }
  
  # Create the table (again?)
  if (FALSE) { # Can't do TEMP TABLEs with handoff to psql
      sql <- paste("CREATE TEMP TABLE",name)
  } else {
      sql <- paste("CREATE TABLE",name)
  } 
  
    # Allow for row names, if necessary
    if (row.names) {
        sql <- paste(sql, "(row_names text, ")
    } else {
        sql <- paste(sql, "(")
    }

  # Determine the appropriate types of the various variables based on
    # R type/class and characteristics of the data.
    # Support for boolean, Date, bigint, double precision,
    # varchar, character, and text
  pg.type <- NA
  length(pg.type) <- length <- dim(value)[2]
  for (i in 1:length) { 
    if (is.logical(value[,i])) {
      pg.type <- "boolean"
    } else if (inherits(value[,i],"Date")) {
      pg.type <- "date" 
    } else if (is.integer(value[,i])) {
      pg.type <- "bigint"
    } else if (is.double(value[,i])) {
      pg.type <- "double precision"
    } else {
      temp <- as.character(value[,i])
      max.char <- max(nchar(temp))
      min.char <- min(nchar(temp))
      if (max.char >= 255) {
        pg.type <- "text"
      } else if (min.char==max.char) {
        pg.type <- paste("character(",max.char,")",sep="")
      } else {
        pg.type <- paste("varchar(",max.char,")",sep="")
      }
    }

    # Add SQL for variable to the existing string
    sql <- paste(sql,pg.names[i]," ",pg.type,sep="")
    if (i < length) {
      sql <- paste(sql,", ",sep="")
    } else {
      sql <- paste(sql,")",sep="")
    }
  }

  # Create the table
  dbGetQuery(conn,sql)
  dbGetQuery(conn,paste("ANALYZE",name))
  
  # Send data to PostgreSQL via psql COPY command
	info <- dbGetInfo(conn)
  if (info$host=="") info$host <- "localhost"
  result <- write.csv(value, file=pipe(paste(
      " psql -h ",info$host," -U ",info$user," -d ",info$dbname," -c \"COPY ",
          name," FROM STDIN CSV HEADER\"",sep=""))
      ,na="", row.names=row.names) # 
    # Return TRUE (doesn't help much)
  return(TRUE)
}
Advertisements
This entry was posted in Uncategorized. Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s