Code for 5.2.5 Extended Example: A Salary Study from “The Art of R Programming”

Here is code for 5.2.5 Extended Example: A Salary Study from “The Art of R Programming”. The main addition to the code in the book is code to get the data from the Department of Labor’s website.

# Get data from internet, read into R
# NOTE(review): the archive location is empty in this listing; set
# `zipped_data` to the path of the zip archive before running.
zipped_data <- ""
if (!nzchar(zipped_data)) {
  # Fail early with a clear message instead of an obscure unzip/read.csv error
  stop("Set `zipped_data` to the path of the zipped data file.",
       call. = FALSE)
}
# unzip() extracts the archive and returns the path(s) of the extracted file(s)
raw_data <- unzip(zipped_data)
all2006 <- read.csv(raw_data, header = TRUE)
# Remove the extracted file from disk, then drop the temporary variable
unlink(raw_data)
rm(raw_data)

# A little data-cleaning: turn the wage columns into numbers and add the
# offered-to-prevailing wage ratio as a new column `rat`.
all2006 <- within(all2006, {
  # Strip the literal dollar sign so the values can be parsed as numeric
  Wage_Offered_From <- as.numeric(gsub("\\$", "", Wage_Offered_From))
  Prevailing_Wage_Amount <- as.numeric(
    gsub("\\$", "", Prevailing_Wage_Amount)
  )
  # Ratio of the offered wage to the prevailing wage
  rat <- Wage_Offered_From / Prevailing_Wage_Amount
})

# Further cleaning (per p.108): keep only the rows that pass every filter.
# which() discards rows where the combined test is NA as well as FALSE.
all2006 <- all2006[which(with(
  all2006,
  Wage_Per == "Year" &               # yearly wages only (no hourly-wagers)
    Wage_Offered_From > 20000 &      # drop weird cases
    Prevailing_Wage_Amount > 200     # drop hourly prevailing wages
)), , drop = FALSE]

# Job-title subsets as on p.109 (not the book's own code, which behaved
# strangely for the author): each data frame keeps the rows whose
# Prevailing_Wage_Job_Title contains the given phrase.
se2006 <- all2006[
  with(all2006, grepl("Software Engineer", Prevailing_Wage_Job_Title)), ,
  drop = FALSE
]
prg2006 <- all2006[
  with(all2006, grepl("Programmer", Prevailing_Wage_Job_Title)), ,
  drop = FALSE
]
ee2006 <- all2006[
  with(all2006, grepl("Electronics Engineer", Prevailing_Wage_Job_Title)), ,
  drop = FALSE
]

# Median of the `rat` column of a data frame, ignoring missing values.
#
# dataframe: a data frame with a numeric `rat` column.
# Returns: a single number, the median of the non-NA `rat` values.
medrat <- function(dataframe) {
  median(dataframe$rat, na.rm = TRUE)
}

# Extract the rows belonging to one employer.
#
# corpname:  employer name, compared for exact equality with Employer_Name.
# dataframe: data frame to search; defaults to the global `all2006`, so
#            existing one-argument calls behave as before.
# Returns: the rows of `dataframe` whose Employer_Name equals `corpname`
#          (rows with a missing Employer_Name are dropped by subset()).
makecorp <- function(corpname, dataframe = all2006) {
  subset(dataframe, Employer_Name == corpname)
}

# Alternating pairs: employer name as matched against Employer_Name,
# followed by the short tag used to name that employer's data frame.
corplist <- c("MICROSOFT CORPORATION", "ms",
              "INTEL CORPORATION", "intel",
              "SUN MICROSYSTEMS, INC.", "sun",
              "GOOGLE INC.", "google")

# For each (name, tag) pair, create a global data frame called <tag>2006
# (e.g. ms2006) holding that employer's rows.
# seq_len() yields an empty sequence when corplist is empty, unlike 1:0.
for (i in seq_len(length(corplist) %/% 2)) {
  corp <- corplist[2 * i - 1]
  newdtf <- paste0(corplist[2 * i], "2006")
  assign(newdtf, makecorp(corp), pos = .GlobalEnv)
}
