Wednesday, October 30, 2013

Downloading daily USD/CAD fx rate within R

The code below illustrates how to download the daily USD/CAD FX rate from within R, using the weekly archives at http://ratedata.gaincapital.com
# Fetch every weekly USD/CAD archive for the given years and accumulate the
# extracted rows in `outFile`.
#
# years   : vector of years; each one becomes a path component of the URL.
# outFile : path of the output file. An existing file is deleted first, then
#           downloadAndExtractData() appends to it archive by archive.
#
# For each year, all 12 months and week numbers 1..5 are tried. Each attempt is
# wrapped in try(), so an archive that fails simply moves on to the next one.
downloadAndExtract_USD_CAD <- function(years, outFile) {
  base_url <- "http://ratedata.gaincapital.com"
  # Month folders on the server look like "01 January", "02 February", ...
  month_dirs <- paste(sprintf("%02d", 1:12), month.name)
  week_files <- paste0("USD_CAD_Week", 1:5, ".zip")

  # Start from a clean output file
  if (file.exists(outFile)) {
    file.remove(outFile)
  }

  for (year in years) {
    for (month_dir in month_dirs) {
      for (week_file in week_files) {
        zip_url <- paste(base_url, year, month_dir, week_file, sep = "/")
        cat(paste("Downloading:", zip_url, "\n"))
        try(downloadAndExtractData(zip_url, outFile), silent = TRUE)
      }
    }
  }
}

# Download one zip archive, extract its single CSV, keep only the rows whose
# third column contains " 16:59", and append those rows to `outFile`.
#
# zipfile : URL of the zip archive to download.
# outFile : path of the output file; rows are appended with write.table()
#           (space separated, no header, no row names).
#
# Returns NULL if the extracted file is empty, otherwise the result of removing
# the extracted file (TRUE on success).
# Stops with an error if the download fails, or if the archive does not contain
# exactly one file.
downloadAndExtractData <- function(zipfile, outFile) {
  # Private scratch directory; removed on exit (normal or error) so that neither
  # the zip nor the extracted data is left behind in the session temp dir.
  work_dir <- tempfile()
  dir.create(work_dir)
  on.exit(unlink(work_dir, recursive = TRUE), add = TRUE)

  # file.path() keeps this portable; the zip lives next to, not inside, the
  # extraction directory so it is never mistaken for a data file.
  dFile <- file.path(work_dir, "zzz.zip")
  extract_dir <- file.path(work_dir, "data")
  dir.create(extract_dir)
  print(dFile)

  # Report a failed download explicitly instead of letting it resurface later
  # as an unrelated error.
  status <- tryCatch(
    download.file(zipfile, destfile = dFile, mode = "wb"),
    error = function(e) e
  )
  if (inherits(status, "error") || !isTRUE(status == 0) || !file.exists(dFile)) {
    stop("Download failed: ", zipfile, call. = FALSE)
  }

  # Unzip the file into the extraction dir and see what came out
  unzip(dFile, exdir = extract_dir)
  files <- list.files(extract_dir)
  if (length(files) == 0) stop("No data file inside zip")
  if (length(files) > 1) stop("More than one data file inside zip")

  # Full name of the extracted file
  f <- file.path(extract_dir, files[1])
  size <- file.info(f)$size
  if (is.na(size) || size == 0) {
    cat("Zero file size\n")
    return()
  }

  # Read the file
  cat("\n")
  print(c("Downladed tmp file:", f))
  cat("\n")
  dat <- read.csv(f, header = FALSE)
  print(c("Downloaded #nrows:", nrow(dat)))

  # We are just interested in prices at 16:59
  index <- grepl(" 16:59", dat[, "V3"])
  d <- dat[index, ]

  # Append to output file
  write.table(d, outFile, append = TRUE, row.names = FALSE, col.names = FALSE)

  # Count everything written so far. The output has no header line, so count
  # lines directly rather than parsing it as a CSV with a header row.
  total_rows <- if (file.exists(outFile)) length(readLines(outFile)) else 0L
  cat("\n")
  print(c("Total rows:", total_rows))

  # Tidy up a bit (the rest of the scratch dir is removed by on.exit)
  file.remove(f)
}

Friday, October 25, 2013

fBasics (Basic Stats) library in R

% Load the package fBasics.
> library(fBasics)

% Load the data.
% header=T means 1st row of the data file contains variable names. The default is header=F, i.e., no names.
> da=read.table("http://www.mif.vu.lt/~rlapinskas/DUOMENYS/Tsay_fts3/d-ibm3dx7008.txt",header=T) 

> ibm=da[,2] % Obtain IBM simple returns
> sibm=ibm*100 % Percentage simple returns

> basicStats(sibm) % Compute the summary statistics

% Turn to log returns in percentages
> libm=log(ibm+1)*100
> t.test(libm) % Test mean being zero.