03 Apr

How to speed up R code?

Dear codeRs,

I recently found a very comprehensive document on how to speed up R code and wanted to share it with you!
It covers various aspects such as identifying where is your code slowing down, parallel computing, interfacing C code with R etc.
Don’t hesitate and go have a look here

02 Apr

Extract raster values in parallel

Dear fellow Remote Senseis,

Here is a code chunk that will help you speed up the processing time when you want to extract raster values at specific locations.
It uses the a package called snowfall which allows parallel processing.
As usual, please find here under a dummy example. As there is only 3 bands it the stack, you may not see the positive effect of parallel computing…try with a 100-layer stack and you’ll see.

Cheers

install.packages("snowfall")
 
library(snowfall)
library(raster)
 
ncpus <- 3
 
r <- (stack(system.file("external/rlogo.grd", package="raster")))
inSlist <- unstack(r) # convert to list of single raster layers
 
rId <- subset(r,1)
idPix <- which(rId[]>200)
 
sfInit(parallel=TRUE,cpus=ncpus)
sfLibrary(raster)
sfLibrary(rgdal)
a <- sfSapply(inSlist,extract,y=idPix)
sfStop()
13 Jan

Plot points with a google map background

Good Morning Followers!

Yesterday, I found a nice post here by Joseph Rickert. I produces nice plots with points and a google map background. Try it and have fun!

library(ggmap)
library(plyr)
library(gridExtra)
temp <- tempfile()
download.file("http://www.plantsciences.ucdavis.edu/plant/data.zip", temp)
connection <- unz(temp, "Data/Set3/Set3data.csv")
rice <- read.csv(connection)
names(rice) <- tolower(names(rice))
# Create a custom soil attribute plot
# @param df Data frame containing data for a field
# @param attribute Soil attribute
# @return Custom soil attribute plot
create_plot <- function(df, attribute) {
  map <- get_map(location = c(median(df$longitude), 
                              median(df$latitude)),
                 maptype = "satellite",
                 source = "google",
                 crop = FALSE,
                 zoom = 15)
  plot <- ggmap(map) + geom_point(aes_string(x = "longitude", 
                                             y = "latitude",
                                             color = attribute),
                                  size = 5,
                                  data = df)
  plot <- plot + ggtitle(paste("Farmer", df$farmer, "/ Field", df$field))
  plot <- plot + scale_color_gradient(low = "darkorange", high = "darkorchid4")
  return(plot)
}
ph_plot <- dlply(rice, "field", create_plot, attribute = "ph")
ph_plots <- do.call(arrangeGrob, ph_plot)

6a010534b1db25970b01bb07d2c8c1970d-800wi

07 Jan

Dreams come true: a function for RGB plots with GGPLOT2

In love with the ggplot2 look? Sick of using plotRGB to display your RGB raster?
Here comes the rggplot function!

Have a look:

 
rggbplot <- function(inRGBRst,npix=NA,scale = 'lin'){
 
  rgblinstretch <- function(rgbDf){
    maxList <- apply(rgbDf,2,max)
    minList <- apply(rgbDf,2,min)
    temp<-rgbDf
    for(i in c(1:3)){
      temp[,i] <- (temp[,i]-minList[i])/(maxList[i]-minList[i])
    }
    return(temp)
  }
 
  rgbeqstretch<-function(rgbDf){
 
    temp<-rgbDf
    for(i in c(1:3)){
      unique <- na.omit(temp[,i])
      if (length(unique>0)){
        ecdf<-ecdf(unique)
        temp[,i] <- apply(temp[,i,drop=FALSE],2,FUN=function(x) ecdf(x))
      }
    }
    return(temp)
  }
 
  if(is.na(npix)){
    if(ncell(inRGBRst)>5000){
      npix <- 5000
    }
    else{
      npix <- ncell(inRGBRst)
    }
  }
  x <- sampleRegular(inRGBRst, size=npix, asRaster = TRUE)
  dat <- as.data.frame(x, xy=TRUE)
  colnames(dat)[3:5]<-c('r','g','b')
 
  if(scale=='lin'){
    dat[,3:5]<- rgblinstretch(dat[,3:5])
  } else if(scale=='stretch'){
    dat[,3:5]<- rgbeqstretch(dat[,3:5])
  }
 
  p <- ggplot()+ geom_tile(data=dat, aes(x=x, y=y, fill=rgb(r,g,b))) + scale_fill_identity()
 
}
 
b <- (brick(system.file("external/rlogo.grd", package="raster")))*6-600
print(rggbplot(b))

rgb_ggplot2

06 Jan

Download MODIS products

Tired of browsing through the MODIS catalogue? Here is the solution for you!
Providing a list of tiles, year, product and FTP site, this here code browse all the products and download the ones your interested in. Have fun!

library(rgdal)
library(RCurl)
setInternet2(use = TRUE)
options(download.file.method="auto")
#set dir
setwd("/dir/on/my/raptor/machine")
# location of the MODIS data:
MOD09GQ <- "http://e4ftl01.cr.usgs.gov/MOLT/MOD09GQ.005/"
product = "MOD09GQ"
yearList = c('2008')
tileList <- c('h18v04')
 
for(tile in tileList){
  for(year in yearList){
    # get the list of directories (thanks to Barry Rowlingson):
    items1 <- strsplit(getURL(MOD09GQ), "\n")[[1]]
    # get the directory names and create a new data frame:
    dates=data.frame(dirname=substr(items1[20:length(items1)],52,62))
 
    # get the list of *.hdf files:
    dates <- data.frame(dirname=grep(year,dates$dirname,value = TRUE))
    for (i in 1:NROW(dates)){
      # for each date per year
      getlist <- strsplit(getURL(paste(MOD09GQ, dates$dirname[i], sep="")), ">")[[1]]
      getlist=getlist[grep(product,getlist)]
      getlist=getlist[grep(".hdf<",getlist)]
      filenames=substr(getlist,1,nchar(getlist[1])-3)
 
      BLOCK <- grep(pattern=paste0("MOD09GQ.*.",tile,".*.hdf"),filenames,value = TRUE)
 
      # Download all blocks from the list to a local drive:
      download.file(paste(MOD09GQ,  dates$dirname[i], BLOCK,sep=""),destfile=file.path(getwd(),BLOCK), cacheOK=FALSE, )
    }
  }
}
25 Dec

Charting time-series as calendar heat maps

Merry GISmas dearest followers!
A couple of weeks ago, I made a post on heatmaps. Today I came across a nice answer on stackoverflow, it goes like this:

# LOAD THE DATA
stock <- "MSFT"
start.date <- "2006-01-12"
end.date <- Sys.Date()
quote <- paste("http://ichart.finance.yahoo.com/table.csv?s=",
               stock, "&a=", substr(start.date,6,7),
               "&b=", substr(start.date, 9, 10),
               "&c=", substr(start.date, 1,4), 
               "&d=", substr(end.date,6,7),
               "&e=", substr(end.date, 9, 10),
               "&f=", substr(end.date, 1,4),
               "&g=d&ignore=.csv", sep="")    
stock.data <- read.csv(quote, as.is=TRUE)
stock.data <- transform(stock.data,
                        week = as.POSIXlt(Date)$yday %/% 7 + 1,
                        wday = as.POSIXlt(Date)$wday,
                        year = as.POSIXlt(Date)$year + 1900)

The data has the following structure:

> head(stock.data)
        Date  Open  High   Low Close   Volume Adj.Close week wday year
1 2014-12-24 48.64 48.64 48.08 48.14 11437800     48.14   52    3 2014
2 2014-12-23 48.37 48.80 48.13 48.45 23648100     48.45   51    2 2014
3 2014-12-22 47.78 48.12 47.71 47.98 26566000     47.98   51    1 2014
4 2014-12-19 47.63 48.10 47.17 47.66 64551200     47.66   51    5 2014
5 2014-12-18 46.58 47.52 46.34 47.52 40105600     47.52   51    4 2014
6 2014-12-17 45.05 45.95 44.90 45.74 34970900     45.74   51    3 2014

And now this is how we chart it:

library(ggplot2)
ggplot(stock.data, aes(week, wday, fill = Adj.Close)) + 
  geom_tile(colour = "white") + 
  scale_fill_gradientn('MSFT \n Adjusted Close',colours = c("#D61818","#FFAE63","#FFFFBD","#B5E384")) + 
  facet_wrap(~ year, ncol = 1)

Here is how it looks like :-)


Calendar heatmap


05 Dec

Plot heat maps for correlations, confusion matrices etc.

Hi folks!

Wanna present your table in a nice way? Have you ever thought of heat maps?
Here is an example on how to use them:

library(ggplot2)
library(plyr) # might be not needed here anyway it is a must-have package I think in R 
library(reshape2) # to "melt" your dataset
library (scales) # it has a "rescale" function which is needed in heatmaps 
library(RColorBrewer) # for convenience of heatmap colors, it reflects your mood sometimes
nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv")
head(nba)
nba$Name <- with(nba, reorder(Name, PTS))
nba.m <- melt(nba)
nba.m <- ddply(nba.m, .(variable), transform,rescale = rescale(value))
p <- ggplot(nba.m, aes(variable, Name)) + geom_tile(aes(fill = rescale),colour = "white") +  scale_fill_gradient2(low="#006400", mid="#f2f6c3",high="#cd0000",midpoint=0.5)

The nba data.frame has the folowing structure:

> head(nba)
            Name  G  MIN  PTS  FGM  FGA   FGP FTM FTA   FTP X3PM X3PA  X3PP ORB DRB TRB AST STL BLK  TO  PF
1   Dwyane Wade  79 38.6 30.2 10.8 22.0 0.491 7.5 9.8 0.765  1.1  3.5 0.317 1.1 3.9 5.0 7.5 2.2 1.3 3.4 2.3
2  LeBron James  81 37.7 28.4  9.7 19.9 0.489 7.3 9.4 0.780  1.6  4.7 0.344 1.3 6.3 7.6 7.2 1.7 1.1 3.0 1.7
3   Kobe Bryant  82 36.2 26.8  9.8 20.9 0.467 5.9 6.9 0.856  1.4  4.1 0.351 1.1 4.1 5.2 4.9 1.5 0.5 2.6 2.3
4 Dirk Nowitzki  81 37.7 25.9  9.6 20.0 0.479 6.0 6.7 0.890  0.8  2.1 0.359 1.1 7.3 8.4 2.4 0.8 0.8 1.9 2.2
5 Danny Granger  67 36.2 25.8  8.5 19.1 0.447 6.0 6.9 0.878  2.7  6.7 0.404 0.7 4.4 5.1 2.7 1.0 1.4 2.5 3.1
6  Kevin Durant  74 39.0 25.3  8.9 18.8 0.476 6.1 7.1 0.863  1.3  3.1 0.422 1.0 5.5 6.5 2.8 1.3 0.7 3.0 1.8

And here is the output. Nice isn’t it?


heat_map

04 Dec

Random sampling

Good Evening, fellow GIS analysts!

Here is a quick function to randomly draw a set of points over your area of interest, say for an accuracy assessment.

First the function:

 
randSamp <- function(n,ext,filename=''){
  if(class(ext)=='Extent'){
    x <- runif(n, ext@xmin, ext@xmax)
    y <- runif(n, ext@ymin, ext@ymax)
    s <- SpatialPoints(list(x,y))
  } else if (class(ext) %in% c('Raster','RasterStack','RasterBrick')){
    x <- runif(2*n, extent(ext)@xmin, extent(ext)@xmax)
    y <- runif(2*n, extent(ext)@ymin, extent(ext)@ymax)
    s <- SpatialPoints(list(x,y))
    proj4string(s) <- crs(ext)
  } else if (class(ext) %in% c('SpatialPolygonsDataFrame','SpatialPolygons')){
    x <- c()
    y <- c()
    id <- c()
    while(length(x)<n){
      xl <- runif(2*n, extent(ext)@xmin, extent(ext)@xmax)
      yl <- runif(2*n, extent(ext)@ymin, extent(ext)@ymax)
      sl <- SpatialPoints(list(xl,yl))
      proj4string(sl) <- proj4string(ext)
      point_in_pol <- over( sl , ext , fn = NULL,returnList = FALSE)
      idl <- which(!apply(point_in_pol, 1, function(x) all(is.na(x))))
      x <- cbind(x,xl[idl])
      y <- cbind(y,yl[idl])
    }
    id<-sample(c(1:length(x)),n)
    s <- SpatialPoints(list(x[id],y[id]))
    proj4string(s) <- proj4string(ext)
  } else {
    print('ext not of class "Extent" nor "SpatialPolygonsDataFrame", "SpatialPolygons", "Raster", "RasterStack","RasterBrick". I cannot work with this')
    break
  }
  if(filename!=''){
    s_points_df <- SpatialPointsDataFrame(s, data=data.frame(ID=1:n))
    shapefile(s_poly_df,filename,overwrite=TRUE)
  }
  return(s)
}

And a example on how to use it:

library(sp)
library(rgdal)
library(raster)
set.seed(1)
dat <- matrix(stats::rnorm(2000), ncol = 2)
ch <- chull(dat)
coords <- dat[c(ch, ch[1]), ]  # closed polygon
ext <- SpatialPolygons(list(Polygons(list(Polygon(coords)), ID=1)))
proj4string(ext) = CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
spsamp <- randSamp(10,ext)
 
# Now let's visualize this piece of art
plot(ext)
plot(spsamp,add=T,col='red')

sampling

03 Dec

On stretching histograms

Hello GIS addicts!

Here are two functions on how to stretch the histograms of your raster for better visualization.

#' @title Linear stretch of a multi-band raster.
#' 
#' @description Applies a linear stretch to any multiband raster \code{link{stack}} or \code{link{brick}}
#' 
#' @param img Raster* object
#' @param minmax  user defined minimum and maximum for the stretch (optional)
#' @return a Raster* object 
#' @author White-Guru
#' @seealso \code{\link{calc}}
#' @import raster
#' @export
#' 
 
linstretch<-function(img,minmax=NA){
  if(is.na(minmax)) minmax<-c(min(getValues(img),na.rm=T),max(getValues(img),na.rm=T))
  temp<-calc(img,fun=function(x) (255*(x-minmax[1]))/(minmax[2]-minmax[1]))
  #set all values above or below minmax to 0 or 255
  temp[temp<0]<-0;temp[temp>255]<-255;
  return(temp)
}
 
#' @title Histogram equalization stretch of a multi-band raster.
#' 
#' @description Applies a histogram equalization stretch to any multiband raster \code{link{stack}} or \code{link{brick}}
#' 
#' @param img Raster* object
#' @return a Raster* object 
#' @author White-Guru
#' @seealso \code{\link{calc}}
#' @import raster
#' @export
#'
eqstretch<-function(img){
  unique <- na.omit(getValues(img))
  if (length(unique>0)){
    ecdf<-ecdf(unique)
    out <- calc(img,fun=function(x) ecdf(x)*255)
  }
  return(out)
}

And here is a dummy example of their effect:

 
library(raster)
library(RColorBrewer)
 
#import a random raster
b <- (brick(system.file("external/rlogo.grd", package="raster")))*6-600
plotRGB(linstretch(b))
 
b4 <- subset(b,1)
 
nf<-layout(matrix(seq(1,8), byrow=T, nrow=2))
par(mar=c(3,3,1,1),oma=rep(0,4))
myramp<-colorRampPalette(brewer.pal(5,"PiYG")) #custom ramp from RColorBrewer
 
# no stretch
# BAD hack in order to get plot(*Raster) to look like it didn't apply a Min/Max stretch
temp<-b4; temp[1]<-0; temp[2]<-255;
plot(temp, col=myramp(255)) #plot with diff color map
hist(getValues(b4), freq=F, main="Original raster")
rm(temp)
 
#min/max stretch and histogram
b4.minmax<-linstretch(b4)
plot(b4.minmax, col=myramp(255))
hist(getValues(b4.minmax), breaks=seq(0,255), xlim=c(0,255), freq=F, main="Linear Min/Max")
rm(b4.minmax)
 
#histogram equalization
plot(eqstretch(b4), col=myramp(255))
b4.ecdf<-ecdf(getValues(b4))
hist(getValues(calc(b4, fun=function(x) b4.ecdf(x)*255)), breaks=seq(0,255), xlim=c(0,255), freq=F, main="Histogram Equalization")
 
#2% linear stretch
b4quant<-quantile(getValues(b4), c(0.02,0.98)) # 2% cutoffs
b4.linstretch<-linstretch(b4, minmax=c(b4quant[1],b4quant[2]))
plot(b4.linstretch, col=myramp(255))
hist(getValues(b4.linstretch), breaks=seq(0,255), xlim=c(0,255), freq=F, main="Linear 2%")

And here is the result:
hist

03 Dec

Plot a raster with discrete values

Howdy!

Here is a small post on how to plot a raster with discrete classes:

library(raster)
 
f <- system.file("external/test.grd", package="raster")
r <- raster(f)
r <- reclassify(r, c(0, 500, 1,
                     500, 2000, 2))
gplot(r) +
  geom_raster(aes(fill = factor(value))) +scale_fill_manual(values=c('red','green'),'legend',labels = c("1", "2"))+ 
  coord_equal()+
  labs(title = paste('Test title'))+scale_x_continuous(expand = c(0, 0))+scale_y_continuous(expand = c(0, 0))

And here is the result:
plot_discrete