Lab #12: Spatial Analysis

What you need:

SamplingSites.csv
bioclim-landuse1.tif
siteabundance.Rdata
Copy maxent.jar into dismo library folder (if unsure where library is, check Tools -> Install Packages and check pathway under “Install to Library”)

Installing packages and loading in site information

### install.packages("ggmap")
### install.packages("maps")
### install.packages("raster")
### install.packages("maptools")
### install.packages("plyr")
### install.packages("rworldmap")
### install.packages("sp")
### install.packages("reshape2")   
### install.packages("rJava")
### install.packages("dismo")

## Reading in abundance data   
#data<-read.table("2016_Ranavirus_Field_Data_MD.csv", header=T, sep=',', stringsAsFactors = F)
#head(data)

### Reading in site information
sites<-read.csv(file="SamplingSites.csv")    
head(sites)

##                      Site SiteLetter       Town Latitude Longitude
## 1             Berlin Pond          A     Berlin 44.20560 -72.58582
## 2            Gillett Pond          B Huntington 44.34901 -72.97020
## 3 Birds of Vermont Museum          C Huntington 44.35012 -73.00695
## 4              Delta Park          D Colchester 44.53506 -73.27750
## 5        PMRC Vernal Pool          E  Underhill 44.52511 -72.86521
## 6      PMRC Beaver Meadow          F  Underhill 44.52648 -72.87260

points<-sites[,4:5] ##assign points to be plotted

Creating a map and plotting points

## Let's make a map with a background   
library(ggmap)
map<-qmap(location=c(lon = -72.897343, lat = 44.351406),zoom=9, maptype="toner-lite",source="stamen")

map + geom_point(data = points, aes(x = Longitude, y = Latitude), color="red", size=3, alpha=0.5)

## Another simpler map
library(maps)
map(database="state", region='vermont', interior=T, fill=T, col="lightgreen")
library(sp)
coordinates(points)<- ~ Longitude + Latitude
points(x=points, pch=17, col="red")

Extracting climate data from points and analyses

### The following line usually works, maybe something is wrong with the website?
# bioclim <- getData('worldclim', var='bio', res=10)

### Reading in climate and landcover data from tif file
library(raster)
bioclimLC<-stack("bioclim-landuse1.tif") # we need to 'stack' the layers on top of each other so that it is one file
names(bioclimLC)<-c("landcover","bio1","bio2","bio3","bio4","bio5","bio6","bio7","bio8","bio9","bio10","bio11","bio12","bio13","bio14","bio15","bio16","bio17","bio18","bio19") #renaming
bioclimLC1<-crop(x=bioclimLC, extent(-74,-71, 42.5, 45.5)) # cropping it to Vermont's extent
plot(bioclimLC1)

set.seed(44) 
points<-sites[,4:5] # subset only the latitudes and longitudes
fakePres<-sample(c(rep("Y", 9),rep("N",9))) ## creating fake presence data to compare sites
points<-cbind(points, fakePres)
climData <- extract(x=bioclimLC1, y=points[, c("Longitude", "Latitude")]) # extracting values of each layer (19 bioclim, 1 landcover) at each of the sites
dbio1 <- cbind(points, climData) 

summary(aov(formula=bio1~fakePres, data=dbio1)) # comparing the annual mean temperature (bio1) of the sites 'with disease' and without

##             Df Sum Sq Mean Sq F value Pr(>F)  
## fakePres     1  272.2   272.2   7.647 0.0138 *
## Residuals   16  569.6    35.6                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

boxplot(bio1~fakePres, data=dbio1, col=c("blue","red"))

mean(subset(dbio1, fakePres=="Y")$bio1) # checking means

## [1] 59.22222

mean(subset(dbio1, fakePres=="N")$bio1)

## [1] 67

fakeP <- ifelse(dbio1$fakePres=="Y", 1, 0) # assigning presence as '1' and absence as '0' for the model
dbio1$fakeP<-fakeP # adding our new column of 0's and 1's to the original climate dataset
MyModel <- glm(fakeP~bio1, family=binomial("logit"),data=dbio1)
summary(MyModel)

## 
## Call:
## glm(formula = fakeP ~ bio1, family = binomial("logit"), data = dbio1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.01075  -0.65756  -0.08662   0.91880   1.33184  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) 12.85363    5.97432   2.151   0.0314 *
## bio1        -0.20323    0.09372  -2.168   0.0301 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 24.953  on 17  degrees of freedom
## Residual deviance: 18.558  on 16  degrees of freedom
## AIC: 22.558
## 
## Number of Fisher Scoring iterations: 3

plot(x=dbio1$bio1,y=fakeP,xlab="Annual Mean Temperature (C)",ylab="P(Extant)", type='n', ylim=c(0,1))
curve(predict(MyModel,data.frame(bio1=x),type="resp"),add=T)
points(x=dbio1$bio1,y=fakeP,cex=2,pch=21,bg="lightblue")

Creating maps with abundance data pie charts

library(plyr)
#siteAbundance<-ddply(data,.(Species, Site_Letter),summarize,total=length(Species))
#head(siteAbundance)
#save(siteAbundance,file="sitebundance.Rdata")
load(file="siteabundance.Rdata")

library(reshape2)
wSiteAbund<-dcast(siteAbundance,Site_Letter~Species) # manipulating so species abundance numbers are listed per site ('wide' format)
head(wSiteAbund)

##   Site_Letter Ambystoma spp Anaxyrus americanus Chrysemys<ca>picta
## 1           A            NA                  NA                 NA
## 2           B            NA                  NA                 NA
## 3           C             6                  NA                 NA
## 4           D            NA                  13                  1
## 5           E             6                  NA                 NA
## 6           F             4                  NA                 NA
##   Hyla versicolor Lithobates catesbeianus Lithobates clamitans
## 1              NA                      NA                   78
## 2               1                      NA                    7
## 3              11                      NA                   84
## 4              NA                       5                   11
## 5              NA                      NA                   52
## 6              22                      NA                   90
##   Lithobates palustris Lithobates pipiens Lithobates sylvaticus
## 1                   NA                 NA                    NA
## 2                    3                 NA                    NA
## 3                    1                  2                    NA
## 4                   NA                 40                    NA
## 5                   NA                 NA                    63
## 6                   NA                 NA                    NA
##   Notophthalmus viridescens Pseudacris crucifer
## 1                         7                  NA
## 2                       110                   5
## 3                       110                   4
## 4                        NA                  NA
## 5                        NA                   3
## 6                        34                   9

wSiteAbund$lat<-sites$Latitude # adding longitude and latitude data to site abundance
wSiteAbund$lon<-sites$Longitude
wSiteAbund[is.na(wSiteAbund)]<-0 # replacing NAs with zeros
Names<-names(wSiteAbund)[2:12] # leaving out C. picta because there was only one occurrence


library(rworldmap)
plot(bioclimLC1$bio2, 5, xlim=c(-73.5,-72.4), ylim=c(43,45),col="white", axes=TRUE, legend=F,main="",box=FALSE) # plotting an empty map to get axes
map("state", c('vermont'), interior=T, fill=T, col="lightgreen",add=T)
pies<-mapPies(dF=wSiteAbund[,-4],nameX="lon",nameY="lat",nameZs=Names[-3],zColours=c("blue","purple","orange","red","darkred","darkorchid1","darksalmon","goldenrod","cadetblue","chocolate","coral4"),xlim=c(-72,-74), ylim=c(43,44.8),landCol="gray50",addCatLegend = T,add=T,font=6,symbolSize=2.5)

## symbolMaxSize= 0.036  maxSumValues= 218  symbolScale= 0.002438228 
## List of 2
##  $ x: num [1:100] -74.9 -74.9 -74.9 -74.9 -74.9 ...
##  $ y: num [1:100] 44.8 44.8 44.8 44.8 44.8 ...

Predicting distribution

bioclimLC1<-crop(x=bioclimLC, extent(-75, -67, 40, 46)) # cropping to New England extent
plot(bioclimLC1)

library(maptools)
data("wrld_simpl")
ws1<-rasterize(wrld_simpl,bioclimLC1) #Transfer values associated with 'object' type spatial data (points, lines, polygons) to raster cells
values(bioclimLC1)[is.na(values(bioclimLC1))]<-0 # replacing NAs with zeros
layers1<-mask(x=bioclimLC1,mask=ws1) #Create a new Raster* object that has the same values as x, except for the cells that are NA
plot(layers1)

library(dismo)
PresPoints<-subset(dbio1, dbio1$fakePres=="Y")
presPoints<-data.frame(lon=PresPoints[,2],lat=PresPoints[,1]) # need to switch order for Maxent
group <- dismo::kfold(presPoints, 5)
pres_train <- presPoints[group != 1, ]
pres_test <- presPoints[group == 1, ]

### absence points
AbsPoints<-subset(dbio1, dbio1$fakePres=="N")
absPoints<-data.frame(lon=AbsPoints[,2],lat=AbsPoints[,1])
group <- dismo::kfold(absPoints, 5)
abs_train <- absPoints[group != 1, ]
abs_test <- absPoints[group == 1, ]

  
#run MAXENT
# train the model
xm <- maxent(x=layers1, removeDuplicates=T, p=pres_train,a=abs_train, args=c("-J","-P"))
# xm #opens browser and reveals more stats (importance of variables)

#evaluate (test the model)
e2 <- evaluate(pres_test, abs_test, xm, layers1) 
e2@auc # gives AUC value (1 is perfect prediction)

## [1] 1

## keep in mind were are using very low numbers - only using 4 points to test the model  

#predict with all points
xmFull <- maxent(x=layers1, removeDuplicates=T, p=presPoints,a=absPoints, args=c("-J","-P"))
px <- predict(layers1, xmFull, progress="")

#plot maxent
plot(px, main="Predicted distribution with absence data", xlim=c(-75,-67),ylim=c(40,46))
map("state", 'vermont', add=T)
map('usa',add=T)

#### create background set with 500 random points as "pseudo-absence"
backg <- dismo::randomPoints(layers1, n=500, warn=0)
colnames(backg) = c('lon', 'lat')
group <- dismo::kfold(backg, 5)
backg_train <- backg[group != 1, ]
backg_test <- backg[group == 1, ]

xm2 <- dismo::maxent(x=layers1, removeDuplicates=T, p=pres_train,a=backg_train, args=c("-J","-P"))

e2 <- evaluate(pres_test, backg_test, xm2, layers1)
e2@auc

## [1] 0.9775

#predict
xm2FULL <- maxent(x=layers1, removeDuplicates=T, p=presPoints,a=backg, args=c("-J","-P"))
px2 <- predict(layers1, xm2FULL, progress="")

#plot maxent
plot(px2, main="Predicted distribution with pseudo-absence",xlim=c(-75,-67),ylim=c(40,46))
map("state", 'vermont', add=T)
map('usa',add=T)
coordinates(points)<- ~ Longitude + Latitude
points(points, pch=17, col="red")

Lab #12: Spatial Analysis

Lauren Ash

April 12, 2017

Installing packages and loading in site information

Creating a map and plotting points

Extracting climate data from points and analyses

Creating maps with abundance data pie charts

Predicting distribution