What you need:
### install.packages("ggmap")
### install.packages("maps")
### install.packages("raster")
### install.packages("maptools")
### install.packages("plyr")
### install.packages("rworldmap")
### install.packages("sp")
### install.packages("reshape2")
### install.packages("rJava")
### install.packages("dismo")
## Reading in abundance data
#data<-read.table("2016_Ranavirus_Field_Data_MD.csv", header=T, sep=',', stringsAsFactors = F)
#head(data)
### Reading in site information
sites<-read.csv(file="SamplingSites.csv")
head(sites)
## Site SiteLetter Town Latitude Longitude
## 1 Berlin Pond A Berlin 44.20560 -72.58582
## 2 Gillett Pond B Huntington 44.34901 -72.97020
## 3 Birds of Vermont Museum C Huntington 44.35012 -73.00695
## 4 Delta Park D Colchester 44.53506 -73.27750
## 5 PMRC Vernal Pool E Underhill 44.52511 -72.86521
## 6 PMRC Beaver Meadow F Underhill 44.52648 -72.87260
points<-sites[,4:5] ##assign points to be plotted
## Let's make a map with a background
library(ggmap)
map<-qmap(location=c(lon = -72.897343, lat = 44.351406),zoom=9, maptype="toner-lite",source="stamen")
map + geom_point(data = points, aes(x = Longitude, y = Latitude), color="red", size=3, alpha=0.5)
## Another simpler map
library(maps)
map(database="state", region='vermont', interior=T, fill=T, col="lightgreen")
library(sp)
coordinates(points)<- ~ Longitude + Latitude
points(x=points, pch=17, col="red")
### The following line usually works, maybe something is wrong with the website?
# bioclim <- getData('worldclim', var='bio', res=10)
### Reading in climate and landcover data from tif file
library(raster)
bioclimLC<-stack("bioclim-landuse1.tif") # we need to 'stack' the layers on top of each other so that it is one file
names(bioclimLC)<-c("landcover","bio1","bio2","bio3","bio4","bio5","bio6","bio7","bio8","bio9","bio10","bio11","bio12","bio13","bio14","bio15","bio16","bio17","bio18","bio19") #renaming
bioclimLC1<-crop(x=bioclimLC, extent(-74,-71, 42.5, 45.5)) # cropping it to Vermont's extent
plot(bioclimLC1)
set.seed(44)
points<-sites[,4:5] # subset only the latitudes and longitudes
fakePres<-sample(c(rep("Y", 9),rep("N",9))) ## creating fake presence data to compare sites
points<-cbind(points, fakePres)
climData <- extract(x=bioclimLC1, y=points[, c("Longitude", "Latitude")]) # extracting values of each layer (19 bioclim, 1 landcover) at each of the sites
dbio1 <- cbind(points, climData)
summary(aov(formula=bio1~fakePres, data=dbio1)) # comparing the annual mean temperature (bio1) of the sites 'with disease' and without
## Df Sum Sq Mean Sq F value Pr(>F)
## fakePres 1 272.2 272.2 7.647 0.0138 *
## Residuals 16 569.6 35.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
boxplot(bio1~fakePres, data=dbio1, col=c("blue","red"))
mean(subset(dbio1, fakePres=="Y")$bio1) # checking means
## [1] 59.22222
mean(subset(dbio1, fakePres=="N")$bio1)
## [1] 67
fakeP <- ifelse(dbio1$fakePres=="Y", 1, 0) # assigning presence as '1' and absence as '0' for the model
dbio1$fakeP<-fakeP # adding our new column of 0's and 1's to the original climate dataset
MyModel <- glm(fakeP~bio1, family=binomial("logit"),data=dbio1)
summary(MyModel)
##
## Call:
## glm(formula = fakeP ~ bio1, family = binomial("logit"), data = dbio1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.01075 -0.65756 -0.08662 0.91880 1.33184
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 12.85363 5.97432 2.151 0.0314 *
## bio1 -0.20323 0.09372 -2.168 0.0301 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 24.953 on 17 degrees of freedom
## Residual deviance: 18.558 on 16 degrees of freedom
## AIC: 22.558
##
## Number of Fisher Scoring iterations: 3
plot(x=dbio1$bio1,y=fakeP,xlab="Annual Mean Temperature (C)",ylab="P(Extant)", type='n', ylim=c(0,1))
curve(predict(MyModel,data.frame(bio1=x),type="resp"),add=T)
points(x=dbio1$bio1,y=fakeP,cex=2,pch=21,bg="lightblue")
library(plyr)
#siteAbundance<-ddply(data,.(Species, Site_Letter),summarize,total=length(Species))
#head(siteAbundance)
#save(siteAbundance,file="sitebundance.Rdata")
load(file="siteabundance.Rdata")
library(reshape2)
wSiteAbund<-dcast(siteAbundance,Site_Letter~Species) # manipulating so species abundance numbers are listed per site ('wide' format)
head(wSiteAbund)
## Site_Letter Ambystoma spp Anaxyrus americanus Chrysemys<ca>picta
## 1 A NA NA NA
## 2 B NA NA NA
## 3 C 6 NA NA
## 4 D NA 13 1
## 5 E 6 NA NA
## 6 F 4 NA NA
## Hyla versicolor Lithobates catesbeianus Lithobates clamitans
## 1 NA NA 78
## 2 1 NA 7
## 3 11 NA 84
## 4 NA 5 11
## 5 NA NA 52
## 6 22 NA 90
## Lithobates palustris Lithobates pipiens Lithobates sylvaticus
## 1 NA NA NA
## 2 3 NA NA
## 3 1 2 NA
## 4 NA 40 NA
## 5 NA NA 63
## 6 NA NA NA
## Notophthalmus viridescens Pseudacris crucifer
## 1 7 NA
## 2 110 5
## 3 110 4
## 4 NA NA
## 5 NA 3
## 6 34 9
wSiteAbund$lat<-sites$Latitude # adding longitude and latitude data to site abundance
wSiteAbund$lon<-sites$Longitude
wSiteAbund[is.na(wSiteAbund)]<-0 # replacing NAs with zeros
Names<-names(wSiteAbund)[2:12] # leaving out C. picta because there was only one occurrence
library(rworldmap)
plot(bioclimLC1$bio2, 5, xlim=c(-73.5,-72.4), ylim=c(43,45),col="white", axes=TRUE, legend=F,main="",box=FALSE) # plotting an empty map to get axes
map("state", c('vermont'), interior=T, fill=T, col="lightgreen",add=T)
pies<-mapPies(dF=wSiteAbund[,-4],nameX="lon",nameY="lat",nameZs=Names[-3],zColours=c("blue","purple","orange","red","darkred","darkorchid1","darksalmon","goldenrod","cadetblue","chocolate","coral4"),xlim=c(-72,-74), ylim=c(43,44.8),landCol="gray50",addCatLegend = T,add=T,font=6,symbolSize=2.5)
## symbolMaxSize= 0.036 maxSumValues= 218 symbolScale= 0.002438228
## List of 2
## $ x: num [1:100] -74.9 -74.9 -74.9 -74.9 -74.9 ...
## $ y: num [1:100] 44.8 44.8 44.8 44.8 44.8 ...
bioclimLC1<-crop(x=bioclimLC, extent(-75, -67, 40, 46)) # cropping to New England extent
plot(bioclimLC1)
library(maptools)
data("wrld_simpl")
ws1<-rasterize(wrld_simpl,bioclimLC1) #Transfer values associated with 'object' type spatial data (points, lines, polygons) to raster cells
values(bioclimLC1)[is.na(values(bioclimLC1))]<-0 # replacing NAs with zeros
layers1<-mask(x=bioclimLC1,mask=ws1) #Create a new Raster* object that has the same values as x, except for the cells that are NA
plot(layers1)
library(dismo)
PresPoints<-subset(dbio1, dbio1$fakePres=="Y")
presPoints<-data.frame(lon=PresPoints[,2],lat=PresPoints[,1]) # need to switch order for Maxent
group <- dismo::kfold(presPoints, 5)
pres_train <- presPoints[group != 1, ]
pres_test <- presPoints[group == 1, ]
### absence points
AbsPoints<-subset(dbio1, dbio1$fakePres=="N")
absPoints<-data.frame(lon=AbsPoints[,2],lat=AbsPoints[,1])
group <- dismo::kfold(absPoints, 5)
abs_train <- absPoints[group != 1, ]
abs_test <- absPoints[group == 1, ]
#run MAXENT
# train the model
xm <- maxent(x=layers1, removeDuplicates=T, p=pres_train,a=abs_train, args=c("-J","-P"))
# xm #opens browser and reveals more stats (importance of variables)
#evaluate (test the model)
e2 <- evaluate(pres_test, abs_test, xm, layers1)
e2@auc # gives AUC value (1 is perfect prediction)
## [1] 1
## keep in mind were are using very low numbers - only using 4 points to test the model
#predict with all points
xmFull <- maxent(x=layers1, removeDuplicates=T, p=presPoints,a=absPoints, args=c("-J","-P"))
px <- predict(layers1, xmFull, progress="")
#plot maxent
plot(px, main="Predicted distribution with absence data", xlim=c(-75,-67),ylim=c(40,46))
map("state", 'vermont', add=T)
map('usa',add=T)
#### create background set with 500 random points as "pseudo-absence"
backg <- dismo::randomPoints(layers1, n=500, warn=0)
colnames(backg) = c('lon', 'lat')
group <- dismo::kfold(backg, 5)
backg_train <- backg[group != 1, ]
backg_test <- backg[group == 1, ]
xm2 <- dismo::maxent(x=layers1, removeDuplicates=T, p=pres_train,a=backg_train, args=c("-J","-P"))
e2 <- evaluate(pres_test, backg_test, xm2, layers1)
e2@auc
## [1] 0.9775
#predict
xm2FULL <- maxent(x=layers1, removeDuplicates=T, p=presPoints,a=backg, args=c("-J","-P"))
px2 <- predict(layers1, xm2FULL, progress="")
#plot maxent
plot(px2, main="Predicted distribution with pseudo-absence",xlim=c(-75,-67),ylim=c(40,46))
map("state", 'vermont', add=T)
map('usa',add=T)
coordinates(points)<- ~ Longitude + Latitude
points(points, pch=17, col="red")