# File: in98est.txt
# Purpose: Estimation for Indiana Upper Wabash River Basin Biological Status
# Programmer:  Tony Olsen
# Date:  January 2006
#
#Example updated for R 2.2.1 and psurvey.analysis 2.9
#
# If R workspace from weight adjustment was not saved, then read in designstatus file
# Note that designstatus file contains the data results as well as design status information

designstatus <- read.table('designstatus.csv',sep=',',header=TRUE)
tail(designstatus)


# Set up sites, subpop, dsgn, and data dataframes for estimation functions

# sites identifies which sites to use in estimation
table(designstatus$StatusB)  # Note that all sites were evaluated
# All sites will be used to estimate stream length in each category
# sites must have SiteId and column indicating if site will be used
sites <- data.frame(siteID=designstatus$SiteID,
										Use=rep(TRUE,nrow(designstatus)) )


# Define subpopulations for which estimates are desired
# In this case only for entire Upper Wabash Basin
subpop <- data.frame( siteID=designstatus$SiteID,
										UpperWabash=designstatus$Basin )

# Specify design information
dsgn <- data.frame( siteID=designstatus$SiteID,
										stratum=designstatus$Stratum,
										wgt=designstatus$final.wt,
										xcoord=designstatus$xmarinus,
										ycoord=designstatus$ymarinus)

# Specify data for estimation of Site Evaluation categories
# Add a variable that assigns categories to Target or NonTarget streams
designstatus$StatusTNT <- designstatus$StatusB
levels(designstatus$StatusTNT) <- list(T=c('TS','LD','OT','SCNB','PB'),NT=c('NT','UK'))

data <- data.frame(siteID=designstatus$SiteID,
									BiologicalStatus=designstatus$StatusB,
									StatusTNT=designstatus$StatusTNT)


# Estimate Site Evaluation categories
# Note - set popsize to known length of frame for Upper Wabash.
sitestatussum <- cat.analysis(sites, subpop, dsgn, data,
				         popsize=list(UpperWabash=framelen) )
				         
# Write results out as csv file to read in spreadsheet
write.table(sitestatussum, file = "sitestatussum.csv", sep = ",",col.names=NA)


# Do estimates for IBI and QHEI status
# select which sites to use - only TS-target sampled sites
sites <- data.frame(siteID=designstatus$SiteID,
										Use=designstatus$StatusB=='TS')

# Can use same subpop and dsgn dataframes as before. so don't need to redo.

# Set up the data data frame
data <- data.frame( siteID=designstatus$SiteID,
						IBIStatus=designstatus$IBI.Status,
						QHEIStatus=designstatus$QHEI.Status)
						
# Calculate the estimates: note no longer know the size of the target stream length
# so we do not specify it.
conditionsum <- cat.analysis(sites,subpop,dsgn,data)

# Write results out as csv file to read in spreadsheet
write.table(conditionsum, file = " conditionsum.csv", sep = ",",col.names=NA)






# Complete continuous indicator estimates 
# Can use same sites, subpop, and dsgn dataframes as before

# Set up the data dataframe
data <- data.frame(siteID=designstatus$SiteID,
				IBIScore=designstatus$IBI.Score,
				QHEIScore=designstatus$QHEI.Score)
				
scoressum <- cont.analysis(sites,subpop,dsgn,data,
			pctval=c(5, 10, 25, 50, 75, 90, 95),
			)


# Write results out as csv file to read in spreadsheet
write.table(scoressum$CDF, file = "scoressumcdf.csv", sep = ",",col.names=NA)
write.table(scoressum$Pct, file = "scoressumpct.csv", sep = ",",col.names=NA)
write.table(scoressum$Tot, file = "scoressumtot.csv", sep = ",",col.names=NA)



# Can use R to Plot CDF estimates using function: cdfplot.fcn
#	File: cdfplot.fcn.r
#	Programmer: Tony Olsen
#	Date: Sept 23, 2002
# Input:
#	cdfest - dataframe with
#			x-value for cdf plot
#			y-value (cdf estimate)
#			lower confidence bound
#			upper confidence bound
#
# Output:
#	plot of cdf with confidence bounds	
cdfplot.fcn <- function(cdfest,prop=T,...){

if(prop == T) {
	plot(cdfest[,1],cdfest[,2]  ,
		type='l',ylim=c(0,100),...)
	tvalue <- cdfest[,2]>=5 & cdfest[,2]<=95
}
else {
	plot(cdfest[,1],cdfest[,2],type='l',...)
	tvalue <- cdfest[,2]>=0.05*max(cdfest[,2]) & 
			cdfest[,2]<=0.95*max(cdfest[,2])
}
value <- cdfest[,1][tvalue]
upper <- cdfest[,4][tvalue]
lower <- cdfest[,3][tvalue]
lines(value,lower,lty=2)
lines(value,upper,lty=2)

legend(x=min(cdfest[,1]),y=max(cdfest[,2]),
	legend=c('CDF estimate','95% Confidence Limits'),
	lty=c(1,2), bty='n', cex=.7)
}

pdf('results.pdf',width=8,height=11.5)
op <- par(mfrow=c(2,2))

# Plot IBI Score CDFs for percent and km stream length
tst <- scoressum$CDF$Indicator=='IBIScore'
cdf <- scoressum$CDF[tst,c('Value','Estimate.P','LCB95Pct.P','UCB95Pct.P')]
xlab <- 'IBI Score'
ylab <- 'Percent Stream Length'
cdfplot.fcn(cdf,prop=T,xlab=xlab,ylab=ylab)
title('IBI Score CDF Estimate')

tst <- scoressum$CDF$Indicator=='IBIScore'
cdf <- scoressum$CDF[tst,c('Value','Estimate.U','LCB95Pct.U','UCB95Pct.U')]
xlab <- 'IBI Score'
ylab <- 'Stream Length (km)'
cdfplot.fcn(cdf,prop=F,xlab=xlab,ylab=ylab)
title('IBI Score CDF Estimate')

# Plot QHEI Score CDFs for percent and km stream length
tst <- scoressum$CDF$Indicator=='QHEIScore'
cdf <- scoressum$CDF[tst,c('Value','Estimate.P','LCB95Pct.P','UCB95Pct.P')]
xlab <- 'QHEI Score'
ylab <- 'Percent Stream Length'
cdfplot.fcn(cdf,prop=T,xlab=xlab,ylab=ylab)
title('QHEI Score CDF Estimate')

tst <- scoressum$CDF$Indicator=='QHEIScore'
cdf <- scoressum$CDF[tst,c('Value','Estimate.U','LCB95Pct.U','UCB95Pct.U')]
xlab <- 'QHEI Score'
ylab <- 'Stream Length (km)'
cdfplot.fcn(cdf,prop=F,xlab=xlab,ylab=ylab)
title('QHEI Score CDF Estimate')

par(op)
graphics.off()