> # File: in98adjwts_arm.txt > # Purpose: 1998 survey design for Upper Wabash Basin adjust weights > # using DesignFile, frame information, and Site Evaluation file > # Programmer: Tony Olsen > # Date: March 2005 > # > #Example updated for R 2.0.1 and psurvey.analysis 2.6 > # > # Frame Stream length from RF3. Length includes all Strahler orders and possibly > # both perennial and Non-perennial streams > # The design documentation has 7663.497 km of stream; with 305.347 km in 5th-7th order > framelen <- c('Upper Wabash'=7358.150) > > > # Read Design File for 1998 Upper Wabash Basin > # file is in sub folder named Original Data > DesignFile <- read.delim('original_data\\inrb98.txt') > head(DesignFile) SiteID SiteName County RF3RCHID MAP24K 1 INRB98-001 Mud Creek RANDOLPH 5120103 26 3.43 Maxville 2 INRB98-002 Loon Creek HUNTINGTON 5120101 542 0.00 Andrews 3 INRB98-003 EIGHTMILE CR WELLS 5120101 17 9.63 Ossian 4 INRB98-004 SALAMONIE R JAY 5120102 147.04 Pennville 5 INRB98-005 HOAGLUND DITCH WHITE 5120106 29 0.00 Monon 6 INRB98-006 West Honey Creek HOWARD 5120107 145 0.00 Russiaville MAP100K MAP250K Long.dd Lat.dd Stratum MDCaty Weight StrahlerOrder 1 MUNCIE MUNCIE 85.00357 40.21902 3 21 368.52653 1 2 WABASH MUNCIE 85.60551 40.85739 3 21 368.52653 1 3 WABASH MUNCIE 85.17342 40.87987 3 22 117.81714 2 4 MUNCIE MUNCIE 85.14647 40.48114 3 24 53.90157 4 5 LOGANSPORT DANVILLE 86.96295 40.78734 3 23 60.42005 3 6 LA FAYETTE DANVILLE 86.27908 40.43969 3 22 117.81714 2 OECO_96 1 55 2 55 3 55 4 55 5 54 6 55 > > > # What are the MDcaty and StrahlerOrder present? > table(DesignFile$MDCaty) 21 22 23 24 25 25 26 24 > table(DesignFile$StrahlerOrder) 1 2 3 4 25 25 26 24 > table(DesignFile$StrahlerOrder,DesignFile$MDCaty) 21 22 23 24 1 25 0 0 0 2 0 25 0 0 3 0 0 26 0 4 0 0 0 24 > > > # Import 1998 Site Evaluation file from Indiana > SiteEval <- read.delim('original_data\\siteevaluation98.txt') > head(SiteEval) SiteIDF Basin HUC14Code StreamName StrahlerOrder StatusB 1 INRB98-001 Upper Wabash 5.120103e+12 Mud Cr 1 LD 2 INRB98-002 Upper Wabash 5.120101e+12 Loon Cr 1 TS 3 INRB98-003 Upper Wabash 5.120101e+12 Eightmile Cr 2 TS 4 INRB98-004 Upper Wabash 5.120102e+12 Salamonie River 4 LD 5 INRB98-005 Upper Wabash 5.120106e+12 Hoaglund Ditch 3 TS 6 INRB98-006 Upper Wabash 5.120107e+12 Walnut Fk Honey Cr 2 PB Comments BiologicalSampleDate QHEI.Score QHEI.Status IBI.Score 1 Access Denied NA NS NA 2 07/08/1998 48 Impaired 50 3 07/08/1998 65 Not Impaired 22 4 Access Denied NA NS NA 5 07/28/1998 31 Impaired 38 6 Physical Barriers NA NS NA IBI.Status 1 NS 2 Not Impaired 3 Impaired 4 NS 5 Not Impaired 6 NS > > # Check codes for StatusB - Biological Site Evaluation status > table(SiteEval$StatusB) LD NT OT PB SCNB TS UK 19 9 2 7 14 48 1 > # Note on code meanings > # TS - target and sampled site > # NT - site was non-target (dry, non-wadeable,...) > # LD - landowner denied access to site > # PB - physical barrier - unable to reach site > # OT - target site for which a sample was not collected > # UK - Unknown whether site is target or non-target (shouldn't happen) > # SCNB - target site Sampled for Chemistry but Not Biology (sampled fewer biology sites) > > # Create designstatus file > designstatus <- merge(SiteEval, DesignFile, by.x='SiteIDF', by.y='SiteID') > head(designstatus) SiteIDF Basin HUC14Code StreamName StrahlerOrder.x 1 INRB98-001 Upper Wabash 5.120103e+12 Mud Cr 1 2 INRB98-002 Upper Wabash 5.120101e+12 Loon Cr 1 3 INRB98-003 Upper Wabash 5.120101e+12 Eightmile Cr 2 4 INRB98-004 Upper Wabash 5.120102e+12 Salamonie River 4 5 INRB98-005 Upper Wabash 5.120106e+12 Hoaglund Ditch 3 6 INRB98-006 Upper Wabash 5.120107e+12 Walnut Fk Honey Cr 2 StatusB Comments BiologicalSampleDate QHEI.Score QHEI.Status 1 LD Access Denied NA NS 2 TS 07/08/1998 48 Impaired 3 TS 07/08/1998 65 Not Impaired 4 LD Access Denied NA NS 5 TS 07/28/1998 31 Impaired 6 PB Physical Barriers NA NS IBI.Score IBI.Status SiteName County RF3RCHID 1 NA NS Mud Creek RANDOLPH 5120103 26 3.43 2 50 Not Impaired Loon Creek HUNTINGTON 5120101 542 0.00 3 22 Impaired EIGHTMILE CR WELLS 5120101 17 9.63 4 NA NS SALAMONIE R JAY 5120102 147.04 5 38 Not Impaired HOAGLUND DITCH WHITE 5120106 29 0.00 6 NA NS West Honey Creek HOWARD 5120107 145 0.00 MAP24K MAP100K MAP250K Long.dd Lat.dd Stratum MDCaty Weight 1 Maxville MUNCIE MUNCIE 85.00357 40.21902 3 21 368.52653 2 Andrews WABASH MUNCIE 85.60551 40.85739 3 21 368.52653 3 Ossian WABASH MUNCIE 85.17342 40.87987 3 22 117.81714 4 Pennville MUNCIE MUNCIE 85.14647 40.48114 3 24 53.90157 5 Monon LOGANSPORT DANVILLE 86.96295 40.78734 3 23 60.42005 6 Russiaville LA FAYETTE DANVILLE 86.27908 40.43969 3 22 117.81714 StrahlerOrder.y OECO_96 1 1 55 2 1 55 3 2 55 4 4 55 5 3 54 6 2 55 > nrow(designstatus) [1] 100 > > # Add equal area x,y coordinates to be used in variance estimation > # Note that longitude must be changed to be negative > tmp <- marinus(designstatus$Lat.dd,-designstatus$Long.dd) > designstatus$xmarinus <- tmp[,'x'] > designstatus$ymarinus <- tmp[,'y'] > > # Adjust weights to match frame length > # Determine which sites to include in weight adjustment. > # All 100 sites were evaluated so all should be used. > # Note that original design weights assumed sample size of 50; > # now have sample size of 100 > sites <- rep(TRUE, nrow(designstatus) ) > designstatus$final.wt <- adjwgt(sites,designstatus$Weight , + designstatus$Basin, framelen) > > # Check out weights. sum of final weights must equal framelen. > sum(designstatus$Weight) [1] 15023.15 > sum(designstatus$final.wt) [1] 7358.15 > > > # Write out Design Status File > write.table(designstatus, file="designstatus.csv",sep=",",row.names=FALSE) > > # File: in98est.txt > # Purpose: Estimation for Indiana Upper Wabash River Basin Biological Status > # Programmer: Tony Olsen > # Date: March 2005 > # > #Example updated for R 2.0.1 and psurvey.analysis 2.6 > # > # If R workspace from weight adjustment was not saved, then read in designstatus file > # Note that designstatus file contains the data results as well as design status information > > designstatus <- read.table('designstatus.csv',sep=',',header=TRUE) > tail(designstatus) SiteIDF Basin HUC14Code StreamName StrahlerOrder.x 95 INRB98-095 Upper Wabash 5.120105e+12 Wise Grinslade Ditch 2 96 INRB98-096 Upper Wabash 5.120105e+12 Harter Shirar Ditch 1 97 INRB98-097 Upper Wabash 5.120105e+12 Rock Cr 3 98 INRB98-098 Upper Wabash 5.120105e+12 Rock Cr 1 99 INRB98-099 Upper Wabash 5.120106e+12 Tippecanoe River 4 100 INRB98-100 Upper Wabash 5.120106e+12 Tippecanoe River 4 StatusB Comments BiologicalSampleDate QHEI.Score 95 LD Access Denied NA 96 NT Dry NA 97 SCNB No biological sample collected. NA 98 NT Dry NA 99 TS 07/14/1998 78 100 SCNB No biological sample collected. NA QHEI.Status IBI.Score IBI.Status SiteName County RF3RCHID 95 NS NA NS MIAMI 5120105 194 0.00 96 NS NA NS CARROLL 5120105 166 4.27 97 NS NA NS ROCK CR CARROLL 5120105 8 2.65 98 NS NA NS ROCK CR CASS 5120105 821.62 99 Not Impaired 48 Not Impaired TIPPECANOE R FULTON 5120106 17 3.84 100 NS NA NS TIPPECANOE R PULASKI 5120106 9 0.00 MAP24K MAP100K MAP250K Long.dd Lat.dd Stratum MDCaty Weight 95 Miami LOGANSPORT DANVILLE 86.09874 40.60312 3 22 117.81714 96 Flora LOGANSPORT DANVILLE 86.50656 40.56342 3 21 368.52653 97 Burrows LOGANSPORT DANVILLE 86.58547 40.65874 3 23 60.42005 98 Onward LOGANSPORT DANVILLE 86.19225 40.64311 3 21 368.52653 99 Argos KNOX CHICAGO 86.12994 41.16369 3 24 53.90157 100 Winamac KNOX CHICAGO 86.59149 41.02049 3 24 53.90157 StrahlerOrder.y OECO_96 xmarinus ymarinus final.wt 95 2 55 -15.29491 -15.652854 57.70535 96 1 55 -49.65176 -20.067293 180.49965 97 3 55 -56.29937 -9.468415 29.59298 98 1 55 -23.17251 -11.206280 180.49965 99 4 56 -17.92302 46.679797 26.40031 100 4 56 -56.80670 30.756684 26.40031 > > > # Set up sites, subpop, dsgn, and data dataframes for estimation functions > > # sites identifies which sites to use in estimation > table(designstatus$StatusB) # Note that all sites were evaluated LD NT OT PB SCNB TS UK 19 9 2 7 14 48 1 > # All sites will be used to estimate stream length in each category > # sites must have SiteId and column indicating if site will be used > sites <- data.frame(siteID=designstatus$SiteID, + Use=rep(TRUE,nrow(designstatus)) ) > > > # Define subpopulations for which estimates are desired > # In this case only for entire Upper Wabash Basin > subpop <- data.frame( siteID=designstatus$SiteID, + UpperWabash=designstatus$Basin ) > > # Specify design information > dsgn <- data.frame( siteID=designstatus$SiteID, + stratum=designstatus$Stratum, + wgt=designstatus$final.wt, + xcoord=designstatus$xmarinus, + ycoord=designstatus$ymarinus) > > # Specify data for estimation of Site Evaluation categories > # Add a variable that assigns categories to Target or NonTarget streams > designstatus$StatusTNT <- designstatus$StatusB > levels(designstatus$StatusTNT) <- list(T=c('TS','LD','OT','SCNB','PB'),NT=c('NT','UK')) > > data <- data.frame(siteID=designstatus$SiteID, + BiologicalStatus=designstatus$StatusB, + StatusTNT=designstatus$StatusTNT) > > > # Estimate Site Evaluation categories > # Note - set popsize to known length of frame for Upper Wabash. > sitestatussum <- cat.analysis(sites, subpop, dsgn, data, + popsize=list(UpperWabash=framelen) ) > > # Write results out as csv file to read in spreadsheet > write.table(sitestatussum, file = "sitestatussum.csv", sep = ",",col.names=NA) > > > # Do estimates for IBI and QHEI status > # select which sites to use - only TS-target sampled sites > sites <- data.frame(siteID=designstatus$SiteID, + Use=designstatus$StatusB=='TS') > > # Can use same subpop and dsgn dataframes as before. so don't need to redo. > > # Set up the data data frame > data <- data.frame( siteID=designstatus$SiteID, + IBIStatus=designstatus$IBI.Status, + QHEIStatus=designstatus$QHEI.Status) > > # Calculate the estimates: note no longer know the size of the target stream length > # so we do not specify it. > conditionsum <- cat.analysis(sites,subpop,dsgn,data) > > # Write results out as csv file to read in spreadsheet > write.table(conditionsum, file = " conditionsum.csv", sep = ",",col.names=NA) > > > > > > > # Complete continuous indicator estimates > # Can use same sites, subpop, and dsgn dataframes as before > > # Set up the data dataframe > data <- data.frame(siteID=designstatus$SiteID, + IBIScore=designstatus$IBI.Score, + QHEIScore=designstatus$QHEI.Score) > > scoressum <- cont.analysis(sites,subpop,dsgn,data, + pctval=c(5, 10, 25, 50, 75, 90, 95), + ) > > > # Write results out as csv file to read in spreadsheet > write.table(scoressum$CDF, file = "scoressumcdf.csv", sep = ",",col.names=NA) > write.table(scoressum$Pct, file = "scoressumpct.csv", sep = ",",col.names=NA) > write.table(scoressum$Tot, file = "scoressumtot.csv", sep = ",",col.names=NA) > > > > # Can use R to Plot CDF estimates using function: cdfplot.fcn > #File: cdfplot.fcn.r > #Programmer: Tony Olsen > #Date: Sept 23, 2002 > # Input: > #cdfest - dataframe with > #x-value for cdf plot > #y-value (cdf estimate) > #lower confidence bound > #upper confidence bound > # > # Output: > #plot of cdf with confidence bounds > cdfplot.fcn <- function(cdfest,prop=T,...){ + + if(prop == T) { + plot(cdfest[,1],cdfest[,2] , + type='l',ylim=c(0,100),...) + tvalue <- cdfest[,2]>=5 & cdfest[,2]<=95 + } + else { + plot(cdfest[,1],cdfest[,2],type='l',...) + tvalue <- cdfest[,2]>=0.05*max(cdfest[,2]) & + cdfest[,2]<=0.95*max(cdfest[,2]) + } + value <- cdfest[,1][tvalue] + upper <- cdfest[,4][tvalue] + lower <- cdfest[,3][tvalue] + lines(value,lower,lty=2) + lines(value,upper,lty=2) + + legend(x=min(cdfest[,1]),y=max(cdfest[,2]), + legend=c('CDF estimate','95% Confidence Limits'), + lty=c(1,2), bty='n', cex=.7) + } > > pdf('results.pdf',width=8,height=11.5) > op <- par(mfrow=c(2,2)) > > # Plot IBI Score CDFs for percent and km stream length > tst <- scoressum$CDF$Indicator=='IBIScore' > cdf <- scoressum$CDF[tst,c('Value','Estimate.P','LCB95Pct.P','UCB95Pct.P')] > xlab <- 'IBI Score' > ylab <- 'Percent Stream Length' > cdfplot.fcn(cdf,prop=T,xlab=xlab,ylab=ylab) > title('IBI Score CDF Estimate') > > tst <- scoressum$CDF$Indicator=='IBIScore' > cdf <- scoressum$CDF[tst,c('Value','Estimate.U','LCB95Pct.U','UCB95Pct.U')] > xlab <- 'IBI Score' > ylab <- 'Stream Length (km)' > cdfplot.fcn(cdf,prop=F,xlab=xlab,ylab=ylab) > title('IBI Score CDF Estimate') > > # Plot QHEI Score CDFs for percent and km stream length > tst <- scoressum$CDF$Indicator=='QHEIScore' > cdf <- scoressum$CDF[tst,c('Value','Estimate.P','LCB95Pct.P','UCB95Pct.P')] > xlab <- 'QHEI Score' > ylab <- 'Percent Stream Length' > cdfplot.fcn(cdf,prop=T,xlab=xlab,ylab=ylab) > title('QHEI Score CDF Estimate') > > tst <- scoressum$CDF$Indicator=='QHEIScore' > cdf <- scoressum$CDF[tst,c('Value','Estimate.U','LCB95Pct.U','UCB95Pct.U')] > xlab <- 'QHEI Score' > ylab <- 'Stream Length (km)' > cdfplot.fcn(cdf,prop=F,xlab=xlab,ylab=ylab) > title('QHEI Score CDF Estimate') > > par(op) > graphics.off() > > >