> # File: in98adjwts_arm.txt
> # Purpose: 1998 survey design for Upper Wabash Basin adjust weights
> #          using DesignFile, frame information, and Site Evaluation file
> # Programmer: Tony Olsen
> # Date: March 2005
> #
> #Example updated for R 2.0.1 and psurvey.analysis 2.6
> #
> # Frame Stream length from RF3. Length includes all Strahler orders and possibly
> # both perennial and Non-perennial streams
> # The design documentation has 7663.497 km of stream; with 305.347 km in 5th-7th order
> framelen <- c('Upper Wabash'=7358.150)
> 
> 
> # Read Design File for 1998 Upper Wabash Basin
> # file is in sub folder named Original Data
> DesignFile <- read.delim('original_data\\inrb98.txt')
> head(DesignFile)
      SiteID         SiteName     County         RF3RCHID      MAP24K
1 INRB98-001        Mud Creek   RANDOLPH 5120103  26 3.43    Maxville
2 INRB98-002       Loon Creek HUNTINGTON 5120101 542 0.00     Andrews
3 INRB98-003     EIGHTMILE CR      WELLS 5120101  17 9.63      Ossian
4 INRB98-004      SALAMONIE R        JAY 5120102   147.04   Pennville
5 INRB98-005   HOAGLUND DITCH      WHITE 5120106  29 0.00       Monon
6 INRB98-006 West Honey Creek     HOWARD 5120107 145 0.00 Russiaville
     MAP100K  MAP250K  Long.dd   Lat.dd Stratum MDCaty    Weight StrahlerOrder
1     MUNCIE   MUNCIE 85.00357 40.21902       3     21 368.52653             1
2     WABASH   MUNCIE 85.60551 40.85739       3     21 368.52653             1
3     WABASH   MUNCIE 85.17342 40.87987       3     22 117.81714             2
4     MUNCIE   MUNCIE 85.14647 40.48114       3     24  53.90157             4
5 LOGANSPORT DANVILLE 86.96295 40.78734       3     23  60.42005             3
6 LA FAYETTE DANVILLE 86.27908 40.43969       3     22 117.81714             2
  OECO_96
1      55
2      55
3      55
4      55
5      54
6      55
> 
> 
> # What are the MDcaty and StrahlerOrder present?
> table(DesignFile$MDCaty)

21 22 23 24 
25 25 26 24 
> table(DesignFile$StrahlerOrder)

 1  2  3  4 
25 25 26 24 
> table(DesignFile$StrahlerOrder,DesignFile$MDCaty)
   
    21 22 23 24
  1 25  0  0  0
  2  0 25  0  0
  3  0  0 26  0
  4  0  0  0 24
> 
> 
> # Import 1998 Site Evaluation file from Indiana
> SiteEval <- read.delim('original_data\\siteevaluation98.txt')
> head(SiteEval)
     SiteIDF        Basin    HUC14Code         StreamName StrahlerOrder StatusB
1 INRB98-001 Upper Wabash 5.120103e+12             Mud Cr             1      LD
2 INRB98-002 Upper Wabash 5.120101e+12            Loon Cr             1      TS
3 INRB98-003 Upper Wabash 5.120101e+12       Eightmile Cr             2      TS
4 INRB98-004 Upper Wabash 5.120102e+12    Salamonie River             4      LD
5 INRB98-005 Upper Wabash 5.120106e+12     Hoaglund Ditch             3      TS
6 INRB98-006 Upper Wabash 5.120107e+12 Walnut Fk Honey Cr             2      PB
           Comments BiologicalSampleDate QHEI.Score  QHEI.Status IBI.Score
1     Access Denied                              NA           NS        NA
2                             07/08/1998         48     Impaired        50
3                             07/08/1998         65 Not Impaired        22
4     Access Denied                              NA           NS        NA
5                             07/28/1998         31     Impaired        38
6 Physical Barriers                              NA           NS        NA
    IBI.Status
1           NS
2 Not Impaired
3     Impaired
4           NS
5 Not Impaired
6           NS
> 
> # Check codes for StatusB - Biological Site Evaluation status
> table(SiteEval$StatusB)

  LD   NT   OT   PB SCNB   TS   UK 
  19    9    2    7   14   48    1 
> # Note on code meanings
> # TS - target and sampled site
> # NT - site was non-target (dry, non-wadeable,...)
> # LD - landowner denied access to site
> # PB - physical barrier - unable to reach site
> # OT - target site for which a sample was not collected
> # UK - Unknown whether site is target or non-target (shouldn't happen)
> # SCNB - target site Sampled for Chemistry but Not Biology (sampled fewer biology sites)
> 
> # Create designstatus file
> designstatus <- merge(SiteEval, DesignFile, by.x='SiteIDF', by.y='SiteID')
> head(designstatus)
     SiteIDF        Basin    HUC14Code         StreamName StrahlerOrder.x
1 INRB98-001 Upper Wabash 5.120103e+12             Mud Cr               1
2 INRB98-002 Upper Wabash 5.120101e+12            Loon Cr               1
3 INRB98-003 Upper Wabash 5.120101e+12       Eightmile Cr               2
4 INRB98-004 Upper Wabash 5.120102e+12    Salamonie River               4
5 INRB98-005 Upper Wabash 5.120106e+12     Hoaglund Ditch               3
6 INRB98-006 Upper Wabash 5.120107e+12 Walnut Fk Honey Cr               2
  StatusB          Comments BiologicalSampleDate QHEI.Score  QHEI.Status
1      LD     Access Denied                              NA           NS
2      TS                             07/08/1998         48     Impaired
3      TS                             07/08/1998         65 Not Impaired
4      LD     Access Denied                              NA           NS
5      TS                             07/28/1998         31     Impaired
6      PB Physical Barriers                              NA           NS
  IBI.Score   IBI.Status         SiteName     County         RF3RCHID
1        NA           NS        Mud Creek   RANDOLPH 5120103  26 3.43
2        50 Not Impaired       Loon Creek HUNTINGTON 5120101 542 0.00
3        22     Impaired     EIGHTMILE CR      WELLS 5120101  17 9.63
4        NA           NS      SALAMONIE R        JAY 5120102   147.04
5        38 Not Impaired   HOAGLUND DITCH      WHITE 5120106  29 0.00
6        NA           NS West Honey Creek     HOWARD 5120107 145 0.00
       MAP24K    MAP100K  MAP250K  Long.dd   Lat.dd Stratum MDCaty    Weight
1    Maxville     MUNCIE   MUNCIE 85.00357 40.21902       3     21 368.52653
2     Andrews     WABASH   MUNCIE 85.60551 40.85739       3     21 368.52653
3      Ossian     WABASH   MUNCIE 85.17342 40.87987       3     22 117.81714
4   Pennville     MUNCIE   MUNCIE 85.14647 40.48114       3     24  53.90157
5       Monon LOGANSPORT DANVILLE 86.96295 40.78734       3     23  60.42005
6 Russiaville LA FAYETTE DANVILLE 86.27908 40.43969       3     22 117.81714
  StrahlerOrder.y OECO_96
1               1      55
2               1      55
3               2      55
4               4      55
5               3      54
6               2      55
> nrow(designstatus)
[1] 100
> 
> # Add equal area x,y coordinates to be used in variance estimation
> # Note that longitude must be changed to be negative
> tmp <- marinus(designstatus$Lat.dd,-designstatus$Long.dd)
> designstatus$xmarinus <- tmp[,'x']
> designstatus$ymarinus <- tmp[,'y']
> 
> # Adjust weights to match frame length
> # Determine which sites to include in weight adjustment.
> # All 100 sites were evaluated so all should be used.
> # Note that original design weights assumed sample size of 50; 
> # now have sample size of 100
> sites <- rep(TRUE, nrow(designstatus) )
> designstatus$final.wt <- adjwgt(sites,designstatus$Weight ,
+ designstatus$Basin, framelen)
> 
> # Check out weights. sum of final weights must equal framelen.
> sum(designstatus$Weight)
[1] 15023.15
> sum(designstatus$final.wt)
[1] 7358.15
> 
> 
> # Write out Design Status File
> write.table(designstatus, file="designstatus.csv",sep=",",row.names=FALSE)
> 
> # File: in98est.txt
> # Purpose: Estimation for Indiana Upper Wabash River Basin Biological Status
> # Programmer:  Tony Olsen
> # Date:  March 2005
> #
> #Example updated for R 2.0.1 and psurvey.analysis 2.6
> #
> # If R workspace from weight adjustment was not saved, then read in designstatus file
> # Note that designstatus file contains the data results as well as design status information
> 
> designstatus <- read.table('designstatus.csv',sep=',',header=TRUE)
> tail(designstatus)
       SiteIDF        Basin    HUC14Code           StreamName StrahlerOrder.x
95  INRB98-095 Upper Wabash 5.120105e+12 Wise Grinslade Ditch               2
96  INRB98-096 Upper Wabash 5.120105e+12  Harter Shirar Ditch               1
97  INRB98-097 Upper Wabash 5.120105e+12              Rock Cr               3
98  INRB98-098 Upper Wabash 5.120105e+12              Rock Cr               1
99  INRB98-099 Upper Wabash 5.120106e+12     Tippecanoe River               4
100 INRB98-100 Upper Wabash 5.120106e+12     Tippecanoe River               4
    StatusB                        Comments BiologicalSampleDate QHEI.Score
95       LD                   Access Denied                              NA
96       NT                             Dry                              NA
97     SCNB No biological sample collected.                              NA
98       NT                             Dry                              NA
99       TS                                           07/14/1998         78
100    SCNB No biological sample collected.                              NA
     QHEI.Status IBI.Score   IBI.Status     SiteName  County         RF3RCHID
95            NS        NA           NS                MIAMI 5120105 194 0.00
96            NS        NA           NS              CARROLL 5120105 166 4.27
97            NS        NA           NS      ROCK CR CARROLL 5120105   8 2.65
98            NS        NA           NS      ROCK CR    CASS 5120105   821.62
99  Not Impaired        48 Not Impaired TIPPECANOE R  FULTON 5120106  17 3.84
100           NS        NA           NS TIPPECANOE R PULASKI 5120106   9 0.00
     MAP24K    MAP100K  MAP250K  Long.dd   Lat.dd Stratum MDCaty    Weight
95    Miami LOGANSPORT DANVILLE 86.09874 40.60312       3     22 117.81714
96    Flora LOGANSPORT DANVILLE 86.50656 40.56342       3     21 368.52653
97  Burrows LOGANSPORT DANVILLE 86.58547 40.65874       3     23  60.42005
98   Onward LOGANSPORT DANVILLE 86.19225 40.64311       3     21 368.52653
99    Argos       KNOX  CHICAGO 86.12994 41.16369       3     24  53.90157
100 Winamac       KNOX  CHICAGO 86.59149 41.02049       3     24  53.90157
    StrahlerOrder.y OECO_96  xmarinus   ymarinus  final.wt
95                2      55 -15.29491 -15.652854  57.70535
96                1      55 -49.65176 -20.067293 180.49965
97                3      55 -56.29937  -9.468415  29.59298
98                1      55 -23.17251 -11.206280 180.49965
99                4      56 -17.92302  46.679797  26.40031
100               4      56 -56.80670  30.756684  26.40031
> 
> 
> # Set up sites, subpop, dsgn, and data dataframes for estimation functions
> 
> # sites identifies which sites to use in estimation
> table(designstatus$StatusB)  # Note that all sites were evaluated

  LD   NT   OT   PB SCNB   TS   UK 
  19    9    2    7   14   48    1 
> # All sites will be used to estimate stream length in each category
> # sites must have SiteId and column indicating if site will be used
> sites <- data.frame(siteID=designstatus$SiteID,
+ Use=rep(TRUE,nrow(designstatus)) )
> 
> 
> # Define subpopulations for which estimates are desired
> # In this case only for entire Upper Wabash Basin
> subpop <- data.frame( siteID=designstatus$SiteID,
+ UpperWabash=designstatus$Basin )
> 
> # Specify design information
> dsgn <- data.frame( siteID=designstatus$SiteID,
+ stratum=designstatus$Stratum,
+ wgt=designstatus$final.wt,
+ xcoord=designstatus$xmarinus,
+ ycoord=designstatus$ymarinus)
> 
> # Specify data for estimation of Site Evaluation categories
> # Add a variable that assigns categories to Target or NonTarget streams
> designstatus$StatusTNT <- designstatus$StatusB
> levels(designstatus$StatusTNT) <- list(T=c('TS','LD','OT','SCNB','PB'),NT=c('NT','UK'))
> 
> data <- data.frame(siteID=designstatus$SiteID,
+ BiologicalStatus=designstatus$StatusB,
+ StatusTNT=designstatus$StatusTNT)
> 
> 
> # Estimate Site Evaluation categories
> # Note - set popsize to known length of frame for Upper Wabash.
> sitestatussum <- cat.analysis(sites, subpop, dsgn, data,
+          popsize=list(UpperWabash=framelen) )
>          
> # Write results out as csv file to read in spreadsheet
> write.table(sitestatussum, file = "sitestatussum.csv", sep = ",",col.names=NA)
> 
> 
> # Do estimates for IBI and QHEI status
> # select which sites to use - only TS-target sampled sites
> sites <- data.frame(siteID=designstatus$SiteID,
+ Use=designstatus$StatusB=='TS')
> 
> # Can use same subpop and dsgn dataframes as before. so don't need to redo.
> 
> # Set up the data data frame
> data <- data.frame( siteID=designstatus$SiteID,
+ IBIStatus=designstatus$IBI.Status,
+ QHEIStatus=designstatus$QHEI.Status)
> 
> # Calculate the estimates: note no longer know the size of the target stream length
> # so we do not specify it.
> conditionsum <- cat.analysis(sites,subpop,dsgn,data)
> 
> # Write results out as csv file to read in spreadsheet
> write.table(conditionsum, file = " conditionsum.csv", sep = ",",col.names=NA)
> 
> 
> 
> 
> 
> 
> # Complete continuous indicator estimates 
> # Can use same sites, subpop, and dsgn dataframes as before
> 
> # Set up the data dataframe
> data <- data.frame(siteID=designstatus$SiteID,
+ IBIScore=designstatus$IBI.Score,
+ QHEIScore=designstatus$QHEI.Score)
> 
> scoressum <- cont.analysis(sites,subpop,dsgn,data,
+ pctval=c(5, 10, 25, 50, 75, 90, 95),
+ )
> 
> 
> # Write results out as csv file to read in spreadsheet
> write.table(scoressum$CDF, file = "scoressumcdf.csv", sep = ",",col.names=NA)
> write.table(scoressum$Pct, file = "scoressumpct.csv", sep = ",",col.names=NA)
> write.table(scoressum$Tot, file = "scoressumtot.csv", sep = ",",col.names=NA)
> 
> 
> 
> # Can use R to Plot CDF estimates using function: cdfplot.fcn
> #File: cdfplot.fcn.r
> #Programmer: Tony Olsen
> #Date: Sept 23, 2002
> # Input:
> #cdfest - dataframe with
> #x-value for cdf plot
> #y-value (cdf estimate)
> #lower confidence bound
> #upper confidence bound
> #
> # Output:
> #plot of cdf with confidence bounds
> cdfplot.fcn <- function(cdfest,prop=T,...){
+ 
+ if(prop == T) {
+ plot(cdfest[,1],cdfest[,2]  ,
+ type='l',ylim=c(0,100),...)
+ tvalue <- cdfest[,2]>=5 & cdfest[,2]<=95
+ }
+ else {
+ plot(cdfest[,1],cdfest[,2],type='l',...)
+ tvalue <- cdfest[,2]>=0.05*max(cdfest[,2]) & 
+ cdfest[,2]<=0.95*max(cdfest[,2])
+ }
+ value <- cdfest[,1][tvalue]
+ upper <- cdfest[,4][tvalue]
+ lower <- cdfest[,3][tvalue]
+ lines(value,lower,lty=2)
+ lines(value,upper,lty=2)
+ 
+ legend(x=min(cdfest[,1]),y=max(cdfest[,2]),
+ legend=c('CDF estimate','95% Confidence Limits'),
+ lty=c(1,2), bty='n', cex=.7)
+ }
> 
> pdf('results.pdf',width=8,height=11.5)
> op <- par(mfrow=c(2,2))
> 
> # Plot IBI Score CDFs for percent and km stream length
> tst <- scoressum$CDF$Indicator=='IBIScore'
> cdf <- scoressum$CDF[tst,c('Value','Estimate.P','LCB95Pct.P','UCB95Pct.P')]
> xlab <- 'IBI Score'
> ylab <- 'Percent Stream Length'
> cdfplot.fcn(cdf,prop=T,xlab=xlab,ylab=ylab)
> title('IBI Score CDF Estimate')
> 
> tst <- scoressum$CDF$Indicator=='IBIScore'
> cdf <- scoressum$CDF[tst,c('Value','Estimate.U','LCB95Pct.U','UCB95Pct.U')]
> xlab <- 'IBI Score'
> ylab <- 'Stream Length (km)'
> cdfplot.fcn(cdf,prop=F,xlab=xlab,ylab=ylab)
> title('IBI Score CDF Estimate')
> 
> # Plot QHEI Score CDFs for percent and km stream length
> tst <- scoressum$CDF$Indicator=='QHEIScore'
> cdf <- scoressum$CDF[tst,c('Value','Estimate.P','LCB95Pct.P','UCB95Pct.P')]
> xlab <- 'QHEI Score'
> ylab <- 'Percent Stream Length'
> cdfplot.fcn(cdf,prop=T,xlab=xlab,ylab=ylab)
> title('QHEI Score CDF Estimate')
> 
> tst <- scoressum$CDF$Indicator=='QHEIScore'
> cdf <- scoressum$CDF[tst,c('Value','Estimate.U','LCB95Pct.U','UCB95Pct.U')]
> xlab <- 'QHEI Score'
> ylab <- 'Stream Length (km)'
> cdfplot.fcn(cdf,prop=F,xlab=xlab,ylab=ylab)
> title('QHEI Score CDF Estimate')
> 
> par(op)
> graphics.off()
> 
> 
>