      program accplt
c-----------------------------------------------------------------------
c
c Evaluate the Cross Validation / Jackknifing of Multiple True Values
c *******************************************************************
c
c
c
c NOTES:
c 
c   1. Basic flow in this program:
c
c        -- read "true" values at "nd" data locations (gridded or not)
c        -- read corresponding "nd" CCDFs that consist of either a set
c           of realizations, IK-type cumulative probabilities, or a 
c           Gaussian mean and variance.
c        -- calculate the probability associated to the true value using
c           each CCDF 
c        -- loop over a range of probability intervals calculting how
c           many true values fall within the interval
c        -- write accuracy plot and true probability (for detecting
c           local biases
c 
c   2. True values must be normal scores if working with Gaussian CCDFs
c 
c   3. For simplicity of coding this program has been set up to consume
c      a large amount of memory.  If you are working with a large 3-D
c      grid you will have to read the "sim" array as you need it rather
c      than all into memory at once.
c
c   4. The file containing the true values and the file containing the
c      distributions must be in the same order -- the program has no
c      way of checking
c
c
c
c Original: C.V. Deutsch                             Date: November 1995
c-----------------------------------------------------------------------
      parameter(MAXDAT=100000,MAXSIM=100,EPSLON=0.000001,VERSION=1.000)
      character truefl*40,simfl*40,sumfl*40,outfl*40,str*40
      logical   testfl
      real      sim(MAXDAT,MAXSIM),true(MAXDAT),cut(MAXSIM),val(MAXSIM)
      integer   ccdftype
      data      lin/1/,lsum/2/,lout/3/
c
c Note VERSION number:
c
      write(*,9999) VERSION
 9999 format(/' ACCPLT Version: ',f5.3/)
c
c Get the name of the parameter file - try the default name if no input:
c
      write(*,*) 'Which parameter file do you want to use?'
      read (*,'(a40)') str
      if(str(1:1).eq.' ')str='accplt.par                              '
      inquire(file=str,exist=testfl)
      if(.not.testfl) then
            write(*,*) 'ERROR - the parameter file does not exist,'
            write(*,*) '        check for the file and try again  '
            stop
      endif
      open(lin,file=str,status='OLD')
c
c Find start of parameters:
c
 1    read(lin,'(a4)',end=98) str(1:4)
      if(str(1:4).ne.'STAR') go to 1
c
c Read input parameters:
c

      read(lin,'(a40)',err=98) truefl
      call chknam(truefl,40)
      write(*,*) ' data file with true = ',truefl

      read(lin,*,err=98) icoltrue
      write(*,*) ' column number for the true value = ',icoltrue

      read(lin,*,err=98) ccdftype
      write(*,*) ' ccdf type = ',ccdftype

      read(lin,*,err=98) ncut
      write(*,*) ' number of realizations/thresholds = ',ncut

      if(ncut.gt.MAXSIM) stop 'ERROR: too many realizations'

      if(ccdftype.eq.1) then
            read(lin,*,err=98) (cut(i),i=1,ncut)
            write(*,*) ' thresholds = ',(cut(i),i=1,ncut)

            read(lin,*,err=98) zmin,zmax
            write(*,*) ' minimum and maximum values = ',zmin,zmax
      else
            read(lin,*,err=98)
            read(lin,*,err=98)
      end if

      read(lin,*,err=98) icolmean,icolvar
      write(*,*) ' columns for mean and variance = ',icolmean,icolvar
      if(ccdftype.eq.2) ncut = 2

      read(lin,'(a40)',err=98) simfl
      call chknam(simfl,40)
      write(*,*) ' data file with realizations = ',simfl

      read(lin,'(a40)',err=98) sumfl
      call chknam(sumfl,40)
      write(*,*) ' summary file = ',sumfl

      read(lin,'(a40)',err=98) outfl
      call chknam(outfl,40)
      write(*,*) ' output file = ',outfl

      read(lin,*,err=98) pinc
      write(*,*) ' probability increment = ',pinc

      close(lin)
c
c Read the true values:
c
      write(*,*)
      write(*,*) 'Reading true values'
      inquire(file=truefl,exist=testfl)
      if(.not.testfl) then
            write(*,*) 'ERROR there is no input file?',truefl
            stop
      end if
      open(lin,file=truefl,status='OLD')
      read(lin,*,err=99)
      read(lin,*,err=99) nvari
      do i=1,nvari
            read(lin,*,err=99)
      end do
      nd = 0
 2    read(lin,*,end=3) (val(i),i=1,nvari)
      nd = nd + 1
      true(nd) = val(icoltrue)
      go to 2
 3    continue
      close(lin)
      write(*,*)
      write(*,*) 'Found ',nd,' true data values'
c
c Read the CCDFs either as a set of realizations, as IK-type
c probability values, or as a Gaussian mean and variance:
c
      write(*,*)
      write(*,*) 'Reading the CCDFs'
      inquire(file=simfl,exist=testfl)
      if(.not.testfl) then
            write(*,*) 'ERROR there is no input file?',simfl
            stop
      end if
      open(lin,file=simfl,status='OLD')
      read(lin,*,err=99)
      read(lin,*,err=99) nvari
      do i=1,nvari
            read(lin,*,err=99)
      end do
      if(ccdftype.eq.0) then
            do isim=1,ncut
                  do i=1,nd
                        read(lin,*,err=99) (val(j),j=1,nvari)
                        sim(i,isim) = val(nvari)
                  end do
            end do
      else if(ccdftype.eq.1) then
            do i=1,nd
                  read(lin,*,err=99) (sim(i,icut),icut=1,ncut)
            end do
      else if(ccdftype.eq.2) then
            do i=1,nd
                  read(lin,*,err=99) (val(j),j=1,nvari)
                  sim(i,1) = val(icolmean)
                  sim(i,2) = val(icolvar)
            end do
      end if
      close(lin)
c
c Get quantiles associated to each true value on the grid and enter
c that value in the "true" array:
c
      write(*,*)
      write(*,*) 'Calculating quantiles associated to true values'
      avgvar = 0.0
      do id=1,nd
            do i=1,ncut
                  val(i) = sim(id,i)
            end do
            if(ccdftype.eq.0) then             
c
c Get quantile from realizations:
c
                  call sortem(1,ncut,val,0,b,c,d,e,f,g,h)
                  call locate(val,ncut,1,ncut,true(id),j)
                  true(id) = (real(j)-0.5) / real(ncut)
                  xm = 0.0
                  xv = 0.0
                  do i=1,ncut
                        xm = xm + val(i)
                        xv = xv + val(i)*val(i)
                  end do
                  xv = xv/real(ncut) - (xm/real(ncut))**2
            else if(ccdftype.eq.1) then
c
c Get quantile from IK-type distribution:
c
                  call locate(cut,ncut,1,ncut,true(id),j)
                  j = min(max(j,1),(ncut-1))
                  if(abs(true(id)-zmin).lt.EPSLON) then
c
c     Handle the case of a spike at the origin:
c
                        true(id) = val(1)
                  else if(true(id).le.zmin) then
c
c     Lower tail:
c
                        true(id) = powint(zmin,cut(1),
     +                           0.0,val(1),true(id),1.0)
                  else if(true(id).ge.zmax) then
c
c     Upper tail:
c
                        true(id) = powint(cut(ncut),zmax,
     +                           val(ncut),1.0,true(id),1.0)
                  else
c
c     Somewhere in the middle of the distribution:
c
                        true(id) = powint(cut(j),cut(j+1),
     +                           val(j),val(j+1),true(id),1.0)
                  end if
                  xm = cut(1)*val(1)
                  xv = cut(1)*cut(1)*val(1)
                  do i=2,ncut
                        xm = xm + cut(i)*(val(i)-val(i-1))
                        xv = xv + cut(i)*cut(i)*(val(i)-val(i-1))
                  end do
                  xm = xm + cut(ncut)*(1.0-val(ncut))
                  xv = xv + cut(ncut)*cut(ncut)*(1.0-val(ncut))
                  xv = xv - xm*xm
c
c Gaussian CCDF:
c
            else if(ccdftype.eq.2) then
                  yy = (true(id)-val(1))/max(sqrt(val(2)),EPSLON)
                  true(id) = gcum(yy)
                  xv = val(2)
            end if
            if(true(id).lt.0.0) true(id) = 0.0
            if(true(id).gt.1.0) true(id) = 1.0
            avgvar = avgvar + xv
      end do
      avgvar = avgvar / real(nd)
c
c Prepare summary and output files:
c
      write(*,*)
      write(*,*) 'Calculating accuracy and precision'
      open(lsum,file=sumfl,status='UNKNOWN')
      write(lsum,201)
 201  format('Results of ACCPLT',/,'2',/,'Width of Local Dists',/,
     +       'Number in This Width')
c
c Loop over all probability increments:
c
      nval = int(1.0/pinc) - 1
      pval = 0.0
      acc  = 0.0
      pre  = 0.0
      goo  = 0.0
      do ival = 1,nval
            pval = pval + pinc
            cdflo = (1.0 - pval) / 2.0
            cdfhi =  1.0 - cdflo
c
c Loop over data determining whether the datum is in interval:
c
            num = 0
            do id=1,nd
                    cdf  = true(id)
                    if(cdf.gt.cdflo.and.cdf.lt.cdfhi) num = num + 1
            end do
            act = real(num)/real(nd)
            write(lsum,202) pval,act
 202        format(f7.3,1x,f7.3)
c
c Summary statistics:
c
            if(act.ge.pval) then
                  acc = acc +    1.0
                  pre = pre + (act-pval)
                  goo = goo + (act-pval)
            else
                  goo = goo + 2.0*(pval-act)
            end if
      end do
      close(lsum)
      acc = acc / real(nval)
      pre = 1.0 - 2.0*pre/real(nval)
      goo = 1.0 - goo / real(nval)
      write(*,203) acc,pre,goo,avgvar
 203  format(/,'    Accuracy         = ',f16.4,
     +       /,'    Precision        = ',f16.4,
     +       /,'    Goodness         = ',f16.4,
     +       /,'    Average Variance = ',f16.4)
c
c Data for Posting Quantiles:
c
      open(lout,file=outfl,status='UNKNOWN')
      write(lout,301)
 301  format('Results of ACCPLT',/,'1',/,'Q')
      do id=1,nd
            write(lout,'(f7.3)') true(id)
      end do
      close(lout)
      write(*,9998) VERSION
 9998 format(/' ACCPLT Version: ',f5.3, ' Finished'/)
c
c Finished
c
      stop
 98   stop 'ERROR in parameter file!'
 99   stop 'ERROR in data file!'
      end



      subroutine chknam(str,len)
c-----------------------------------------------------------------------
c
c                     Check for a Valid File Name
c                     ***************************
c
c This subroutine takes the character string "str" of length "len" and
c removes all leading blanks and blanks out all characters after the
c first blank found in the string (leading blanks are removed first).
c
c
c
c Author: C.V. Deutsch                                Date: January 1993
c-----------------------------------------------------------------------
      parameter (MAXLEN=132)
      character str(MAXLEN)*1
c
c Remove leading blanks:
c
      do i=1,len-1
            if(str(i).ne.' ') then
                  if(i.eq.1) go to 1
                  do j=1,len-i+1
                        k = j + i - 1
                        str(j) = str(k)
                  end do
                  do j=len,len-i+2,-1
                        str(j) = ' '
                  end do
                  go to 1
            end if
      end do
 1    continue
c
c Find first blank and blank out the remaining characters:
c
      do i=1,len-1
            if(str(i).eq.' ') then
                  do j=i+1,len
                        str(j) = ' '
                  end do
                  go to 2
            end if
      end do
 2    continue
c
c Return with modified file name:
c
      return
      end



      subroutine sortem(ib,ie,a,iperm,b,c,d,e,f,g,h)
c-----------------------------------------------------------------------
c
c                      Quickersort Subroutine
c                      **********************
c
c This is a subroutine for sorting a real array in ascending order. This
c is a Fortran translation of algorithm 271, quickersort, by R.S. Scowen
c in collected algorithms of the ACM.
c
c The method used is that of continually splitting the array into parts
c such that all elements of one part are less than all elements of the
c other, with a third part in the middle consisting of one element.  An
c element with value t is chosen arbitrarily (here we choose the middle
c element). i and j give the lower and upper limits of the segment being
c split.  After the split a value q will have been found such that 
c a(q)=t and a(l)<=t<=a(m) for all i<=l<q<m<=j.  The program then
c performs operations on the two segments (i,q-1) and (q+1,j) as follows
c The smaller segment is split and the position of the larger segment is
c stored in the lt and ut arrays.  If the segment to be split contains
c two or fewer elements, it is sorted and another segment is obtained
c from the lt and ut arrays.  When no more segments remain, the array
c is completely sorted.
c
c
c INPUT PARAMETERS:
c
c   ib,ie        start and end index of the array to be sorteda
c   a            array, a portion of which has to be sorted.
c   iperm        0 no other array is permuted.
c                1 array b is permuted according to array a
c                2 arrays b,c are permuted.
c                3 arrays b,c,d are permuted.
c                4 arrays b,c,d,e are permuted.
c                5 arrays b,c,d,e,f are permuted.
c                6 arrays b,c,d,e,f,g are permuted.
c                7 arrays b,c,d,e,f,g,h are permuted.
c               >7 no other array is permuted.
c
c   b,c,d,e,f,g,h  arrays to be permuted according to array a.
c
c OUTPUT PARAMETERS:
c
c    a      = the array, a portion of which has been sorted.
c
c    b,c,d,e,f,g,h  =arrays permuted according to array a (see iperm)
c
c NO EXTERNAL ROUTINES REQUIRED:
c
c-----------------------------------------------------------------------
      dimension a(*),b(*),c(*),d(*),e(*),f(*),g(*),h(*)
c
c The dimensions for lt and ut have to be at least log (base 2) n
c
      integer   lt(64),ut(64),i,j,k,m,p,q
c
c Initialize:
c
      j     = ie
      m     = 1
      i     = ib
      iring = iperm+1
      if (iperm.gt.7) iring=1
c
c If this segment has more than two elements  we split it
c
 10   if (j-i-1) 100,90,15
c
c p is the position of an arbitrary element in the segment we choose the
c middle element. Under certain circumstances it may be advantageous
c to choose p at random.
c
 15   p    = (j+i)/2
      ta   = a(p)
      a(p) = a(i)
      go to (21,19,18,17,16,161,162,163),iring
 163     th   = h(p)
         h(p) = h(i)
 162     tg   = g(p)
         g(p) = g(i)
 161     tf   = f(p)
         f(p) = f(i)
 16      te   = e(p)
         e(p) = e(i)
 17      td   = d(p)
         d(p) = d(i)
 18      tc   = c(p)
         c(p) = c(i)
 19      tb   = b(p)
         b(p) = b(i)
 21   continue
c
c Start at the beginning of the segment, search for k such that a(k)>t
c
      q = j
      k = i
 20   k = k+1
      if(k.gt.q)     go to 60
      if(a(k).le.ta) go to 20
c
c Such an element has now been found now search for a q such that a(q)<t
c starting at the end of the segment.
c
 30   continue
      if(a(q).lt.ta) go to 40
      q = q-1
      if(q.gt.k)     go to 30
      go to 50
c
c a(q) has now been found. we interchange a(q) and a(k)
c
 40   xa   = a(k)
      a(k) = a(q)
      a(q) = xa
      go to (45,44,43,42,41,411,412,413),iring
 413     xh   = h(k)
         h(k) = h(q)
         h(q) = xh
 412     xg   = g(k)
         g(k) = g(q)
         g(q) = xg
 411     xf   = f(k)
         f(k) = f(q)
         f(q) = xf
 41      xe   = e(k)
         e(k) = e(q)
         e(q) = xe
 42      xd   = d(k)
         d(k) = d(q)
         d(q) = xd
 43      xc   = c(k)
         c(k) = c(q)
         c(q) = xc
 44      xb   = b(k)
         b(k) = b(q)
         b(q) = xb
 45   continue
c
c Update q and search for another pair to interchange:
c
      q = q-1
      go to 20
 50   q = k-1
 60   continue
c
c The upwards search has now met the downwards search:
c
      a(i)=a(q)
      a(q)=ta
      go to (65,64,63,62,61,611,612,613),iring
 613     h(i) = h(q)
         h(q) = th
 612     g(i) = g(q)
         g(q) = tg
 611     f(i) = f(q)
         f(q) = tf
 61      e(i) = e(q)
         e(q) = te
 62      d(i) = d(q)
         d(q) = td
 63      c(i) = c(q)
         c(q) = tc
 64      b(i) = b(q)
         b(q) = tb
 65   continue
c
c The segment is now divided in three parts: (i,q-1),(q),(q+1,j)
c store the position of the largest segment in lt and ut
c
      if (2*q.le.i+j) go to 70
      lt(m) = i
      ut(m) = q-1
      i = q+1
      go to 80
 70   lt(m) = q+1
      ut(m) = j
      j = q-1
c
c Update m and split the new smaller segment
c
 80   m = m+1
      go to 10
c
c We arrive here if the segment has  two elements we test to see if
c the segment is properly ordered if not, we perform an interchange
c
 90   continue
      if (a(i).le.a(j)) go to 100
      xa=a(i)
      a(i)=a(j)
      a(j)=xa
      go to (95,94,93,92,91,911,912,913),iring
 913     xh   = h(i)
         h(i) = h(j)
         h(j) = xh
 912     xg   = g(i)
         g(i) = g(j)
         g(j) = xg
 911     xf   = f(i)
         f(i) = f(j)
         f(j) = xf
   91    xe   = e(i)
         e(i) = e(j)
         e(j) = xe
   92    xd   = d(i)
         d(i) = d(j)
         d(j) = xd
   93    xc   = c(i)
         c(i) = c(j)
         c(j) = xc
   94    xb   = b(i)
         b(i) = b(j)
         b(j) = xb
   95 continue
c
c If lt and ut contain more segments to be sorted repeat process:
c
 100  m = m-1
      if (m.le.0) go to 110
      i = lt(m)
      j = ut(m)
      go to 10
 110  continue
      return
      end



      subroutine locate(xx,n,is,ie,x,j)
c-----------------------------------------------------------------------
c
c Given an array "xx" of length "n", and given a value "x", this routine
c returns a value "j" such that "x" is between xx(j) and xx(j+1).  xx
c must be monotonic, either increasing or decreasing.  j=0 or j=n is
c returned to indicate that x is out of range.
c
c Modified to set the start and end points by "is" and "ie" 
c
c Bisection Concept From "Numerical Recipes", Press et. al. 1986  pp 90.
c-----------------------------------------------------------------------
      dimension xx(n)
c
c Initialize lower and upper methods:
c
      jl = is-1
      ju = ie
c
c If we are not done then compute a midpoint:
c
 10   if(ju-jl.gt.1) then
            jm = (ju+jl)/2
c
c Replace the lower or upper limit with the midpoint:
c
            if((xx(ie).gt.xx(is+1)).eqv.(x.gt.xx(jm))) then
                  jl = jm
            else
                  ju = jm
            endif
            go to 10
      endif
c
c Return with the array index:
c
      j = jl
      return
      end



      real function powint(xlow,xhigh,ylow,yhigh,xval,pow)
c-----------------------------------------------------------------------
c
c Power interpolate the value of y between (xlow,ylow) and (xhigh,yhigh)
c                 for a value of x and a power pow.
c
c-----------------------------------------------------------------------
      parameter(EPSLON=1.0e-20)

      if((xhigh-xlow).lt.EPSLON) then
            powint = yhigh
      else
            powint = ylow + (yhigh-ylow)* 
     +               (((xval-xlow)/(xhigh-xlow))**pow)
      end if

      return
      end



      real function gcum(x)
c-----------------------------------------------------------------------
c
c Evaluate the standard normal cdf given a normal deviate x.  gcum is
c the area under a unit normal curve to the left of x.  The results are
c accurate only to about 5 decimal places.
c
c
c-----------------------------------------------------------------------
      z = x
      if(z.lt.0.) z = -z
      t    = 1./(1.+ 0.2316419*z)
      gcum = t*(0.31938153   + t*(-0.356563782 + t*(1.781477937 +
     +       t*(-1.821255978 + t*1.330274429))))
      e2   = 0.
c
c  6 standard deviations out gets treated as infinity:
c
      if(z.le.6.) e2 = exp(-z*z/2.)*0.3989422803
      gcum = 1.0- e2 * gcum
      if(x.ge.0.) return
      gcum = 1.0 - gcum
      return
      end
