Files
piscal/dataassim/math/othersupmath/outlier.f
2016-02-03 18:52:05 +00:00

345 lines
9.8 KiB
FortranFixed

subroutine meancycleoutliers(nsamp,xvar,yvar,nminno0)
implicit none
!Detect outliers in yvar with the mean cycle approach. Replace the detected
!outliers with -9999. xvar must be repeated cycles
integer nsamp,nminno0
double precision xvar(nsamp),yvar(nsamp)
integer i,j,k,m,n,noutliers,nk,nminno,ncyc
double precision yvector(nsamp),std_clean,
&fmean_clean(nsamp),fn9999,tiny,term
parameter(fn9999=-9999.0d0,tiny=1.0d-8)
!
!First remove the outliers for a given xvar value within a nminno window
5 noutliers=0
do i=1,nsamp
nminno=nminno0
if(dabs(yvar(i)-fn9999).gt.tiny)then
7 n=0
k=i
nk=-1
!The current value is the first value in yvector
10 if(nk.ge.nminno/2)goto 30
if(dabs(xvar(k)-xvar(i)).gt.tiny)goto 20
nk=nk+1
if(dabs(yvar(k)-fn9999).lt.tiny)goto 20
n=n+1
yvector(n)=yvar(k)
20 k=k-1
if(k.gt.0)goto 10
30 nk=0
m=i+1
40 if(m.gt.nsamp)goto 60
if(nk.ge.nminno/2)goto 60
if(dabs(xvar(m)-xvar(i)).gt.tiny)goto 50
nk=nk+1
if(dabs(yvar(m)-fn9999).lt.tiny)goto 50
n=n+1
yvector(n)=yvar(m)
50 m=m+1
goto 40
60 if(n.lt.nminno0)then
nminno=nminno+1
goto 7
endif
call whoareoutliers(n,yvector,std_clean,fmean_clean(i))
if(dabs(yvector(1)-fn9999).lt.tiny)then
noutliers=noutliers+1
yvar(i)=fn9999
else
fmean_clean(i)=yvar(i)-fmean_clean(i)
endif
endif
enddo
if(noutliers.gt.0)goto 5
goto 190
!
!Then remove the outliers from the mean cycle out of nminno cycles
105 noutliers=0
do i=1,nsamp
nminno=nminno0
if(dabs(yvar(i)-fn9999).gt.tiny)then
107 n=0
nk=-1
k=i
!The current value is the first value in yvector
110 if(dabs(xvar(k)-xvar(i)).lt.tiny)nk=nk+1
if(nk.ge.nminno/2)goto 130
if(dabs(yvar(k)-fn9999).lt.tiny)goto 120
n=n+1
yvector(n)=fmean_clean(k)
120 k=k-1
if(k.gt.0)goto 110
130 m=i+1
ncyc=nk
nk=0
140 if(m.gt.nsamp)goto 160
if(dabs(xvar(m)-xvar(i)).lt.tiny)nk=nk+1
if(nk.ge.nminno/2)goto 160
if(dabs(yvar(m)-fn9999).lt.tiny)goto 150
n=n+1
yvector(n)=fmean_clean(m)
150 m=m+1
goto 140
160 if((nk+ncyc).lt.nminno)then
nminno=nminno+1
goto 107
endif
call whoareoutliers(n,yvector,std_clean,term)
if(dabs(yvector(1)-fn9999).lt.tiny)then
noutliers=noutliers+1
yvar(i)=fn9999
endif
endif
enddo
190 if(noutliers.gt.0)goto 5
do i=2,nsamp-1
if((yvar(i)-fn9999).gt.tiny)then
if((yvar(i-1)-fn9999).lt.tiny.and.
&(yvar(i+1)-fn9999).lt.tiny)yvar(i)=fn9999
endif
enddo
return
end
!####################################################################
subroutine whoareoutliers(nsamp0,xvar0,std_clean,fmean_clean)
implicit none
!Detect outliers. On exit, outliers are given as -9999
integer nsamp,i,j,nsamp0,isoutlier_2sides,ivect(nsamp0)
double precision xvar(nsamp0),std_clean,fmean_clean,
& xvar0(nsamp0),gap
parameter(gap=-9999.0d0)
10 nsamp=0
do j=1,nsamp0
if(dabs(xvar0(j)-gap).gt.1.0d-5)then
nsamp=nsamp+1
xvar(nsamp)=xvar0(j)
ivect(nsamp)=j
endif
enddo
if(nsamp.lt.2)then
std_clean=gap
fmean_clean=gap
return
endif
i=isoutlier_2sides(nsamp,xvar)
if(i.lt.0)goto 100
xvar0(ivect(i))=gap
goto 10
100 fmean_clean=0.0d0
do j=1,nsamp
fmean_clean=fmean_clean+xvar(j)
enddo
fmean_clean=fmean_clean/dble(nsamp)
std_clean=0.0d0
do j=1,nsamp
std_clean=std_clean+(xvar(j)-fmean_clean)*(xvar(j)-fmean_clean)
enddo
std_clean=dsqrt(std_clean/dble(nsamp-1))
return
end
!####################################################################
! detecting outliers using Grubb's
integer function isoutlier_2sides(nsamp,yobs)
implicit none
! Detecting the outlier using the Grubb's test for two tails. If there is an outlier,
! isoutlier is the index number of the outlier in the sequence yobs. If there
! is no outlier, isoutlier is returned with -9999
integer nsamp
double precision yobs(nsamp)
integer i,imax
double precision zc,std,fmean,dev,devmax,
& alpha,grubbzc_2sides
parameter(alpha=0.05d0)
isoutlier_2sides=-9999
if(nsamp.le.2)then
return
endif
call stdmean(nsamp,yobs,std,fmean)
if(std.le.0.0d0)return
devmax=dabs(yobs(1)-fmean)/std
imax=1
do i=2,nsamp
dev=dabs(yobs(i)-fmean)/std
if(dev.gt.devmax)then
imax=i
devmax=dev
endif
enddo
zc=grubbzc_2sides(nsamp,alpha)
if(devmax.ge.zc)then
isoutlier_2sides=imax
endif
return
end
!*************************************************************
integer function isoutlier_1side(nsamp,yobs,iwhichside)
implicit none
! Detecting the outlier using the Grubb's test for one tail. If there is an outlier,
! isoutlier is the index number of the outlier in the sequence yobs. If there
! is no outlier, isoutlier is returned with -9999
! iwhichside < 0, detecting the outlier smaller than the mean
! iwhichside > 0, detecting the outlier greater than the mean
integer nsamp,iwhichside
double precision yobs(nsamp)
integer i,imax
double precision zc,std,fmean,dev,devmax,
& alpha,grubbzc_1side
parameter(alpha=0.05d0)
isoutlier_1side=-9999
if(nsamp.le.2)then
return
endif
call stdmean(nsamp,yobs,std,fmean)
devmax=-9999.0d0
do i=1,nsamp
dev=(yobs(i)-fmean)/std
if(iwhichside.gt.0)then
if(dev.gt.0.0d0.and.dev.gt.devmax)then
imax=i
devmax=dev
endif
else
if(dev.lt.0.0d0.and.dabs(dev).gt.devmax)then
imax=i
devmax=dabs(dev)
endif
endif
enddo
zc=grubbzc_1side(nsamp,alpha)
if(devmax.ge.zc)then
isoutlier_1side=imax
endif
return
end
double precision function grubbzc_2sides(nsamp,alpha)
implicit none
integer nsamp
double precision alpha
! Compute the critical Grubb Z valu
! nsamp: the number of samples (not the degree of freedom)
! alpha: the significance level (the sum of probabilities of
! upper and lower tails)
double precision Sign_Level,tc,student_t
integer Samples
Sign_Level=1.0d0-2.0d0*alpha/dble(2*nsamp)
Samples=(nsamp-2)
Samples=Samples+1
tc=student_t(Samples,Sign_Level)
grubbzc_2sides=(dble(nsamp)-1.0d0)*tc/dsqrt(dble(nsamp))
grubbzc_2sides=grubbzc_2sides/dsqrt(dble(nsamp-2)+tc*tc)
return
end
double precision function grubbzc_1side(nsamp,alpha)
implicit none
integer nsamp
double precision alpha
! Compute the critical Grubb Z valu
! nsamp: the number of samples (not the degree of freedom)
! alpha: the significance level (one tail probability) upper and lower tails)
double precision Sign_Level,tc,student_t
integer Samples
Sign_Level=1.0d0-2.0d0*alpha/dble(nsamp)
Samples=(nsamp-2)
Samples=Samples+1
tc=student_t(Samples,Sign_Level)
grubbzc_1side=(dble(nsamp)-1.0d0)*tc/dsqrt(dble(nsamp))
grubbzc_1side=grubbzc_1side/dsqrt(dble(nsamp-2)+tc*tc)
return
end
double precision function grubbzc_0_01(nsamp)
implicit none
integer nsamp,nlow,nhigh
double precision zvalue(140),
& a,b,x0,y0
if(nsamp.gt.140)then
a=1903.0377d0
b=-0.3756d0
x0=1.0369d-7
y0=-1898.8572d0
grubbzc_0_01=y0+a/(1.0d0+(dble(nsamp)/x0)**b)
else
zvalue(3)=1.15d0
zvalue(4)=1.48d0
zvalue(5)=1.71d0
zvalue(6)=1.89d0
zvalue(7)=2.02d0
zvalue(8)=2.13d0
zvalue(9)=2.21d0
zvalue(10)=2.29d0
zvalue(11)=2.34d0
zvalue(12)=2.41d0
zvalue(13)=2.46d0
zvalue(14)=2.51d0
zvalue(15)=2.55d0
zvalue(16)=2.59d0
zvalue(17)=2.62d0
zvalue(18)=2.65d0
zvalue(19)=2.68d0
zvalue(20)=2.71d0
zvalue(21)=2.73d0
zvalue(22)=2.76d0
zvalue(23)=2.78d0
zvalue(24)=2.8d0
zvalue(25)=2.82d0
zvalue(26)=2.84d0
zvalue(27)=2.86d0
zvalue(28)=2.88d0
zvalue(29)=2.89d0
zvalue(30)=2.91d0
zvalue(31)=2.92d0
zvalue(32)=2.94d0
zvalue(33)=2.95d0
zvalue(34)=2.97d0
zvalue(35)=2.98d0
zvalue(36)=2.99d0
zvalue(37)=3.0d0
zvalue(38)=3.01d0
zvalue(39)=3.03d0
zvalue(40)=3.04d0
zvalue(50)=3.13d0
zvalue(60)=3.2d0
zvalue(70)=3.26d0
zvalue(80)=3.31d0
zvalue(90)=3.35d0
zvalue(100)=3.38d0
zvalue(110)=3.42d0
zvalue(120)=3.44d0
zvalue(130)=3.47d0
zvalue(140)=3.49d0
if(nsamp.le.40)then
grubbzc_0_01=zvalue(nsamp)
else
if(nsamp.eq.140)then
grubbzc_0_01=zvalue(140)
else
nlow=idint(dble(nsamp)/10.0d0)*10
nhigh=10+idint(dble(nsamp)/10.0d0)*10
grubbzc_0_01=zvalue(nlow)+(zvalue(nhigh)-zvalue(nlow))*
& dble(nsamp-nlow)/dble(nhigh-nlow)
endif
endif
endif
return
end