This is the third post in a series (
part 1,
part 2) about TempLS ver 3. The first part gathered a big array x of land and sea temperatures. The second made an array w of weights (in matching layout) and a combined station inventory iv. As explained
here, that is all you need for a simple iterative process that computes temperature anomalies and their average by least squares. This post describes the code that does it.
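In brief, the model being fitted is x ≈ L + G, where L has an offset for each station and calendar month (its local normal) and G has a global anomaly for each month of each year; the iteration minimises the weighted sum of squares Σ w·(x − L − G)² over all station/month/year cells.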
As earlier, we have a control word (job), whose fourth character can be O, o, P or p. Lower case means skip the stage, so the first line tests for that. The rest of the control word says which stored (x,w,iv) file to load. Then, so that the linear operations work, the NA's are removed from x - those cells can hold any value, since w=0 there. Finally x and w are re-dimensioned to 3D arrays (station, month, year).
if(do[4]){ # Stage 4 - solve for anomalies
load(R[3]) # x are anomalies, w weights
x[is.na(x)]=0 # w=0 at these cells, so any value will do
d=c(nrow(x),12,ncol(x)%/%12) # stations, months, years
dim(w)=dim(x)=d # reshape to 3D
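Before getting to the iteration, it may help to see what the 3D shape buys. A standalone sketch with toy dimensions (not TempLS objects):
a=array(1:72,c(3,12,2)) # toy data: 3 stations, 12 months, 2 years
dim(rowSums(a,dims=2)) # 3 12 - sums over years, one per station/month (the shape of L)
dim(colSums(a)) # 12 2 - sums over stations, one per month/year (the shape of G)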
The iteration proceeds by alternately calculating L, the offsets for each station, as the weighted average of temperatures minus G, the global anomaly, and then G as the weighted average anomaly relative to those offsets. These are the calculations that matter; the rest just tests the changes in G for convergence. Typically each iteration takes a second or two, and six or so may be needed. Note that a tiny number is added to the weight denominators wr and wc for the rare cases where these are zero, which would otherwise give 0/0; with the added number the result resolves to 0.
L=G=v=0;
wr=rowSums(w,dims=2)+1e-20; wc=colSums(w)+1e-20; # weight-sum denominators; the 1e-20 avoids 0/0
for(i in 1:20){ # Gauss-Seidel iterations
g=G
L=rowSums(w*(x-rep(G,each=d[1])),dims=2)/wr # Solve for L
G=colSums(w*(x-rep(L,d[3])))/wc # then G
if(mean(abs(G-g))<0.0001)break
}
if(i>19) stop("Not Converged")
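To watch the iteration work in isolation, here is a self-contained toy run with synthetic data and equal weights (all objects here are made up for illustration):
set.seed(1)
d=c(5,12,3) # 5 stations, 12 months, 3 years
x=array(rnorm(prod(d)),d) # synthetic anomalies
w=array(1,d) # equal weights, so no 0/0 risk
L=G=0
wr=rowSums(w,dims=2); wc=colSums(w)
for(i in 1:20){
g=G
L=rowSums(w*(x-rep(G,each=d[1])),dims=2)/wr
G=colSums(w*(x-rep(L,d[3])))/wc
if(mean(abs(G-g))<0.0001)break
}
i # 2 here - with equal weights the second pass reproduces the first exactly, so it stops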
What remains is post-processing, which you can adapt to your requirements. In the main stream, I just print and store results. The first statement is important, though. There are 12 loose degrees of freedom: a number can be added to G for any calendar month, and the same subtracted from the corresponding column of L, without changing the fit. The calculation gets reasonable results here, but with small arbitrary shifts from month to month. The subtraction pins the result to a fixed base period - typically a three-decade stretch like 1961-90, as here.
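Continuing that toy run, the degeneracy is easy to verify: shift one month of G up by a constant and the matching column of L down, and every residual is unchanged:
cc=1; L2=L; G2=G # shift month 1 by cc
G2[1,]=G2[1,]+cc; L2[,1]=L2[,1]-cc
r1=x-rep(L,d[3])-rep(G,each=d[1]) # residuals before
r2=x-rep(L2,d[3])-rep(G2,each=d[1]) # and after the shift
max(abs(r1-r2)) # 0 - which is why a base period must be fixed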
Beyond that, it's just saving output. I have the graphics code (used if the control word (job) was "P") in a separate file, which I won't plod through here.
# post processing
G=round(G-rowMeans(G[,62:91]),3) # set to 1961-90 basis; cols 62:91 are those years when yr[1]=1900
ow=c(colSums(w,dims=1)>0) # !ow means no data that month
G[!ow]=NA # usually months without data in last year.
save(G,L,x,w,iv,file=R[4])
ym=round(outer(0:11/12,yrs,"+"),2)[ow] # months in year units
write(paste(ym,G[ow]),file="templs.txt")
if(kind[4]=="p")source("LSpic.r") # graphics
}
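With the stored files from earlier stages in place, a typical run of just this stage might look like this (a hypothetical session, assuming u.sav, e.sav and ueg.sav already exist):
job="uegO" # lower case u,e,g: reuse stored data and weights; O: redo the solve
source("TempLS3.r")
res=read.table("templs.txt") # two columns: time in year units, anomaly in C
tail(res)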
So finally, here is the complete code:
# Function TempLS3.r. Written in four stages, it collects data, makes weights
# and solves for global temperature anomaly.
# Written by Nick Stokes, June 24 2015.
# See https://moyhu.blogspot.com.au/2015/06/templs-new-v3.html
isin=function(v,lim){v>=lim[1] & v<=lim[2]} # TRUE where v is within limits
mth=function(yr){round(seq(yr[1]+0.04,yr[2]+1,by=1/12),3)} # month times in year units
sp=function(s)(unlist(strsplit(s,""))) # split a string into characters
pc=function(...,s=""){paste(...,sep=s,collapse=s)} # paste/collapse shorthand
if(!exists("job"))job="UEGO"
i=sp(job) # split the control word into characters
kind=tolower(i)
do=i!=kind # upper case means do the stage
yr=c(1900,2015); yrs=yr[1]:yr[2]; # you can change these
ny=diff(yr)+1
j=c(1,1,2,2,1,3,1,4); R=1:4; #R will be the names of storage files
for(i in 1:4){ # make .sav file names
R[i]=pc(c(kind[j[i*2-1]:j[i*2]],".sav"))
if(!file.exists(R[i]))do[i]=TRUE; # if the stored file doesn't exist, the stage must run
}
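# e.g. with the default job="UEGO" these come out as
# R = c("u.sav","e.sav","ueg.sav","uego.sav")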
if(do[1]){ # Stage 1 - read land (GHCN)
getghcn=function(){ # download and untar GHCN
fi="ghcnm.tavg.latest.qcu.tar.gz"
fi=sub("u",ki,fi) # qcu -> qca if the adjusted data was requested
download.file(pc("https://www1.ncdc.noaa.gov/pub/data/ghcn/v3/",fi),fi);
gunzip(fi,"ghcnv3.tar",overwrite=T);
untar("ghcnv3.tar",exdir=".");
di=dir(".",recursive=T)
h=grep(pc("qc",ki),di,val=T) # find the qcu/qca files in subdir
h=grep("v3",h,val=T) # keep only the v3 files
file.rename(h,Rr)
unlink("ghcnm.*")
}
readqc=function(f){ # read the fixed-width GHCN data file
b=readLines(f)
d=matrix(NA,14,length(b));
d[1,]=as.numeric(substr(b,1,11)); # station ID
d[2,]=as.numeric(substr(b,12,15)); # year
iu=0:13*8+4 # start columns of the fields; monthly values begin at col 20
for(i in 3:14){
di=as.numeric(substr(b,iu[i],iu[i]+4)); # monthly aves
ds=substr(b,iu[i]+6,iu[i]+6); # quality flag
di[ds!=" " | di== -9999]=NA # remove flagged or missing data
d[i,]=di/100 # convert to C
}
d
}
require("R.utils") # for gunzip
ki=kind[1]
Rr=paste("ghcn",ki,c(".dat",".inv"),sep="")
getghcn()
d=readqc(Rr[1])
b=readLines(Rr[2])
ns=length(b)
wd=c(1,11,12,20,21,30,1,3,31,37,88,88,107,107) # column ranges of the fields to read from the .inv file
iv=data.frame(matrix(NA,ns,6))
for(i in 1:7){
k=substr(b,wd[i*2-1],wd[i*2]);
if(i<6)k=as.numeric(k)
if(i==1)K=k else iv[,i-1]=k; # K holds the station IDs
}
colnames(iv)=c("lat","lon","country","alt","airprt","urban")
d=d[,isin(d[2,],yr)] # restrict to years in yr
ib=match(d[1,],K)-1 # convert station numbers to place in order (1:7280)
x=matrix(NA,12,ny*ns)
ic=ib*ny+d[2,]-yr[1]+1 # locations for each row of data
x[,ic]=d[3:14,] # fill x
dim(x)=c(12*ny,ns); x=t(x) # rearrange to stations x months
save(x,iv,file=R[1])
}
if(do[2]){ # Stage 2 - read SST
# ERSST has lon 0:358:2, lat -88:88:2
ki=kind[2];
s=c("ersst4","ersst.%s.%s.asc","ftp://ftp.ncdc.noaa.gov/pub/data/cmb/ersst/v4/ascii")
iy=1;
if(ki=="d"){s=sub("4","3b",s);s[1]="../../data/ersst";iy=10;yrs=seq(yr[1],yr[2],10);}
if(!file.exists(s[1]))dir.create(s[1])
x=NULL; n0=180*89;
for(ii in yrs){
# read and arrange the data
f=sprintf(s[2],ii,ii+9)
if(ki=="e")f=sprintf(s[2],"v4",ii)
g=file.path(s[1],f)
if(!file.exists(g))download.file(file.path(s[3],f),g);
v=matrix(scan(g),nrow=n0);
v=v[,(mth(c(ii,ii+iy-1)) %in% mth(yr))[1:ncol(v)]]/100 # Select years and convert to C
n=length(v);
o=v== -99.99 | v< -1 # mark missing (-99.99) and sea ice (below -1C)
v[o]=NA
n=n/n0 # number of months read
dim(v)=c(89,180,n)
# reduce the grid
i1=1:45*2-1;i2=1:90*2 # Take every second one
v=v[i1,i2,]
# append this file's data to x
x=c(x,v)
print(c(ii,length(x)/90/45))
}
str(x)
iv=cbind(rep(i1*2-90,90),rep((i2*2+178)%%360-180,each=45)) # Make lat/lon -88:88,2:358
m=length(x)/90/45
dim(x)=c(90*45,m)
rs=rowSums(!is.na(x))
o=rs>12 # to select only locations with more than 12 mths data in total
iv=iv[o,] # make the lat/lon
x=x[o,] #discard land and unmeasured sea
str(x)
ns=sum(o) # number of cells retained
rem=ncol(x)%%12
if(rem>0)x=cbind(x,matrix(NA,ns,12-rem)) # pad to full year
str(x)
dim(x)=c(ns,12*((m-1)%/%12+1))
save(x,iv,file=R[2])
}
if(do[3]){ # Stage 3 - weights from the supplied x
load(R[2]) # ERSST x,iv
x0=x;i0=cbind(iv,NA,NA,NA,NA); rad=pi/180;
load(R[1]) # GHCN x,iv
colnames(i0)=colnames(iv)
x=rbind(x,x0);iv=rbind(iv,i0) # combine
if(kind[3]=="g"){ #Grid
dl=180/36 # 36*72 grid for wts
jv=as.matrix(iv[,1:2])%/%dl # latlon integer number for each cell
y=cos((jv[,1]+.5)*dl*rad) #trig weighting for area each cell
kv=drop(jv%*%c(72,1)+1297) # Give each cell a unique number
w=array(0,dim(x))
for(i in 1:ncol(x)){ # over months
j=which(!is.na(x[,i])) # stations with data
s=1/table(kv[j]) # 1/(number of stations in each cell)
u=as.numeric(names(s)) # cell numbers
k1=match(kv[j],u) # which cell each station belongs to
w[j,i]=y[j]*s[k1]
}
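# net effect: each station's weight is cos(lat of its cell centre)/(number of stations sharing the cell)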
}else{ # Mesh
require("geometry")
require("alphahull")
a=pc("w_",R[3])
if(file.exists(a)){load(a)}else{w=array(0,dim(x))} # old mesh
a=cos(cbind(iv[,1:2],90-iv[,1:2])*rad) # cosines of lat/lon and (via 90-) their sines
z=cbind(a[,1]*a[,c(2,4)],a[,3]) # to 3D coords on the unit sphere
if(!exists("w_yr"))w_yr=yr
w=w[,mth(w_yr)%in%mth(yr)]
if(sum(dim(w)==dim(x))<2){w=array(0,dim(x)); print("Meshing");}
print(Sys.time())
for(ii in 1:ncol(x)){
o=!is.na(x[,ii]) # stats with data
no=sum(o); if(no<9)next; # skip months with too few stations (e.g. at the end)
if(sum(xor(o,(w[,ii]>0)))==0)next; # w slots match x, so use old mesh
y=z[o,]
m=convhulln(y) # convex hull (tri mesh)
A=0
B=cbind(y[m[,1],],y[m[,2],],y[m[,3],])
for(j in 0:2){ # determinants for areas
k=(c(0:2,0,2,1)+j)%%3+c(1,4,7)
A=A+B[,k[1]]*B[,k[2]]*B[,k[3]]-B[,k[4]]*B[,k[5]]*B[,k[6]]
}
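# A is now det[y1,y2,y3] (Sarrus' rule), about twice the triangle area for the small triangles of a fine mesh on the unit sphere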
A=abs(A) # triangles from convhulln have varied orientation
n=cbind(c(m),rep(A,3)) # long list with areas matched to nodes
k=which(o)
w[!o,ii]=0
w[o,ii]=1e-9 # to mark stations that missed wt but have x entries
for(jj in 1:99){ # Adding areas for nodes
i=match(1:sum(o),n[,1]) # find first matches in mesh
q=which(!is.na(i))
w[k[q],ii]=w[k[q],ii]+n[i[q],2] # add in area
n=rbind(n[-i[q],]) # take out matches
if(length(n)<1)break;
}
print(c(ii,nrow(m),sum(w[,ii])/pi/24)) # last number should be ~1; node area sums approach 24*pi
} # end mesh months loop
print(Sys.time())
f=paste("w_",R[3],sep="") # for saving the wts if wanted
w_yr=yr # save yrs to match when recalled
if(!file.exists(f))save(w_yr,w,file=f)
}# end mesh/grid
save(x,w,iv,file=R[3])
}# end step 3
if(do[4]){ # Stage 4 - solve for anomalies
load(R[3]) # x are anomalies, w weights
x[is.na(x)]=0 # w=0 at these cells, so any value will do
d=c(nrow(x),12,ncol(x)%/%12) # stations, months, years
dim(w)=dim(x)=d # reshape to 3D
L=G=v=0;
wr=rowSums(w,dims=2)+1e-20; wc=colSums(w)+1e-20; # weight-sum denominators; the 1e-20 avoids 0/0
for(i in 1:20){ # Gauss-Seidel iterations
g=G
L=rowSums(w*(x-rep(G,each=d[1])),dims=2)/wr # Solve for L
G=colSums(w*(x-rep(L,d[3])))/wc # then G
if(mean(abs(G-g))<0.0001)break
}
if(i>19) stop("Not Converged")
# post processing
G=round(G-rowMeans(G[,62:91]),3) # set to 1961-90 basis; cols 62:91 are those years when yr[1]=1900
ow=c(colSums(w,dims=1)>0) # !ow means no data that month
G[!ow]=NA # usually months without data in the last year
save(G,L,x,w,iv,file=R[4])
ym=round(outer(0:11/12,yrs,"+"),2)
write(paste(ym[ow],G[ow]),file="templs.txt")
if(kind[4]=="p")source("LSpic.r")
}