## R program 'WetDryClassification_LinkApproach.R'.
## Program for rainfall estimation using microwave links.
## Wet-dry classification of time intervals; determination of reference signal level; corrected received powers.
## Described in paper:
## Aart Overeem, Hidde Leijnse, Remko Uijlenhoet, 2015. Retrieval algorithm for rainfall mapping from microwave links in a cellular communication network. Atmos. Meas. Tech. Discuss., submitted.  

## Copyright (C) 2015 Aart Overeem
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.


###############################################################################
# THIS PROGRAM CALCULATES THE MINIMUM AND MAXIMUM ATTENUATION OVER A LINK PATH#
# THE LINK APPROACH IS APPLIED TO IDENTIFY WET AND DRY SPELLS                 #
# RECEIVED POWERS ARE CORRECTED USING THE LINK APPROACH AND THE CALCULATED    #
# REFERENCE LEVEL.                                                            #
# THE FILTER TO REMOVE OUTLIERS IS NOT APPLIED, BUT THE VALUE IS COMPUTED.    #
###############################################################################
# source('WetDryClassification_LinkApproach.R')
# Start R and go to the directory "WorkingExample". Then run this script by pasting the line above (source(...)).
# Start from an empty workspace (the script is meant to be run via source()
# from the "WorkingExample" directory) and start the run-time clock.
rm(list=ls(all=TRUE))
startTime <- proc.time()

# Used time step in minutes:
Timestep <- 15

MinutesPerHour <- 60
MinutesPerDay <- 1440

# All step counts below are used as array dimensions and indices, so the time
# step must divide a day into a whole number of intervals.
if (MinutesPerDay %% Timestep != 0)
{
	stop("Timestep must divide the number of minutes per day (1440).", call. = FALSE)
}

# Number of time steps per day / per two days, and the off-by-one variants
# used for index arithmetic further on:
NrStepsDay <- MinutesPerDay / Timestep
NrStepsDayMinus1 <- NrStepsDay - 1
NrStepsDayPlus1 <- NrStepsDay + 1
NrStepsTwoDays <- 2 * MinutesPerDay / Timestep
NrStepsTwoDaysPlus1 <- NrStepsTwoDays + 1
NrStepsMaxP <- 6 * MinutesPerHour / Timestep # Number of time steps in 6 hours
NrStepsDryPeriodsRefLevel <- 2.5 * MinutesPerHour / Timestep  # Number of time steps in 2.5 hours


# Read file with values of exponents power-law relationship between rainfall intensity and specific attenuation:
coef_list <- read.table("ab_values_vertical.txt")

# Create directory for output files (silently reuse it if it already exists):
dir.create("LinkData_Corrected", showWarnings = FALSE)

# Directory with input files:
direc <- "LinkData_Preprocessed"

# Make list of input files. The pattern is a regular expression, so the dot is
# escaped and the extension is anchored to the end of the file name.
files1 <- list.files(path = direc, all.files = FALSE, full.names = TRUE, recursive = FALSE, pattern = "\\.dat$")
# Keep only non-empty files:
files1 <- files1[which(file.info(files1)$size > 0)]
if (length(files1) == 0)
{
	stop("No files with data! Program stops.", call. = FALSE)
}


# Main processing loop: one iteration per input file in files1.
#
# For every file:
#   1. Write the header of the output file in "LinkData_Corrected".
#   2. For every link ID, store its coordinates, path length and RXMIN series,
#      and compute max(RXMIN) over the previous 24 hours (RXMIN_link_max).
#   3. For every link ID, apply the link approach: compare the selected link
#      with all links whose end points lie within distance_limit, classify
#      time steps as wet or dry, determine the reference level, correct the
#      minimum and maximum received powers, and append the attenuation data
#      for the present day (steps NrStepsDayPlus1:NrStepsTwoDays) to the
#      output file.
#
# Uses from the surrounding script: files1, coef_list, Timestep,
# MinutesPerHour and the NrSteps* constants.

# A surrounding link is used if all four end-point distances to the selected
# link are smaller than distance_limit km.
distance_limit <- 15
# Thresholds of the wet-dry classification: a time step is wet if the median
# of (RXMIN - max(RXMIN)) / path length over the selected links is below
# threshold_median_L and the median of (RXMIN - max(RXMIN)) is below
# threshold_median.
threshold_median_L <- -0.7
threshold_median <- -1.4
# Neighbouring time steps of a wet step are also treated as wet if RXMIN of
# that wet step is more than threshold_wet_dry below max(RXMIN).
threshold_wet_dry <- 2

for (z in seq_along(files1))
{
	print(z)

	# Obtain date from fixed character positions of the input path.
	# NOTE(review): positions 31-38 presumably rely on the exact layout of the
	# path "LinkData_Preprocessed/<file name>" - confirm against the real file names.
	date_str <- substr(files1[z], 31, 38)

	# Construct output filename and remove a possibly existing file:
	filename_data <- paste("LinkData_Corrected/linkdata_", Timestep, "min", date_str, "_val.dat", sep = "")
	unlink(filename_data)
	# Construct header of output file:
	input <- 'ID a b Am_max Am_min path_length interval_number x_start y_start x_end y_end meanval F frequency RXMIN RXMAX RXMIN_final RXMAX_final ref_level DATETIME'
	writeLines(input, filename_data)

	# Read microwave link data:
	raw <- read.table(files1[z], header = TRUE)
	raw$ID <- as.character(raw$ID)
	hsel_reeks <- unique(raw$ID)
	lengte_reeks <- length(hsel_reeks)

	# Nothing to do for a file without any link. This check must look at the
	# number of links itself and must come before the loops over the links.
	if (lengte_reeks == 0)
	{
		next
	}

	# Make lists with the coordinates, path length and ID of all the links:
	Xstart_link <- Ystart_link <- Xend_link <- Yend_link <- length_link <- id_link <- rep(NA, lengte_reeks)
	# RXMIN per time step (rows) and link (columns), and the maximum of RXMIN
	# over the preceding period:
	RXMIN_link <- RXMIN_link_max <- array(NA, c(NrStepsTwoDays, lengte_reeks))
	steps_previous_day <- seq_len(NrStepsDay)

	for (p in seq_len(lengte_reeks))
	{
		cond <- which(raw$ID == hsel_reeks[p])
		Xstart_link[p] <- unique(raw$x_start[cond])
		Ystart_link[p] <- unique(raw$y_start[cond])
		Xend_link[p] <- unique(raw$x_end[cond])
		Yend_link[p] <- unique(raw$y_end[cond])
		length_link[p] <- unique(raw$path_length[cond])
		id_link[p] <- unique(raw$ID[cond])

		# Store RXMIN of this link at the position of its interval number.
		RXMIN_link[raw$interval_number[cond], p] <- raw$RXMIN[cond]

		# From previous day we just take the maximum value over the whole previous day.
		# Is only calculated in case of a minimum of 6 hours of data for the considered link.
		if (sum(!is.na(RXMIN_link[steps_previous_day, p])) >= NrStepsMaxP)
		{
			RXMIN_link_max[steps_previous_day, p] <- max(RXMIN_link[steps_previous_day, p], na.rm = TRUE)
		}
		# For present day maximum values can vary for different time steps.
		for (y in seq_len(NrStepsDay))
		{
			# For a chosen time step max(P_min) can only be computed in case of a
			# minimum of 6 hours of data in the previous 24 hours:
			previous_24h <- y:(y + NrStepsDayMinus1)
			if (sum(!is.na(RXMIN_link[previous_24h, p])) >= NrStepsMaxP)
			{
				RXMIN_link_max[y + NrStepsDay, p] <- max(RXMIN_link[previous_24h, p], na.rm = TRUE)
			}
		}
	}


	for (i in seq_len(lengte_reeks))
	{
		print(i)

		id_gsm <- hsel_reeks[i]

		# Read data from selected link; every series has one entry per time
		# step of the two-day period, NA where the link has no data.
		freq_gsm <- date_gsm <- RXMIN <- RXMAX <- length_gsm <- rep(NA, NrStepsTwoDays)
		xstart_gsm <- ystart_gsm <- xend_gsm <- yend_gsm <- rep(NA, NrStepsTwoDays)
		cond <- raw$ID == id_gsm
		steps_link <- raw$interval_number[cond]
		freq_gsm[steps_link] <- raw$frequency[cond]
		date_gsm[steps_link] <- raw$DATETIME[cond]
		RXMIN[steps_link] <- raw$RXMIN[cond]
		RXMAX[steps_link] <- raw$RXMAX[cond]
		length_gsm[steps_link] <- raw$path_length[cond]
		xstart_gsm[steps_link] <- raw$x_start[cond]
		ystart_gsm[steps_link] <- raw$y_start[cond]
		xend_gsm[steps_link] <- raw$x_end[cond]
		yend_gsm[steps_link] <- raw$y_end[cond]

		# Time steps for which the selected link has data:
		steps_available <- which(!is.na(RXMIN))

		# Coordinates of selected link
		Xstart_fixed <- unique(xstart_gsm[!is.na(xstart_gsm)])
		Ystart_fixed <- unique(ystart_gsm[!is.na(ystart_gsm)])
		Xend_fixed <- unique(xend_gsm[!is.na(xend_gsm)])
		Yend_fixed <- unique(yend_gsm[!is.na(yend_gsm)])


		#################################
		# CALCULATIONS FOR LINK APPROACH#
		#################################

		# Make a list of links for which the distance to the selected link is smaller than distance_limit km.
		q <- 0
		data_temp <- data_temp_distw <- array(NA, c(lengte_reeks, NrStepsTwoDays))

		RXstart_fixed <- rep(Xstart_fixed, length(Xstart_link))
		RXend_fixed <- rep(Xend_fixed, length(Xend_link))
		RYstart_fixed <- rep(Ystart_fixed, length(Ystart_link))
		RYend_fixed <- rep(Yend_fixed, length(Yend_link))

		# Compute distances between all combinations of end points:
		distance1 <- sqrt((RXstart_fixed - Xstart_link)^2 + (RYstart_fixed - Ystart_link)^2)
		distance2 <- sqrt((RXend_fixed - Xstart_link)^2 + (RYend_fixed - Ystart_link)^2)
		distance3 <- sqrt((RXstart_fixed - Xend_link)^2 + (RYstart_fixed - Yend_link)^2)
		distance4 <- sqrt((RXend_fixed - Xend_link)^2 + (RYend_fixed - Yend_link)^2)
		select_dist <- which(distance1 < distance_limit & distance2 < distance_limit &
			distance3 < distance_limit & distance4 < distance_limit)

		# The selected link itself is always taken into account.
		select_dist <- unique(c(select_dist, i))

		# Initialise the series of the selected link for every link, so that
		# values of a previously processed link can never be reused when the
		# selected link does not pass the selection criterion below.
		gsm_distw <- rep(NA, NrStepsTwoDays)
		RXMIN_max <- rep(NA, NrStepsTwoDays)
		for (p in select_dist)
		{
			# p==i: also the selected link itself is taken into account.

			# If the selected link, i.e. the one for which we want to estimate rainfall, is not available then the data from
			# the surrounding links should not be used. So these are made NA.
			# So only time steps which have data for the selected link do get data.
			RXMIN_link_temp <- RXMIN_link_max_temp <- rep(NA, NrStepsTwoDays)
			RXMIN_link_temp[steps_available] <- RXMIN_link[steps_available, p]
			RXMIN_link_max_temp[steps_available] <- RXMIN_link_max[steps_available, p]
			delta_P <- RXMIN_link_temp - RXMIN_link_max_temp

			# Only take into account a surrounding link if it has at least data for 1 time step on the present day.
			# Note that due to the selection just above, this is not necessarily the case.
			# If it is only available on the previous day, such a link could still be useful, because then it helps to
			# determine whether a time step is wet or dry, and this would be helpful to establish the reference level.
			# Moreover, time step nr NrStepsDay can determine whether time step NrStepsDay + 1 is wet or dry.
			# Nevertheless, we kept the original selection criterion.
			if (any(!is.na(delta_P[NrStepsDayPlus1:NrStepsTwoDays])))
			{
				q <- q + 1
				data_temp[q, ] <- delta_P
				data_temp_distw[q, ] <- delta_P / length_link[p]
				if (p == i)
				{
					gsm_distw <- data_temp_distw[q, ]
					RXMIN_max <- RXMIN_link_max_temp
				}
			}
		}


		# Number of available links per time step. Time steps with 0 values are
		# skipped: then apparently the selected link is also not available.
		# This is not an error, but we have to anticipate.
		number_length <- colSums(!is.na(data_temp))
		number_length <- number_length[number_length > 0]
		# minval: Minimum number of surroundings links (including the selected link), which is available during the
		# previous and present day on the time steps that the selected link is available.
		# 99999 denotes that no time step has any data.
		minval <- 99999
		if (length(number_length) > 0)
		{
			minval <- min(number_length)
		}
		# meanval: mean number of available links over the time steps with data.
		meanval <- sum(number_length) / length(number_length)


		# Criterion is used that demands that the selected link is at least available during 1 time step of present day.
		# Only for present day rainfall will be estimated.
		# Only use data if at least 4 links are minimal available for present and previous day (including the selected link),
		# for those time steps that the selected link is available (the selected link is at least available at 1 time step
		# on previous or present day).
		# minval!=99999 is an additional check, which should be redundant.
		if (minval > 3 && any(!is.na(RXMIN[NrStepsDayPlus1:NrStepsTwoDays])) && minval != 99999)
		{


			####################################
			# PROCESSING DATA FOR LINK APPROACH#
			####################################
			# Q is the number of different surrounding links that are used in total. Such a surrounding link is only used
			# if the selected link has data for the chosen time step.
			Q <- q

			data_links <- data_temp[seq_len(Q), , drop = FALSE]
			data_links_distw <- data_temp_distw[seq_len(Q), , drop = FALSE]

			# For each time step with data of the selected link: median over the
			# links of RXMIN minus max(RXMIN), with and without division by path length.
			median_gsm <- median_gsm_distw <- rep(NA, NrStepsTwoDays)
			for (j in steps_available)
			{
				median_gsm[j] <- median(data_links[, j], na.rm = TRUE)
				median_gsm_distw[j] <- median(data_links_distw[, j], na.rm = TRUE)
			}


			######################################################################
			# APPLICATION OF LINK APPROACH, CORRECTION OF MINIMUM RECEIVED POWERS#
			######################################################################
			# RXMIN_final holds the marker value 999 (RXMIN_dry) for time steps
			# classified as dry and the original RXMIN for wet time steps.
			RXMIN_dry <- rep(999, NrStepsTwoDays)
			RXMIN_final <- RXMIN_dry
			wet_steps <- which(median_gsm_distw < threshold_median_L & median_gsm < threshold_median)
			RXMIN_final[wet_steps] <- RXMIN[wet_steps]

			# Snapshot of the classification, so that the three extensions below
			# are all based on the same set of wet time steps.
			RXMIN_temp <- RXMIN_final

			# Wet time steps with RXMIN more than threshold_wet_dry below max(RXMIN):
			# the previous two time steps and the next time step get their
			# original RXMIN as well (as far as these lie within the two-day period).
			strong_wet_steps <- which(RXMIN_temp != RXMIN_dry & RXMIN_temp < (RXMIN_max - threshold_wet_dry))
			for (shift in c(-1, -2, 1))
			{
				overlap <- strong_wet_steps + shift
				overlap <- overlap[overlap >= 1 & overlap <= NrStepsTwoDays]
				if (length(overlap) > 0)
				{
					RXMIN_final[overlap] <- RXMIN[overlap]
				}
			}


			##################################################################################################################
			# CALCULATE REFERENCE LEVEL FOR INTERVALS NrStepsDay + 1, NrStepsDay + 2, NrStepsDay + 3, ..., NrStepsTwoDays, AND
			# CORRECT MINIMUM RECEIVED POWERS#
			##################################################################################################################

			# ref_level stays NA for the previous day and for time steps with
			# less than 2.5 hours of dry periods in the previous 24 hours.
			ref_level <- rep(NA, NrStepsTwoDays)
			for (first_step in seq_len(NrStepsDay))
			{
				previous_24h <- first_step:(first_step + NrStepsDayMinus1)
				# In principal RXMAX always exists if RXMIN exists and vice versa. RXMIN data determine whether a period is wet or dry.
				dry_steps <- RXMIN_final[previous_24h] == RXMIN_dry[previous_24h]
				mean_reflevel_Timestep <- (RXMIN[previous_24h][dry_steps] + RXMAX[previous_24h][dry_steps]) / 2
				# mean_reflevel_Timestep can contain one or more NA values. This happens if RXMIN is NA for a time step.
				# If criterion is not met (but RXMIN exists), nothing is used in mean_reflevel_Timestep of that time step.
				# Then the list mean_reflevel_Timestep becomes shorter.
				# The average reference level is computed on the basis of RXMIN and RXMAX of each time interval (of
				# Timestep min) classified as dry.
				# Subsequently, over a period of 24 hours the median is taken of the average reference signal,
				# on the basis of those time steps classified as dry.
				if (sum(!is.na(mean_reflevel_Timestep)) >= NrStepsDryPeriodsRefLevel)
				{
					ref_level[first_step + NrStepsDay] <- median(mean_reflevel_Timestep, na.rm = TRUE)
				}
			}

			# RXMIN_final cannot exceed the reference level (this also replaces
			# the dry marker value by the reference level):
			above_ref <- which(RXMIN_final > ref_level)
			RXMIN_final[above_ref] <- ref_level[above_ref]
			# Only if ref_level exists and the selected link has RXMIN data,
			# RXMIN_final may have a value.
			RXMIN_final[is.na(ref_level) | is.na(RXMIN)] <- NA


			##################################
			# CORRECT MAXIMUM RECEIVED POWERS#
			##################################
			# RXMAX_final gets original RXMAX value only if on the basis of the RXMIN signal a period is classified as wet.
			# Otherwise RXMAX is equal to reference level.
			RXMAX_final <- ref_level
			select <- which(RXMIN_final < ref_level)
			RXMAX_final[select] <- RXMAX[select]
			# If RXMAX is larger than reference level: RXMAX_final is set equal to the reference level.
			above_ref <- which(RXMAX_final > ref_level)
			RXMAX_final[above_ref] <- ref_level[above_ref]


			#######################################################################################
			# CALCULATE MINIMUM AND MAXIMUM ATTENUATION OVER LINK PATH AND WRITE LINK DATA TO FILE#
			#######################################################################################
			# NB: Only NrStepsDayPlus1:NrStepsTwoDays are suited for computing rainfall intensities.
			# Convert received powers to attenuation over the link path
			# Only proceed if RXMIN has a value and only for the present day.

			# Select the coefficients a and b (columns 2 and 3 of coef_list) of
			# the frequency in column 1 that is closest to the link frequency.
			# [1] has been added because it could happen that 2 frequencies from
			# the list are both the closest to the link frequency.
			freq_id <- unique(freq_gsm[!is.na(freq_gsm)])
			freq_difference <- abs(coef_list[, 1] - freq_id)
			nearest <- which(freq_difference == min(freq_difference))[1]
			b <- coef_list[, 3][nearest]
			a <- coef_list[, 2][nearest]

			# cond: time steps of the present day (numbered from 1) with all
			# required data available.
			present_day <- NrStepsDayPlus1:NrStepsTwoDays
			cond <- which(!is.na(RXMIN[present_day]) & !is.na(ref_level[present_day]) &
				!is.na(gsm_distw[present_day]) & !is.na(median_gsm_distw[present_day]))

			if (length(cond) > 0)
			{

				# Attenuation related to maximum rainfall intensity, link approach:
				Am_max <- pmax(ref_level[cond + NrStepsDay] - RXMIN_final[cond + NrStepsDay], 0)

				# Attenuation related to minimum rainfall intensity, link approach:
				Am_min <- pmax(ref_level[cond + NrStepsDay] - RXMAX_final[cond + NrStepsDay], 0)

				# Filter. Calculate value of $\Sigma ( \Delta P_L - \mbox{median} ( \Delta P_L) )
				# over the 24 hours up to and including the time step, times the
				# length of a time step in hours. The filter itself is not applied
				# here; only its value is written to the output (column F).
				# Surrounding links exist, selected link has RXMIN data for this time step, so gsm_distw and
				# median_gsm_distw must both be not equal to NA.
				cumul <- vapply(cond, function(s)
				{
					last_24h <- (s + 1):(s + NrStepsDay)
					sum(gsm_distw[last_24h] - median_gsm_distw[last_24h], na.rm = TRUE) * Timestep / MinutesPerHour
				}, numeric(1))

				data_fit_linkap <- data.frame(cbind(id_gsm, a, b, Am_max, Am_min, length_gsm[cond + NrStepsDay], cond, Xstart_fixed, Ystart_fixed, Xend_fixed, Yend_fixed, meanval, cumul, freq_id, RXMIN[cond + NrStepsDay], RXMAX[cond + NrStepsDay], RXMIN_final[cond + NrStepsDay], RXMAX_final[cond + NrStepsDay], ref_level[cond + NrStepsDay], date_gsm[cond + NrStepsDay]))

				write.table(data_fit_linkap, filename_data, row.names = FALSE, col.names = FALSE, append = TRUE, quote = FALSE)
			}

		}
	}
	# Print warnings per day:
	print(warnings())
	# Remove warnings. The variable is only reset if it already exists,
	# because a new binding cannot be added to the base environment.
	if (exists("last.warning", envir = baseenv(), inherits = FALSE))
	{
		assign("last.warning", NULL, envir = baseenv())
	}
}


# Report the elapsed (wall-clock) time since startTime, rounded to 0.1 s.
elapsed_seconds <- round((proc.time() - startTime)[3], digits = 1)
cat(sprintf("Finished. (%.1f seconds)\n", elapsed_seconds))

