## R program 'Preprocessing_Linkdata.R'.
## Program for rainfall estimation using microwave links.
## Preprocessing of microwave link data.
## Described in paper:
## Aart Overeem, Hidde Leijnse, Remko Uijlenhoet, 2015. Retrieval algorithm for rainfall mapping from microwave links in a cellular communication network. Atmos. Meas. Tech. Discuss., submitted.  

## Copyright (C) 2015 Aart Overeem
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.


# source('Preprocessing_Linkdata.R')
# Usage: start R, go to the directory "WorkingExample" and run this script by pasting the line above.

# Start from an empty workspace. Note that this removes ALL objects, hidden ones included.
rm(list = ls(all.names = TRUE))

# Reference time, used at the end of the script to report the total calculation time.
startTime <- proc.time()

# Directory with input files:
direc <- "LinkData_Original"

# Load R libraries:
#pathlib = "path name"
library(sp)	#,lib.loc=pathlib)
library(rgdal)

# Create directory for output files.
# Only create it when it does not exist yet: calling dir.create() on an existing directory
# raises a warning on every re-run, which would end up among the warnings printed per day.
if (!dir.exists("LinkData_Preprocessed")) {
  dir.create("LinkData_Preprocessed")
}

# Minimum and maximum allowed microwave frequency
# (data are kept when MinFrequency <= frequency < MaxFrequency):
MinFrequency <- 12.5
MaxFrequency <- 40.5

# Used time step in minutes:
Timestep <- 15

# Derived quantities: number of time steps per hour, per day and per two days.
NrTimestepsHour <- 60 / Timestep
MinutesPerDay <- 1440
NrStepsDay <- MinutesPerDay / Timestep
NrStepsTwoDays <- 2 * NrStepsDay


# Select daily time interval, i.e., "0800" implies 0800 UTC previous day - 0800 UTC present day.
# Do not use "0000" for 0000 - 0000 UTC, but "2400"!
PERIOD <- "0800"

# Make list of input files.
# 'pattern' is a regular expression: the dot is escaped and the pattern is anchored at the end of
# the name, so that only files with extension ".dat" are listed. The unanchored pattern ".dat"
# also matches e.g. the "kdat" in "linkdata", i.e. files with any extension.
files1 <- list.files(path = direc, pattern = "\\.dat$", all.files = FALSE, full.names = TRUE, recursive = FALSE)

# Keep non-empty files only:
files1 <- files1[which(file.info(files1)$size > 0)]
if (length(files1) == 0) {
  # Stop with the explanation in the error message itself.
  stop("No files with data! Program stops.", call. = FALSE)
}

# Indices (positions in files1) of the days to process. The file at position z - 1 in the sorted
# list is expected to be the file of the day before, so the first file cannot be processed itself.
range <- seq(from = 2, to = 2, by = 1)


# Center of the coordinate system in degrees: X_middle is its longitude, Y_middle its latitude.
# 52.155223°N 5.387242°E was chosen as the middle of the Netherlands ('The Tower of Our Lady' is a
# church tower in Amersfoort and used to be the middle point of the Dutch grid reference system).
X_middle <- 5.387242
Y_middle <- 52.155223


# Main loop: for every selected day, combine the link data of the previous and the present day,
# apply consistency checks per link and write the result to
# "LinkData_Preprocessed/linkdata<yyyymmdd>.dat".
#
# Uses from the preceding part of the script: files1, range, PERIOD, MinFrequency, MaxFrequency,
# NrTimestepsHour, NrStepsDay, NrStepsTwoDays, X_middle, Y_middle.
# Input paths are assumed to have the date (yyyymmdd) at character positions 27-34 and the
# extension at positions 35-38, e.g. "LinkData_Original/linkdata<yyyymmdd>.dat".

# The two coordinate reference systems are the same for every day and every link, so they are
# constructed once here instead of twice per link.
CRS.wgs84 <- CRS("+init=epsg:4326") # WGS 84
CRS.new <- CRS(paste("+proj=aeqd +a=6378.137 +b=6356.752 +R_A +lat_0=",Y_middle," +lon_0=",X_middle," +x_0=0 +y_0=0",sep=""))

# Convert WGS84 coordinates (degrees) to the Azimuthal Equidistant Cartesian coordinate system
# defined by CRS.new (km). Returns the transformed points; their coordinates are still called
# "lon" (now the easting) and "lat" (now the northing).
ProjectLinkCoordinates <- function(lon, lat) {
  d <- data.frame(lon = lon, lat = lat)
  coordinates(d) <- c("lon", "lat")
  proj4string(d) <- CRS.wgs84
  spTransform(d, CRS.new)
}

for (z in range) {

  # Date of the present day (yyyymmdd) as contained in the name of the input file:
  date_today <- substr(files1[z], 27, 34)

  # Construct output filename:
  filename_data <- paste("LinkData_Preprocessed/linkdata", date_today, ".dat", sep = "")
  # Remove output file if it already exists:
  unlink(filename_data)

  # Construct header of output file:
  input <- c('frequency DATETIME interval_number RXMIN RXMAX path_length x_start y_start x_end y_end ID')
  writeLines(input, filename_data)

  # Load link data from present day:
  raw <- try(read.table(files1[z], sep = " ", header = TRUE), silent = TRUE)

  # Load link data from previous day.
  # The name of that file is derived from the date of the present day (all 8 date characters),
  # so a missing day in the file list cannot lead to two non-adjacent days being combined:
  # if the file of the previous day is not available, reading fails and the day is skipped.
  dateYesterday <- as.Date(strptime(date_today, "%Y%m%d", tz = "UTC") - 86400)
  DateYesterday <- format(dateYesterday, "%Y%m%d")
  fileYesterday <- paste(substr(files1[z], 1, 26), DateYesterday, substr(files1[z], 35, 38), sep = "")
  raw_prevday <- try(read.table(fileYesterday, sep = " ", header = TRUE), silent = TRUE)

  print(z)

  # Only proceed when file for present and previous day can be opened:
  if (inherits(raw, "data.frame") && inherits(raw_prevday, "data.frame")) {

    # Determine time interval number. Each file is referenced to its own date at PERIOD minus
    # one day; NrStepsDay is added for the present day so that its numbers follow those of the
    # previous day.
    date_ref <- strptime(paste(date_today, PERIOD, sep = ""), "%Y%m%d%H%M", tz = "UTC") - 86400
    timestep_today <- as.numeric((strptime(raw$DATETIME, "%Y%m%d%H%M", tz = "UTC") - date_ref), units = "hours") * NrTimestepsHour + NrStepsDay
    date_ref <- strptime(paste(DateYesterday, PERIOD, sep = ""), "%Y%m%d%H%M", tz = "UTC") - 86400
    timestep_prevday <- as.numeric((strptime(raw_prevday$DATETIME, "%Y%m%d%H%M", tz = "UTC") - date_ref), units = "hours") * NrTimestepsHour
    timestep <- c(timestep_prevday, timestep_today)

    # Combine data from previous day and present day:
    DATA <- rbind(raw_prevday, raw)

    # Only select data with certain frequencies. Also drop records whose time interval number
    # is missing or falls outside 1 ... NrStepsTwoDays: these cannot be stored in the
    # two-day array below and would otherwise stop the script with a subscript error.
    # which() additionally drops records for which the condition is NA.
    cond_freq <- DATA$frequency >= MinFrequency & DATA$frequency < MaxFrequency
    cond_time <- !is.na(timestep) & timestep >= 1 & timestep <= NrStepsTwoDays
    keep <- which(cond_freq & cond_time)
    DATA <- DATA[keep, ]
    timestep <- timestep[keep]

    # ID is the link identifier.
    ID <- as.character(DATA$ID)
    # Links to process: those present in the data of the present day with an allowed frequency.
    in_band_today <- which(raw$frequency >= MinFrequency & raw$frequency < MaxFrequency)
    LINK_ID <- as.character(unique(raw$ID[in_band_today]))
    # Number of links: length(LINK_ID)

    for (i in seq_along(LINK_ID)) {

      print(i)

      # Select data belonging to chosen link identifier (LINK_ID).
      # 'selection' holds the row numbers in DATA, 'cond' the matching time interval numbers;
      # taking 'cond' from 'selection' keeps both vectors aligned.
      selection <- which(ID == LINK_ID[i])
      cond <- timestep[selection]

      # CONSISTENCY CHECKS
      # 1. Remove a time interval if > 1 time interval number, i.e., remove those intervals
      # for which more than 1 observation is available.
      duplicated_steps <- unique(cond[duplicated(cond)])
      if (length(duplicated_steps) > 0) {
        is_single <- !(cond %in% duplicated_steps)
        cond <- cond[is_single]
        selection <- selection[is_single]
      }

      # Do not proceed if no link data are available for the selected LINK_ID.
      if (length(selection) == 0) {
        next
      }

      # 2. If for a given LINK ID frequency, link coordinates, or link length differ during
      # the day: remove the link for the entire day.
      fixed_columns <- c("frequency", "path_length", "x_start", "x_end", "y_start", "y_end")
      is_constant <- vapply(
        fixed_columns,
        function(column) length(unique(DATA[[column]][selection])) <= 1,
        logical(1)
      )
      if (!all(is_constant)) {
        next
      }

      # Apply selection. Row number = time interval number; rows without data stay NA and
      # are removed by na.omit() when writing. The order of the assignments is kept as is:
      # assigning the (character) link identifier turns the whole array into character.
      data_raw <- array(NA, c(NrStepsTwoDays, 11))
      data_raw[cond, 1] <- DATA$frequency[selection]
      data_raw[cond, 2] <- DATA$DATETIME[selection]
      data_raw[cond, 3] <- timestep[selection]
      data_raw[cond, 4] <- DATA$RXMIN[selection]
      data_raw[cond, 5] <- DATA$RXMAX[selection]
      data_raw[cond, 6] <- DATA$path_length[selection]
      data_raw[cond, 11] <- LINK_ID[i]

      # Convert WGS84 coordinates (degrees) to an Azimuthal Equidistant Cartesian coordinate
      # system (easting and northing of start of link, easting and northing of end of link,
      # respectively; km).
      link_start <- ProjectLinkCoordinates(DATA$x_start[selection], DATA$y_start[selection])
      data_raw[cond, 7] <- link_start$lon  # Easting (in km)
      data_raw[cond, 8] <- link_start$lat  # Northing (in km)

      link_end <- ProjectLinkCoordinates(DATA$x_end[selection], DATA$y_end[selection])
      data_raw[cond, 9] <- link_end$lon   # Easting (in km)
      data_raw[cond, 10] <- link_end$lat  # Northing (in km)

      # Write data to file:
      write.table(na.omit(data_raw), filename_data, append = TRUE, row.names = FALSE, col.names = FALSE, quote = FALSE)

    }
  }

  # Print warnings per day:
  print(warnings())
  # Remove warnings. "last.warning" only exists in the base environment once warnings have been
  # stored there, and new bindings cannot be added to a locked base environment; an unguarded
  # assign() would then stop the script. Hence: only reset an existing binding, and never let
  # a failure to do so abort the processing.
  if (exists("last.warning", envir = baseenv(), inherits = FALSE)) {
    try(assign("last.warning", NULL, envir = baseenv()), silent = TRUE)
  }

}

###########################
# Print calculation time. #
###########################
# Elapsed (wall-clock) time in seconds since startTime was set, rounded to one decimal.
cat(
  sprintf(
    "Finished. (%.1f seconds)\n",
    round((proc.time() - startTime)[["elapsed"]], digits = 1)
  )
)



