### ******************************************************************************
### ******************************************************************************
### ******************* main code for miniDOAS data evaluation *******************
### *************************** author: Joerg Sintermann *************************
### ********** this works with the Avaspec and QE65-pro from Jan.2014 on ********
### ******************************************************************************
### ******************************************************************************


### create result folder
### ******************************************************************************
cat("\ncreate directory")
now1 <- as.character(Sys.time())
now <- c(unlist(strsplit(unlist(strsplit(now1,split=" ",fixed=TRUE))[1],split="-",fixed=TRUE)), unlist(strsplit(unlist(strsplit(now1,split=" ",fixed=T))[2],split=":",fixed=T))[1:2])
meas.date <- paste(substring(dir.meas[1], nchar(dir.meas[1])-11, nchar(dir.meas[1])-4), "-", substring(dir.meas[length(dir.meas)], nchar(dir.meas[length(dir.meas)])-11, nchar(dir.meas[length(dir.meas)])-4), sep="")
folder <- paste(c("miniDOAS_", meas.date,"_Eval",now),collapse="")
dir.create(paste(c(save.dir,"/",folder),collapse=""),recursive=TRUE)
setwd(paste(c(save.dir,"/",folder),collapse=""))

### read and process reference/calibration files
### ******************************************************************************
cat("\nread and process reference files")
calref <- read.references()
NH3.cal.ug <- calref[[1]][[1]]; SO2.cal.ug <- calref[[1]][[2]]; NO.cal.ug <- calref[[1]][[3]]; NH3.cal <- calref[[1]][[4]]; SO2.cal <- calref[[1]][[5]]; NO.cal <- calref[[1]][[6]]; cuvette.length <- calref[[1]][[7]]
I.ref <- calref[[2]][[1]]; I.NH3 <- calref[[2]][[2]]; I.SO2 <- calref[[2]][[3]]; I.NO <- calref[[2]][[4]]; I.N2 <- calref[[2]][[5]]
NH3.doascurve <- calref[[3]][[1]]; SO2.doascurve <- calref[[3]][[2]]; NO.doascurve <- calref[[3]][[3]]
reference.times <- calref[[4]]
cal.diffspecs <- calref[[5]]
dat.dark <- calref[[6]][[1]]; dat.ref <- calref[[6]][[2]]; dat.N2 <- calref[[6]][[3]]; dat.N2.dark <- calref[[6]][[4]]; dat.NH3 <- calref[[6]][[5]]; dat.SO2 <- calref[[6]][[6]]; dat.NO <- calref[[6]][[7]]

### initiate first data matrices
### ******************************************************************************
nu <<- integer(0)
TEC.Temp <<- integer(0)
board.Temp <<- integer(0)
datetime.start <<- integer(0)
datetime.end <<- integer(0)
datetime.start.frac <<- integer(0)
datetime.end.frac <<- integer(0)
dark.offset <<- integer(0)
dat.meas <<- integer(0)
spectrometer <<- integer(0)
time.res <<- integer(0)
filenumber <- length(dir.meas)
stopifnot(filenumber > 0)


################################################################################
### data processing ############################################################
################################################################################


### step 1: read all measurement data (contained in dayly files)
### ******************************************************************************
cat(paste("\n",ifelse(corr.fixed.pattern,"step1 of 4:\n","step1 of 3:\n"), sep=""))
if (cores > 1) {cat(" read data files, parallel computing - can't display progress")}
### execute the calculation
l.dir.meas <- as.list(dir.meas)
i <<- 0
results.step1 <- mclapply(l.dir.meas, function(x) {

    i <<- i + 1
    if (cores == 1) {cat(paste("\r read data file #",i," of #",filenumber," = ",format(x=round(i/filenumber*100,1),nsmall=1),"% progress", sep=""))}

    ### read rawdata file, extract header information, average the summed spectrum
    ### ******************************************************************************
    data <- read.rawdata(x, x3, n.line, header.rows, pixel.number, split.date.char, split.date.format, spec.line, res.line, 1, board.Temp.line, dat.dark)

}, mc.cores=cores)

### write results of step 1 into matrices
i <<- 0
dummy1 <- lapply(results.step1,  function(x) {
    i <<- i + 1
    nu <<- c(nu, x[[1]][[1]]); dark.offset <<- c(dark.offset, x[[1]][[2]]); TEC.Temp <<- c(TEC.Temp, x[[1]][[3]]); datetime.start <<- c(datetime.start, x[[1]][[4]]); datetime.end <<- c(datetime.end, x[[1]][[5]]); datetime.start.frac <<- c(datetime.start.frac, x[[1]][[6]]); datetime.end.frac <<- c(datetime.end.frac, x[[1]][[7]]); spectrometer <<- c(spectrometer, x[[1]][[8]]); time.res <<- c(time.res, x[[1]][[9]]); board.Temp <<- c(board.Temp, x[[1]][[11]]); dat.meas <<- cbind(dat.meas, x[[2]])
})

### correct raw ccd data linearity
I.meas <- apply(dat.meas - dat.dark, 2, function(x) x / linearity.func(x, linearity.coeffs))

### create I = I.raw - darkspec - dark.offset
I.meas <- sweep(I.meas, 2 , dark.offset, "-")

### average spectra a-priori (faster overall calculation with mean spectra) over tme intervals beginning and ending at e.g. 01:00, 01:30, etc; -> weighted averages in order to account for intersecting measurement spectra
if (average.rawdat) {
    cat("\n average raw spectra\n")
    dts <- fast_strptime(datetime.start, "%d.%m.%Y %H:%M:%S")
    dte <- fast_strptime(datetime.end, "%d.%m.%Y %H:%M:%S")
    df_time <- data.frame(dts, dte); df <- as.data.frame(I.meas); res <- as.difftime(avg.period, units="mins")
    temperatures <- lapply(board.Temp, function(x) as.numeric(unlist(strsplit(x, split=",", fixed=TRUE))))
    temperatures <- t(data.frame(temperatures))
    colnames(temperatures) <- c("temp1","temp2","temp3")
    index <- as.POSIXct(as.character(cut(df_time[,1], paste0(as.numeric(res,units="mins")," mins"))), tz=attr(df_time[,1],"tzone"))
    ind <- which(df_time[,1] > index & df_time[,2] > index + res)
    frac2 <- (as.numeric(df_time[,2] - (index + res), units="secs") / as.numeric(df_time[,2] - df_time[,1], units="secs"))[ind]
    frac1 <- 1 - frac2
    xa <- as.data.frame(t(df))
    xb <- data.frame(temperatures, n=nu, tec=TEC.Temp, d=as.numeric(dark.offset))
    y <- data.frame(w=rep(1, length(dts)))
    z <- data.frame(t=index)
    for (i in 1:length(ind)) {
        xa <- insertRows(xa, xa[ind[i]+i-1,], ind[i]+i-1)
        xb <- insertRows(xb, xb[ind[i]+i-1,], ind[i]+i-1)
        y <- insertRows(y, as.data.frame(y[ind[i]+i-1,]), ind[i]+i-1)
        y[ind[i]+i-1,"w"] <- frac1[i]
        y[ind[i]+i,"w"] <- frac2[i]
        z <- insertRows(z, as.data.frame(z[ind[i]+i-1,]), ind[i]+i-1)
        z[ind[i]+i,"t"] <- index[ind[i]] + res
        cat(paste0("\r ..determine averaging weights - progress: ",round(i/length(ind)*100,1),"%"))
    }
    cat(paste0("\n ..complete averaging"))
    expanded <- data.frame(z, y, xa) %>% filter(!is.na(t))
    I.meas.avg <- group_by(expanded, t)
    I.meas.avg <- I.meas.avg %>% summarise_each(funs(weighted.mean(., w)), -w)
    expanded <- data.frame(z, y, xb) %>% filter(!is.na(t))
    b <- group_by(expanded, t)
    b.mean <- b %>% summarise_each(funs(weighted.mean(., w)), -w)
    n <- b %>% summarise(n=round(sum(w * n), 0))
    temps <- b.mean %>% select_(.dots=paste0("temp",1:ncol(temperatures)))
    temps <- apply(temps, 1, function(x) paste(c(x[1], x[2], x[3]),collapse=","))
    cat(paste0("\n ..finished averaging"))

    I.meas <- as.data.frame(t(I.meas.avg[,-1]))
    nu <<- n$n
    dark.offset <<- b.mean$d
    TEC.Temp <<- b.mean$tec
    board.Temp <<- as.list(temps)
    datetime.start <<- format(n$t, "%d.%m.%Y %H:%M:%S")
    datetime.end <<- format(n$t + res, "%d.%m.%Y %H:%M:%S")
    datetime.start.frac <<- rep(0, length(nu))
    datetime.end.frac <<- datetime.start.frac
    time.res <<- rep(avg.period * 60, length(nu))
    avg.period <- 1
}

### initiate second data matrices
### ******************************************************************************
cat("\n initiate data matrices")
files <<- ncol(I.meas)
results <- as.data.frame(matrix(nrow=files, ncol=31, dimnames=list(1:files, c("date_time_start","date_time_end","millisec_start","millisec_end","no_of_acc","tau_[pixel]","mult_GOF(tau)","mult_GOF(0)","NH3_[ug/m3]","NH3_se_[ug/m3]", "NH3_part_R2(tau)","SO2_[ug/m3]","SO2_se_[ug/m3]", "SO2_part_R2(tau)","NO_[ug/m3]","NO_se_[ug/m3]", "NO_part_R2(tau)","Iadj_max","spectrum_average","Iadj/Iref_intercept","Iadj/Iref_slope","Iadj/Iref_R2","resid_SSE","FP_SSE","dark_avg_offset_[counts]","TEC_T_[degC]","panel_T_[degC]","ambient_T_[degC]","ambient_RH_[perc]","min_Iadj/Iref","fit_order"))))
meas.diffspec <<- matrix(nrow=pixel.number, ncol=files)
meas.diffspec.broadband <<- matrix(nrow=pixel.number, ncol=files)
meas.broadband <<- matrix(nrow=pixel.number, ncol=files)
meas.doascurve <<- matrix(nrow=pixel.number, ncol=files)
avg.meas.doascurve <<- matrix(nrow=length(x2), ncol=files)
avg.fitted.doascurve.best <<- matrix(nrow=length(x2), ncol=files)
avg.fitted.NH3 <<- matrix(nrow=length(x2), ncol=files)
avg.fitted.SO2 <<- matrix(nrow=length(x2), ncol=files)
avg.fitted.NO <<- matrix(nrow=length(x2), ncol=files)
residual <<- matrix(nrow=length(x2), ncol=files)
residual.best <<- matrix(nrow=length(x2), ncol=files)
dyn.fixed.pattern <<- rep(0, length(x2))
best.tau <<- array(dim=files)
best.fitgoodness <<- array(dim=files)
fitted.doascurve.best <<- matrix(nrow=length(x2), ncol=files)
fitcurves.best <<- vector(mode="list", length=files)
zero.fitgoodness <<- array(dim=files)
diffspec.fit <<- vector(mode="list", length=files)
fit.order <<- array(dim=files)


### step 2: process raw-data record-wise (using parallel computing on OS other than Windows)
### ******************************************************************************
cat(paste("\n",ifelse(corr.fixed.pattern,"step2 of 4:\n","step2 of 3:\n")," derive DOAS curves & fit reference spectra\n", sep=""))
if (cores > 1) {cat(" processing records, parallel computing - can't display progress")}
l.I.meas <- as.list(as.data.frame(I.meas))
i <<- 0
results.step2 <- mclapply(l.I.meas, function(x) {

    i <<- i + 1
    if (cores == 1) {cat(paste("\r process record #",i," of #",files," = ",format(x=round(i/files*100,1),nsmall=1),"% progress", sep=""))}

    ### calculate measurement DOAS curve and intermediate steps
    ### ******************************************************************************
    meas.DOAS.data <- doas.curve(x, I.ref, x1, filter.type, filter.strength)
    dummy1 <- array(dim=pixel.number); dummy1[x1] <- meas.DOAS.data[[3]]

    ### fit calibration curves to measured DOAS curve (see Stutz & Platt, 1996, Applied Optics 30), determine best fit after shifting over given tau range, no fixed pattern considered
    ### ******************************************************************************#
    fitted <- fit.curves(dummy1, meas.DOAS.data[[1]], x2, x4, dyn.fixed.pattern, NH3.doascurve, SO2.doascurve, NO.doascurve, fit.weights, tau.shift, tau.divisor, fit.type, max.order)
    return(c(meas.DOAS.data, fitted))

}, mc.cores=cores)

### write results of step 2 into matrices
i <<- 0
dummy2 <- lapply(results.step2, function(x) {
    i <<- i + 1
    meas.diffspec[,i] <<- x[[1]]; meas.diffspec.broadband[x1,i] <<- x[[2]]; meas.doascurve[x1,i] <<- x[[3]]; meas.broadband[x1,i] <<- x[[4]]
    best.tau[i] <<- x[[5]]; best.fitgoodness[i] <<- x[[6]]; fitted.doascurve.best[,i] <<- x[[7]]; fitcurves.best[[i]] <<- x[[8]]; residual.best[,i] <<- x[[9]]; zero.fitgoodness[i] <<- x[[10]]; avg.fitted.doascurve.best[,i] <<- x[[11]]; avg.fitted.NH3[,i] <<- x[[12]]; avg.fitted.SO2[,i] <<- x[[13]]; avg.fitted.NO[,i] <<- x[[14]]; diffspec.fit[[i]] <<- x[[15]]; fit.order[i] <<- x[[16]]
})

### running median residual pattern
cat("\n creating median residual pattern")
if (files <= fixed.pattern.length) {
    fp.avg <<- matrix(rep(apply(residual.best, 1, median, na.rm=TRUE),files), nrow=files, ncol=length(x4), byrow=TRUE)
} else {
    fp.avg <<- apply(residual.best, 1, runmed, k=fixed.pattern.length, endrule="constant")
}

### step 3: optionally subtract averaged fixed residual pattern from measurement data and re-evaluate (using parallel computing on OS other than Windows)
### ******************************************************************************
if (corr.fixed.pattern) {
    cat("\nstep3 of 4:\n re-evaluate considering fixed residual pattern:\n")
    if (cores > 1) {cat(" re-processing records, parallel computing - can't display progress")}

    results.step1 <- Reduce(function(x,y) Map(list, x, y),list(as.list(as.data.frame(meas.doascurve)), as.list(as.data.frame(meas.diffspec))))
    i <<- 0
    results.step3 <- mclapply(results.step1, function(x) {

        i <<- i + 1
        if (cores == 1) {cat(paste("\r re-process record #",i," of #",files," = ",format(x=round(i/files*100,1),nsmall=1),"% progress", sep=""))}

        ### re-evaluate data, subtracting the running median fixed residual pattern beforehand
        ### ******************************************************************************
        fitted <- fit.curves(x[[1]], x[[2]], x2, x4, dyn.fixed.pattern=fp.avg[i,], NH3.doascurve, SO2.doascurve, NO.doascurve, fit.weights, tau.shift, tau.divisor, fit.type, max.order)

    }, mc.cores=cores)

    ### write results of step 3 into matrices
    i <<- 0
    dummy3 <- lapply(results.step3,  function(x) {
        i <<- i + 1
        best.tau[i] <<- x[[1]]; best.fitgoodness[i] <<- x[[2]]; fitted.doascurve.best[,i] <<- x[[3]]; fitcurves.best[[i]] <<- x[[4]]; residual.best[,i] <<- x[[5]]; zero.fitgoodness[i] <<- x[[6]]; avg.fitted.doascurve.best[,i] <<- x[[7]]; avg.fitted.NH3[,i] <<- x[[8]]; avg.fitted.SO2[,i] <<- x[[9]]; avg.fitted.NO[,i] <<- x[[10]]; diffspec.fit[[i]] <<- x[[11]]; fit.order[i] <<- x[[12]]
    })

}

### step 4: determine NH3 / SO2 / NO concentrations and associated prameters
### ******************************************************************************
cat(paste("\n",ifelse(corr.fixed.pattern,"step4 of 4:\n","step3 of 3:\n")," calculate concentrations", sep=""))
concentrations <- calc.conc(fitcurves.best, path.length)
NH3 <- concentrations[[1]]; SO2 <- concentrations[[2]]; NO <- concentrations[[3]]; NH3.sig <- concentrations[[4]]; SO2.sig <- concentrations[[5]]; NO.sig <- concentrations[[6]]; partcorr <- concentrations[[7]]

### write to result matrix
### ******************************************************************************
cat("\n write results")
results[,1] <- datetime.start
results[,2] <- datetime.end
results[,3] <- as.numeric(substring(datetime.start.frac,nchar(datetime.start.frac)-2,nchar(datetime.start.frac)))
results[,4] <- as.numeric(substring(datetime.end.frac,nchar(datetime.end.frac)-2,nchar(datetime.end.frac)))
results[,5] <- as.numeric(nu)
results[,6] <- as.numeric(best.tau)
results[,7] <- as.numeric(best.fitgoodness)
results[,8] <- as.numeric(zero.fitgoodness)
results[,9] <- as.numeric(NH3)
results[,10] <- as.numeric(NH3.sig)
results[,11] <- as.numeric(partcorr[,1])
results[,12] <- as.numeric(SO2)
results[,13] <- as.numeric(SO2.sig)
results[,14] <- as.numeric(partcorr[,2])
results[,15] <- as.numeric(NO)
results[,16] <- as.numeric(NO.sig)
results[,17] <- as.numeric(partcorr[,3])
results[,18] <- apply(I.meas[x2,], 2, max)
results[,19] <- apply(I.meas[x2,], 2, mean)
coeffs <- unlist(lapply(diffspec.fit, coefficients))
results[,20] <- as.numeric(coeffs[names(coeffs) == "(Intercept)"])
results[,21] <- as.numeric(coeffs[names(coeffs) == "f[x2]"])
results[,22] <- unlist(lapply(diffspec.fit, "[[", "r.squared"))
results[,23] <- apply(residual.best^2, 2, sum)
results[,24] <- apply(fp.avg^2, 1, sum)
results[,25] <- dark.offset
results[,26] <- TEC.Temp
board.Temp <- round(as.numeric(unlist(lapply(as.list(board.Temp), function(x) strsplit(x, split=",", fixed=TRUE)))),1)
results[,27] <- board.Temp[seq(1,length(datetime.start)*3,3)]
results[,28] <- board.Temp[seq(2,length(datetime.start)*3,3)]
results[,29] <- board.Temp[seq(3,length(datetime.start)*3,3)]
results[,30] <- apply(meas.diffspec[x2,], 2, min)
results[,31] <- fit.order

### save results
### ******************************************************************************
write.table(results, file=paste(c(save.dir,"/",folder,"/miniDOAS_",meas.date,"_Eval",now,".csv"),collapse=""), sep=";", na="#NV", dec=".", row.names=FALSE, col.names=TRUE)

### create and save log file
### ******************************************************************************
cat("\nsave log file")
logfile <- log.file()
write(logfile, file=paste(c(save.dir,"/",folder,"/miniDOAS_",meas.date,"_Eval",now,"_log.txt"), collapse=""))


################################################################################
### optional: plotting & save average fixed residual pattern  ##################
################################################################################


### plot calibration spectra and DOAS curves
### ******************************************************************************
cat("\n plot calibration spectra and DOAS curves")
pdf(file=paste(c(save.dir,"/",folder,"/miniDOAS-calrefs-",meas.date,"-Eval",now,".pdf"),collapse=""), width=0.8*7, height=1.25*7)
par(mfcol=c(3,1))
plot.calibration.spectra(calref.cols, f[x1], dat.ref[x1], dat.dark[x1], dat.N2[x1], dat.N2.dark[x1], dat.NH3[x1], dat.SO2[x1], dat.NO[x1])
plot.calibration.diffspecs(calref.cols, f[x1], cal.diffspecs[[1]][x1], cal.diffspecs[[2]][x1], cal.diffspecs[[3]][x1])
plot.calibration.DOAScurves(calref.cols, f[x1], f[x2], NH3.doascurve[x4], SO2.doascurve[x4], NO.doascurve[x4])
graphics.off()

### plot evaluation steps, and optionally merge individual plots into one pdf animation (via LaTex)
### ******************************************************************************
if (plot.results) {
    cat("\n plot evaluation details\n")
    plot.name <- paste(c("miniDOAS_",meas.date,"_Eval",now,"_","avg",avg.period),collapse="")
    dir.create("images")
    saveHTML(
        for (i in 1:floor(nrow(results) / avg.period)) {png(file=paste0("images/Rplot_",plot.name,"_",i,".png"), width=400, height=600); plot.offline(); dev.off()},#svg(file=paste0("images/Rplot_",plot.name,"_",i,".svg"), width=6, height=6*1.25)
        htmlfile=paste0(plot.name,".html"), autobrowse=FALSE, autoplay=FALSE, use.dev=FALSE, ani.type='png', width=400, height=600, interval=0.3, verbose=FALSE, img.name=paste0("Rplot_",plot.name,"_"), title=paste0(plot.name,"_")
    )
    graphics.off()
    cat(" ..finished plotting")
}


graphics.off()
options(opt)
cat("\n*** evaluation finished ***")

source(paste0(eval.dir,"/miniDOAS_cleanupalittle.R"))

################################################################################
### the end ####################################################################
################################################################################





