#!/usr/bin/env python3
import glob, csv, collections
import pylab as pl
import matplotlib as mpl
import json

# Accumulators filled by the sounding loop below and read by the plotting code.
data = [] # [(pressure, scaling factor), ...] pooled over all soundings
drctdata = []  # [(pressure, DRCT difference), ...] pooled over all soundings
# The key is the part of the file name between "tateno_" and "-info.html"
# (presumably the sounding date -- confirm against the data directory).
ground_wind_scaling_by_day = {} # {yyyy-mm-dd: X, ...}

def average_by_pressure_difference(pressure, value):
    """Return the pressure-weighted mean of *value* over a profile.

    The profile is integrated layer by layer with the trapezoidal rule and
    divided by the total pressure difference between the first and the last
    level, so a constant profile averages to that constant.

    Args:
        pressure: sequence of pressure levels (numbers or numeric strings).
        value: sequence of values at those levels (numbers or numeric
            strings), same length as *pressure*.

    Returns:
        None for an empty profile, the single value for one level, the plain
        mean when no weighting is possible (two levels, or first and last
        pressure equal), otherwise the pressure-weighted mean as a float.
    """
    if not pressure:  # cannot average
        return None
    if not pressure[1:]:  # single value
        return float(value[0])
    if not pressure[2:]:  # cannot weight
        return sum(float(v) for v in value) / 2.

    pressures = [float(p) for p in pressure]
    values = [float(v) for v in value]
    dp_sum = pressures[0] - pressures[-1]
    if dp_sum == 0:
        # No net pressure difference to weight by: use the unweighted mean
        # instead of dividing by zero.
        return sum(values) / len(values)

    # Each layer contributes its mean value times its pressure thickness.
    # This gives both end levels their half-layer weight, so the weights add
    # up to dp_sum.
    layers = zip(pressures, pressures[1:], values, values[1:])
    integral = sum(
        0.5 * (v_lower + v_upper) * (p_lower - p_upper)
        for p_lower, p_upper, v_lower, v_upper in layers
    )
    return integral / dp_sum

# Read every downloaded sounding page, keep the levels reported before the
# first one above 1000 m, and collect the pooled and per-file statistics.
for path in sorted(glob.glob("tateno-sounding-data/*.html")):
    # The table is the text between the first dashed ruler and the closing
    # </pre> tag; everything else on the page is ignored.
    tsvlines = []
    with open(path) as f:
        for line in f:
            if line.startswith("-----"):
                break
        for line in f:
            if line.lower().startswith("</pre>"):
                break
            tsvlines.append(line)
    rows = csv.reader(tsvlines, delimiter=" ", skipinitialspace=True)
    headerline = next(rows, None)
    unitline = next(rows, None)  # second table row; read only to skip it
    # Only tables with the expected column layout are used.  Checking once
    # per file (instead of per row) also avoids indexing into a short header.
    if headerline is None or len(headerline) < 10:
        continue
    if (headerline[0], headerline[1], headerline[8], headerline[9]) != (
            "PRES", "HGHT", "DRCT", "SKNT"):
        continue
    pressure = []
    height = []
    SKNT = []
    DRCT = []
    for row in rows:
        # Rows with fewer than 13 fields are skipped, as in the original.
        if len(row) >= 13:
            pressure.append(row[0])
            height.append(row[1])
            DRCT.append(row[8])
            SKNT.append(row[9])
    maxidx = [n for n, h in enumerate(height) if float(h) > 1000]
    # At least two levels above 1000 m are required (unchanged criterion).
    if not maxidx[1:]:
        continue
    maxidx = maxidx[0]
    if maxidx == 0:
        # The very first level is already above 1000 m: there is no ground
        # level to scale against, so the file cannot be used.
        continue
    # Convert only the levels that are used; plotting the raw strings would
    # make matplotlib treat the wind speeds as categories instead of numbers.
    speed = [float(s) for s in SKNT[:maxidx]]
    direction = [float(d) for d in DRCT[:maxidx]]
    pressure = [float(p) for p in pressure[:maxidx]]
    ground_speed = max(1, speed[0])  # avoid dividing by calm (< 1 knot) ground wind
    SKNTscaling = [s / ground_speed for s in speed]
    # Wrap the direction change into [-180, 180) so that e.g. 350 -> 10 degree
    # counts as +20 and not as -340.
    DRCTdifference = [(d - direction[0] + 180.0) % 360.0 - 180.0 for d in direction]
    # store the daily scaling data
    day = path[len("tateno-sounding-data/tateno_"): - len("-info.html")]
    ground_wind_scaling_by_day[day] = average_by_pressure_difference(
        pressure, speed) / ground_speed
    pl.plot(speed, pressure, alpha=0.05)
    data.extend(zip(pressure, SKNTscaling))
    drctdata.extend(zip(pressure, DRCTdifference))

# with open("tateno-wind-scaling-by-day.json", "w") as f:
#     json.dump(ground_wind_scaling_by_day, f)
# setup pylab
pl.rc('text', usetex=True)

# add double-figure plot for wind speed and direction
# The sounding profiles were plotted on the current figure while the files
# were read.  The box plots get a figure of their own so that saving and
# closing it does not destroy those profile lines before they are saved.
profile_figure = pl.gcf()
boxplot_figure = pl.figure()
# (subplot position, samples, title, x label, x limits)
panels = (
    (1, data, "Scaling ground wind speed to profile below 1000m",
     "wind speed scaling factor against ground / unitless: $x_i x_0^{-1}$", (-1, 12)),
    (2, drctdata, "Change in wind direction to lowest level",
     "wind direction change against ground / degree", (-120, 120)),
)
for position, samples, title, xlabel, xlim in panels:
    binned = collections.defaultdict(list)
    for p, d in samples:
        binned[int((p-10)/20)].append(d)
    # Bin k collects 20k+10 <= p < 20k+30, so its centre is 20k+20 hPa.
    bin_keys = sorted(binned)
    labels = [(k + 1) * 20 for k in bin_keys]
    boxed = [binned[k] for k in bin_keys]
    axes = boxplot_figure.add_subplot(1, 2, position)
    axes.boxplot(boxed, labels=labels, notch=True, vert=False, bootstrap=10000, showmeans=True)
    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel("center of pressure bin / hPa")
    axes.set_xlim(*xlim)
    axes.invert_yaxis()  # lowest pressure (highest level) at the top
    axes.grid()
boxplot_figure.set_size_inches(8.8, 3.3)
boxplot_figure.savefig("tateno-sounding-profiles-boxplot-speed-and-direction.pdf", bbox_inches='tight', transparent=False)
pl.close(boxplot_figure)
# Make the profile figure current again for the pyplot calls that follow.
pl.figure(profile_figure.number)


# individual plots
# Style the current axes (log-scaled speed axis, pressure decreasing upwards)
# and write the figure to disk.
# NOTE(review): this decorates whatever figure is current at this point; the
# per-sounding lines are drawn while the files are read -- confirm they are
# still on the current figure here.
line_axes = pl.gca()
line_axes.set_xlabel("SKNT / knots")
line_axes.set_ylabel("pressure / hPa")
line_axes.set_xscale("log")
line_axes.set_xlim(1, 80)
line_axes.tick_params(axis='x', which='minor')
minor_format = mpl.ticker.FormatStrFormatter("%.0f")
line_axes.xaxis.set_minor_formatter(minor_format)
line_axes.invert_yaxis()
# color on black makes it easier to detect shading from multiple overlayed lines
line_axes.set_facecolor('black')
line_axes.set_title("Wind speed profiles up to 1000m (Tateno, Japan)")
pl.gcf().set_size_inches(4.4, 3.3)
pl.savefig(
    "tateno-sounding-profiles-lines.pdf",
    bbox_inches='tight',
    transparent=False,
)
pl.close()
# Box plot of the wind speed scaling factor, grouped into 20 hPa pressure bins.
binned = collections.defaultdict(list)
for p, d in data:
    binned[int((p-10)/20)].append(d)
# Bin k collects 20k+10 <= p < 20k+30, so its centre is 20k+20 hPa; label the
# boxes with that centre to match the axis description.
bin_keys = sorted(binned)
labels = [(k + 1) * 20 for k in bin_keys]
boxed = [binned[k] for k in bin_keys]
pl.boxplot(boxed, labels=labels, notch=True, vert=False, bootstrap=10000, showmeans=True)
pl.title("Scaling ground wind speed to profile below 1000m")
pl.xlabel("wind speed scaling factor against ground / unitless: $x_i x_0^{-1}$")
pl.ylabel("center of pressure bin / hPa")
pl.xlim(-1, 12)
pl.gca().invert_yaxis()  # lowest pressure (highest level) at the top
pl.grid()
pl.gcf().set_size_inches(4.4,3.3)
pl.savefig("tateno-sounding-profiles-boxplot.pdf", bbox_inches='tight', transparent=False)
pl.close()
# Box plot over the per-file mean scaling factors, taken in key order.
daily_scaling = [
    ground_wind_scaling_by_day[day]
    for day in sorted(ground_wind_scaling_by_day)
]
pl.boxplot(daily_scaling, notch=True, bootstrap=10000, showmeans=True)
scaling_axes = pl.gca()
# NOTE(review): the description is attached to the x axis although only the
# y range is limited below -- confirm the intended axis.
scaling_axes.set_xlabel("wind speed scaling factor against ground / unitless: $x_i x_0^{-1}$")
scaling_axes.set_ylim(0, 10)
scaling_axes.set_title("Scaling ground wind speed to average wind profile up to 1000m")
pl.gcf().set_size_inches(4.4, 3.3)
pl.savefig(
    "tateno-wind-scaling-boxplot.pdf",
    bbox_inches='tight',
    transparent=False,
)
pl.close()
# direction
# Box plot of the wind direction change, grouped into 20 hPa pressure bins.
binned = collections.defaultdict(list)
for p, d in drctdata:
    binned[int((p-10)/20)].append(d)
# Bin k collects 20k+10 <= p < 20k+30, so its centre is 20k+20 hPa; label the
# boxes with that centre to match the axis description.
bin_keys = sorted(binned)
labels = [(k + 1) * 20 for k in bin_keys]
boxed = [binned[k] for k in bin_keys]
pl.boxplot(boxed, labels=labels, notch=True, vert=False, bootstrap=10000, showmeans=True)
pl.title("Change in wind direction to lowest level")
pl.xlabel("wind direction change against ground / degree")
pl.ylabel("center of pressure bin / hPa")
pl.xlim(-120, 120)
pl.gca().invert_yaxis()  # lowest pressure (highest level) at the top
pl.grid()
pl.gcf().set_size_inches(4.4,3.3)
pl.savefig("tateno-drct-shift-boxplot.pdf", bbox_inches='tight', transparent=False)
pl.close()
# add speed plot with direction subplot
# Main panel: wind speed scaling factor per 20 hPa pressure bin.
binned = collections.defaultdict(list)
for p, d in data:
    binned[int((p-10)/20)].append(d)
# Bin k collects 20k+10 <= p < 20k+30, so its centre is 20k+20 hPa.
bin_keys = sorted(binned)
labels = [(k + 1) * 20 for k in bin_keys]
boxed = [binned[k] for k in bin_keys]
pl.boxplot(boxed, labels=labels, notch=True, vert=False, bootstrap=10000, showmeans=True)
pl.title("Scaling ground wind speed to profile below 1000m")
pl.xlabel("wind speed scaling factor against ground / unitless: $x_i x_0^{-1}$")
pl.ylabel("center of pressure bin / hPa")
pl.xlim(-1, 14)
pl.gca().invert_yaxis()  # lowest pressure (highest level) at the top
pl.grid()

# Inset panel: direction change per pressure bin, drawn without outliers in
# cell 4 of a 3x2 grid laid over the main axes.
inset = pl.gcf().add_subplot(3, 2, 4)
dinned = collections.defaultdict(list)
for p, d in drctdata:
    dinned[int((p-10)/20)].append(d)
bin_keys = sorted(dinned)
labels = [(k + 1) * 20 for k in bin_keys]
boxed = [dinned[k] for k in bin_keys]
inset.boxplot(boxed, labels=labels, notch=True, vert=False, bootstrap=10000, showmeans=True, showfliers=False)
# pl.title("Change in wind direction to lowest level")
inset.set_xlabel("direction change / degree")
inset.set_xlim(-129, 129)
inset.invert_yaxis()
# Keep every second tick.  The labels must be thinned together with the
# positions; otherwise the fixed label list is applied to the remaining ticks
# in order and they show the wrong pressure.
inset.set_yticks(inset.get_yticks()[::2])
inset.set_yticklabels(labels[::2])
# White label backgrounds keep the inset ticks readable on top of the main plot.
pl.setp(inset.get_xticklabels(), backgroundcolor="white")
pl.setp(inset.get_yticklabels(), backgroundcolor="white")
inset.grid()
pl.gcf().set_size_inches(4.4,3.3)
pl.savefig("tateno-subplot-boxplot.pdf", bbox_inches='tight', transparent=False)
pl.close()


