#!/usr/bin/env python3

"""Only the parts of plot_tccon_by_direction.py which are needed for
the publication to make the code easier to understand."""

import os.path
import glob
import json
import numpy as np
import numpy.ma
import math
import datetime
import matplotlib
matplotlib.use('agg',warn=False, force=True) # to run without graphical interface
import pylab as pl
import scipy.misc
import netCDF4 as nc
import collections
import mpl_toolkits.basemap as bm
import argparse
import logging
# Configure the root logger: only warnings and above are shown, each line
# carries the level, a timestamp, the source file name and the line number.
logging.basicConfig(
    level=logging.WARNING,
    format=(' [%(levelname)-7s] (%(asctime)s)'
            ' %(filename)s::%(lineno)d %(message)s'),
    datefmt='%Y-%m-%d %H:%M:%S')

# TODO: the (time) unit at the measurement mast is CET (German: MEZ)!

# Command line interface: the wind-direction window (in degrees) that is
# treated as "wind coming from the Tokyo area" at Tsukuba.
# The title is passed as ``description``: the first positional parameter of
# ArgumentParser is ``prog``, which would show the text as the program name
# in the usage line instead of describing the script in the help output.
parser = argparse.ArgumentParser(description="Calculating Tokyo Emissions")
parser.add_argument("--tsukuba-start", default=170, type=float,
                    help="start of the Tokyo wind sector at Tsukuba in degrees"
                         " (default: %(default)s)")
parser.add_argument("--tsukuba-stop", default=240, type=float,
                    help="end of the Tokyo wind sector at Tsukuba in degrees"
                         " (default: %(default)s)")
args = parser.parse_args()

# File extension of the figures; it also selects the outline width used for
# hexbin cells.
filetype = ".pdf"
if filetype == ".pdf":
    hexbin_linewidth = 0.06  # fix overlapping hexes
else:
    hexbin_linewidth = 0.2

# Named colour palette; the insertion order is preserved.
colors = collections.OrderedDict([
    ("darklila", "#332288"),
    ("darkblue", "#4477aa"),
    ("mediumblue", "#6699cc"),
    ("brightblue", "#88ccee"),
    ("bluegreen", "#44aa99"),
    ("green", "#117733"),
    ("olive", "#999933"),
    ("yellow", "#ddcc77"),
    ("darkred", "#661100"),
    ("rose", "#cc6677"),
    ("mediumrose", "#aa4466"),
    ("darkrose", "#882255"),
    ("purple", "#aa4499"),
])

# Filled marker codes, repeated once so that twice as many series can be
# drawn before running out of entries.
safemarkersfilled = 2 * [
    'o', 'H', '^', '*', 'h', 'd', 'D', 's', '.',
    '8', 'p', 'v', '<', '>', 7, 4, 5, 6,
]


# Wind-direction window (start, stop) in degrees that counts as "from the
# Tokyo area" at Tsukuba, taken from the command line options.
# Fixed windows that were tried before: (150, 260), (180, 220), (170, 230).
tsukuba_tokyo_start_stop = (args.tsukuba_start, args.tsukuba_stop)

def plot_residuals_by_direction_composite(wdir_deg, wspd_m_s, residuals,
                                          time, species,
                                          speciestexstring, scale, unit, filename):
    """Plot a 2x2 overview of fit residuals by wind direction and save it.

    The panels are drawn into the current pylab figure (no new figure is
    created here), which is then saved to ``filename`` and closed:

    * 221: polar scatter, angle = wind direction, radius = wind speed,
      colour = residual.
    * 222: polar scatter, angle = wind direction, radius = residual shifted
      by ``10 * scale``, colour = ``time / 365.25 + 1970``, marker size
      proportional to the wind speed.
    * 223 / 224: hexbin of the negative / positive residuals over wind
      direction and wind speed.

    NOTE(review): the figure title reads ``name`` and ``name_short``. They
    are not parameters, so they must exist as module-level names when this
    function is called (the main loop of this script assigns them).

    Parameters:
        wdir_deg: wind directions in degrees.
        wspd_m_s: wind speeds (the axis labels state m/s).
        residuals: residuals (y - fit), one per sample.
        time: sample times; presumably days since 1970, because
            ``time / 365.25 + 1970`` is shown as "time / year" -- confirm.
        species: not used inside this function.
        speciestexstring: TeX label of the species for the titles.
        scale: factor applied to the radial range (``20 * scale``) and to
            the hexbin colour limits (``2 * scale``).
        unit: unit string used in the colour bar labels and titles.
        filename: path of the figure file to write.
    """
    # Raw strings are used wherever TeX commands occur: "\D" is an invalid
    # escape sequence in a normal string literal.
    pl.suptitle(r"Residuals by wind direction $\Delta$"
                + speciestexstring
                + " (y - fit), %s (%s)" % (name, name_short),
                va='bottom')

    # wind direction as angular component, wind speed as radial component
    ax = pl.subplot(221, projection='polar')
    b = ax.scatter(x=(wdir_deg%360) / 360. * np.pi * 2,
                   y=wspd_m_s,
                   c=residuals,
                   s=8,
                   alpha=1.0,
                   edgecolors='none',
                   cmap=pl.cm.RdYlBu_r)  # marker=safemarkersfilled[n], linestyle='None', alpha=0.1)
    ax.grid(True)
    cb = pl.colorbar(b)
    cb.formatter.set_useOffset(False)
    cb.set_label("residuals / " + unit)
    ax.set_title("Wind speed / m/s", va='bottom')
    pl.legend(loc="best", fancybox=True, framealpha=0.5)

    # residuals as radial component; the offset of 10 * scale moves the
    # zero line away from the centre of the polar plot
    ax = pl.subplot(222, projection='polar')
    b = ax.scatter(x=(wdir_deg%360) / 360. * np.pi * 2,
                   y=residuals + 10 * scale,
                   c=((time/365.25) + 1970),
                   s=wspd_m_s*10,
                   alpha=1./math.log10(len(residuals) + 10),
                   cmap=pl.cm.Paired) # marker=safemarkersfilled[n], linestyle='None', alpha=0.1)
    ax.set_rmax(20 * scale)
    ax.set_rticks([t * scale for t in range(5, 20, 5)])
    # label the shifted radial ticks with the unshifted residual values
    ax.set_yticklabels([str(l*scale) for l in range(-5, 10, 5)])  # less radial ticks
    # ax.set_rlabel_position(0) # show radial labels on zero-line
    ax.grid(True)
    cb = pl.colorbar(b)
    cb.formatter.set_useOffset(False)
    cb.set_alpha(0.5)
    cb.draw_all()
    cb.update_ticks()
    cb.set_label("time / year")
    if time[-1] - 730 > time[0]:
        cb.set_ticks(list(range(1970, 2100)))  # for two or more years simply show integers
    else:
        cb.set_ticks([l/4. for l in range(1970*4, 2100*4)]) # for less than two years show quarters
    # cb.ticklabel_format(style="plain")
    # cb.set_ticklabels([str(l) for l in range(1970, 2100)]) # simply integers
    ax.set_title(r"Residuals $\Delta$" + speciestexstring + " (y - fit) / " + unit, va='bottom')
    pl.legend(loc="best", fancybox=True, framealpha=0.5)

    # The two helpers below are only needed when the commented-out
    # ``reduce_C_function=red`` option of the hexbin calls is enabled.
    def reject_outliers(data, m = 2.):
        """Return the entries of data within m median absolute deviations of the median."""
        d = np.abs(data - np.median(data))
        mdev = np.median(d)
        s = d/mdev if mdev else 0.
        return data[s < m]

    def red(data):
        """Average the values of one hexbin cell after outlier rejection."""
        if len(data) == 0:
            return None
        if len(data) == 1:
            return data[0]
        return np.mean(reject_outliers(np.array(data)))

    # hexbin of the negative residuals against wind direction and speed:
    # values >= 0 are masked and the same mask is applied to both axes
    ax = pl.subplot(223)
    r = np.ma.masked_greater_equal(residuals, 0)
    ws = np.ma.masked_array(wspd_m_s, mask=r.mask)
    wd = np.ma.masked_array(wdir_deg, mask=r.mask)
    b = ax.hexbin(x=wd,
                  y=ws,
                  C=r, # reduce_C_function=red,
                  gridsize=25,
                  cmap=pl.cm.viridis,
                  vmin=-2 * scale, vmax=0,
                  linewidths=hexbin_linewidth) # avoid overlapping
    cb = pl.colorbar(b)
    pl.xlabel("wind direction / degree")
    pl.ylabel("column wind speed / m/s")
    ax.set_title(r"Negative $\Delta$" + speciestexstring + " (y - fit)", va='bottom')

    # hexbin of the positive residuals (values < 0 are masked)
    ax = pl.subplot(224)
    r = np.ma.masked_less(residuals, 0)
    ws = np.ma.masked_array(wspd_m_s, mask=r.mask)
    wd = np.ma.masked_array(wdir_deg, mask=r.mask)
    b = ax.hexbin(x=wd,
                  y=ws,
                  C=r, # reduce_C_function=red,
                  gridsize=25,
                  cmap=pl.cm.magma_r,
                  vmin=0, vmax=2 * scale,
                  linewidths=hexbin_linewidth) # avoid overlapping
    cb = pl.colorbar(b)
    cb.set_label("mean residuum / " + unit)
    pl.xlabel("wind direction / degree")
    ax.set_title(r"Positive $\Delta$" + speciestexstring + " (y - fit)", va='bottom')

    pl.savefig(filename, bbox_inches='tight', transparent=False)
    pl.close()



def plot_windrose_residuals(wdir_deg, residuals):
    """Draw the residuals as a normalised wind-rose bar plot with a legend.

    Parameters:
        wdir_deg: wind directions, passed as the direction argument.
        residuals: values that are binned per direction.
    """
    # imported inside the function, so the package is only required when
    # this plot is actually requested
    import windrose
    rose_axes = windrose.WindroseAxes.from_ax()
    rose_axes.bar(wdir_deg, residuals,
                  normed=True, opening=0.8, edgecolor='white')
    rose_axes.set_legend()
        

def plot_hexbin_by_direction_composite(wdir_deg, wspd_m_s, residuals, time, species, speciestexstring, scale, unit, name, name_short, filename):
    """Plot negative and positive residuals over wind direction and speed.

    A 13x3 inch figure with three panels is created, saved to ``filename``
    with 600 dpi and closed:

    * left (131): hexbin of the negative residuals,
    * middle (132): only filled when ``name == "Tsukuba"``: a map of the
      Tokyo area with the wind sector from the module-level
      ``tsukuba_tokyo_start_stop`` and lines 30 degrees outside of it,
    * right (133): hexbin of the positive residuals.

    For Tsukuba the sector limits are also marked in both hexbin panels and
    the axes are fixed to 0..360 degrees and 0..50 (wind speed).

    The map panel reads the PNG overlay file from the working directory and
    calls ``Basemap.arcgisimage`` (presumably a download -- confirm).

    Parameters:
        wdir_deg: wind directions in degrees.
        wspd_m_s: wind speeds (the axis labels state m/s).
        residuals: residuals (y - fit), one per sample.
        time: not used inside this function.
        species: not used inside this function.
        speciestexstring: TeX label of the species for the titles.
        scale: factor applied to the hexbin colour limits (``2 * scale``).
        unit: unit string used in the colour bar labels.
        name: site name; "Tsukuba" enables the Tokyo specific decorations.
        name_short: short site identifier shown in the figure title.
        filename: path of the figure file to write.
    """
    is_tsukuba = name == "Tsukuba"
    if is_tsukuba:
        min_angle, max_angle = tsukuba_tokyo_start_stop

    def mark_tokyo_sector():
        """Draw the sector limits and the lines 30 degrees outside on the current axes."""
        pl.axvline(min_angle, color="w", linewidth=2, alpha=0.4)
        pl.axvline(max_angle, color="w", linewidth=2, alpha=0.4)
        pl.axvline(min_angle-30, color="w", linewidth=1.5, alpha=0.3)
        pl.axvline(max_angle+30, color="w", linewidth=1.5, alpha=0.3)

    def ray_end(lon, lat, angle_deg):
        """Return (lon, lat) of the end point of a line starting at (lon, lat).

        The offset is ``360 * 50 / 40000`` degrees in direction ``angle_deg``
        (presumably 50 km for an earth circumference of 40000 km -- confirm).
        """
        return (lon + (math.sin(angle_deg / 360. * 2 * math.pi) * 360. * 50 / 40000.),
                lat + (math.cos(angle_deg / 360. * 2 * math.pi) * 360. * 50 / 40000.))

    pl.figure(figsize=(13, 3))
    pl.suptitle("Residuals by wind direction and speed at the %s site (%s)\n" % (name, name_short), va='bottom')

    # The two helpers below are only needed when the commented-out
    # ``reduce_C_function=red`` option of the hexbin calls is enabled.
    def reject_outliers(data, m = 2.):
        """Return the entries of data within m median absolute deviations of the median."""
        d = np.abs(data - np.median(data))
        mdev = np.median(d)
        s = d/mdev if mdev else 0.
        return data[s < m]

    def red(data): # average the data with outlier detection
        if len(data) == 0:
            return None
        if len(data) == 1:
            return data[0]
        return np.mean(reject_outliers(np.array(data)))

    # --- left panel: negative residuals against wind direction and speed
    # (values >= 0 are masked and the same mask is applied to both axes)
    ax = pl.subplot(131)
    r = np.ma.masked_greater_equal(residuals, 0)
    ws = np.ma.masked_array(wspd_m_s, mask=r.mask)
    wd = np.ma.masked_array(wdir_deg, mask=r.mask)
    b = ax.hexbin(x=wd,
                  y=ws,
                  C=r, # reduce_C_function=red,
                  gridsize=(25,30),
                  cmap=pl.cm.viridis,
                  vmin=-2 * scale, vmax=0,
                  linewidths=hexbin_linewidth) # avoid overlapping
    cb = pl.colorbar(b)
    pl.ylabel("column wind speed / m/s")
    if is_tsukuba:
        mark_tokyo_sector()
        pl.text((min_angle+max_angle)/2.0, 0.5, "from\nwest\nTokyo\narea", color="w", alpha=0.7, horizontalalignment='center', verticalalignment='bottom')
        pl.gca().axis([0,360,0,50]) # misses data above 50
        pl.gca().annotate("higher\nvalues\nclipped", xytext=(90, 45), xy=(90, 50), arrowprops=dict(arrowstyle="->"), horizontalalignment='center', verticalalignment='top')

    cb.set_label("mean residuum / " + unit)
    pl.xlabel("wind direction / degree")
    pl.xticks(list(range(60, 360, 60)))
    # raw string: "\D" is an invalid escape sequence in a normal literal
    ax.set_title(r"Negative $\Delta$" + speciestexstring + " (y - fit)", va='bottom')

    pl.grid()

    # --- middle panel: map of the Tokyo area (stays empty for other sites)
    ax = pl.subplot(132)
    if is_tsukuba:
        m = bm.Basemap(projection='cea', lat_ts=37.5,
                       llcrnrlat=35.2, urcrnrlat=36.8,
                       llcrnrlon=139.2, urcrnrlon=140.8,
                       epsg=4301, # the map region for arcgisimage(), see http://spatialreference.org/ref/epsg/4301/
                       resolution="l")

        m.drawmeridians(np.arange(139, 141, 0.5), labels=[0,0,0,1], linewidth=1.0, color=(1,1,1,0.3), zorder=1) # , yoffset=6) # labels = [left,right,top,bottom]
        m.drawparallels(np.arange(35, 37, 0.5), labels=[1,0,0,0], linewidth=1.0, color=(1,1,1,0.3), zorder=1)
        m.arcgisimage(dpi=1200)
        # add background image with population density; its corners are
        # given as longitude / latitude in degrees and arc minutes
        img_x0, img_y0 = m(138 + 42./60, 34 + 51./60)
        img_x1, img_y1 = m(140 + 59.3/60, 36 + 20./60)
        # NOTE(review): scipy.misc.imread is not available in newer SciPy
        # releases; this call needs the old SciPy this script was written for.
        densityimage = scipy.misc.imread("hasi-yamagata-2014-doi-10_1088--1748-9326--9--6--064015-cropped-34_50-36_20-138_50-140_50.png")
        pl.imshow(densityimage, extent=[img_x0, img_x1, img_y0, img_y1], alpha=1.0, interpolation="nearest")
        pl.text(img_x1, img_y0, "Surface type overlay from\nBagan 2014, doi:10.1088/1748-9326/9/6/064015", color="gray", horizontalalignment='right', verticalalignment='bottom', fontsize="x-small")
        # annotate Tokyo
        tokyo_x, tokyo_y = m(139.683333, 35.683333)
        ax.annotate("Tokyo", xy=(tokyo_x, tokyo_y), xycoords="data", xytext=(10, -20), textcoords="offset points", color="w",
                    arrowprops=dict(arrowstyle="->", color='w'))
        # annotate Tsukuba, the origin of the sector lines
        lon, lat = 140.1215, 36.0513
        x, y = m(lon, lat) # tsukuba
        ax.annotate("Tsukuba", xy=(x, y), xycoords="data", xytext=(15, 5), textcoords="offset points", color="w",
                    arrowprops=dict(arrowstyle="->", color='w'))
        # end points of the sector limits and of the lines 30 degrees outside
        x1, y1 = m(*ray_end(lon, lat, min_angle))
        x2, y2 = m(*ray_end(lon, lat, max_angle))
        x1pstd, y1pstd = m(*ray_end(lon, lat, min_angle-30))
        x2mstd, y2mstd = m(*ray_end(lon, lat, max_angle+30))
        m.plot([x, x1], [y, y1], color="w", label="from west Tokyo area")
        m.plot([x, x2], [y, y2], color="w")
        m.plot([x, x1pstd], [y, y1pstd], color="w", alpha=0.5, label=r"30$^\circ$ uncertainty")
        m.plot([x, x2mstd], [y, y2mstd], color="w", alpha=0.5)
        pl.text(x1, y1, str(min_angle) + r"$^\circ$", fontsize="x-small", color="w", horizontalalignment='right', verticalalignment='center')
        pl.text(x2, y2, str(max_angle) + r"$^\circ$", fontsize="x-small", color="w", horizontalalignment='left', verticalalignment='top')
        legend = pl.legend(fontsize="smaller", loc="upper left", fancybox=True, framealpha=0.45)
        legend.get_frame().set_facecolor("k")
        legend.get_frame().set_edgecolor("w")
        for text in legend.get_texts():
            text.set_color("w")

        pl.title("Tokyo Area")

    # --- right panel: positive residuals (values < 0 are masked)
    ax = pl.subplot(133)
    r = np.ma.masked_less(residuals, 0)
    ws = np.ma.masked_array(wspd_m_s, mask=r.mask)
    wd = np.ma.masked_array(wdir_deg, mask=r.mask)
    b = ax.hexbin(x=wd,
                  y=ws,
                  C=r, # reduce_C_function=red,
                  gridsize=25,
                  cmap=pl.cm.magma_r,
                  vmin=0, vmax=2 * scale,
                  linewidths=hexbin_linewidth) # avoid overlapping
    cb = pl.colorbar(b)
    if is_tsukuba:
        mark_tokyo_sector()
        pl.text((min_angle+max_angle)/2.0, 49, "from\nwest\nTokyo\narea", color="k", alpha=0.7, horizontalalignment='center', verticalalignment='top')
        pl.gca().axis([0,360,0,50])
        pl.ylabel("column wind speed / m/s")

    cb.set_label("mean residuum / " + unit)
    pl.xlabel("wind direction / degree")
    pl.xticks(list(range(60, 360, 60)))
    ax.set_title(r"Positive $\Delta$" + speciestexstring + " (y - fit)", va='bottom')

    pl.grid()
    pl.savefig(filename, bbox_inches='tight', transparent=False, dpi=600)
    pl.close()

    
binsize_deg = 60
labelled = set()

# Lookup tables keyed by the species identifier.
speciestex = dict(xco2="XCO$_2$", xch4="XCH$_4$", xco="XCO")  # TeX labels
speciesabsrange = dict(xco2=10, xch4=0.2, xco=200)  # plot ranges, relative to xco2
speciesunit = dict(xco2="ppm", xch4="ppm", xco="ppb")  # unit of the data columns
speciesmolarmass = dict(xco2=44.0, xch4=16.04, xco=28.01)  # g/mol

# Per-day wind scaling factors; the keys are looked up with the date strings
# produced by serial_date_to_string ("YYYY-MM-DD").
with open("tateno-wind-scaling-by-day.json") as f:
    scaling_per_day = json.loads(f.read())


def serial_date_to_string(srl_no):
    """Return the date "YYYY-MM-DD" for a serial day number.

    Serial number 1 corresponds to 1970-01-01; the time of day that results
    from a fractional serial number is not part of the returned string.

    NOTE(review): because of the ``- 1`` a value of 0 maps to 1969-12-31.
    Confirm that this one-based convention matches the time axis this
    function is applied to.
    """
    epoch = datetime.datetime(1970, 1, 1, 0, 0)
    offset = datetime.timedelta(days=srl_no - 1)
    return (epoch + offset).strftime("%Y-%m-%d")


# Species to evaluate; the value is used as key into the species* lookup
# tables and as prefix of the netCDF variable name. Alternatives are kept
# as comments.
species = "xco2"
# species = "xch4"
# species = "xco"

for n, i in enumerate(sorted(glob.glob("tccon-data/tk20110804_20160330*.nc"))):
    print(i)
    # NOTE(review): the dataset is never closed explicitly.
    D = nc.Dataset(i)
    # site name: text before the first comma of the Location attribute;
    # short name: first two characters of the file name
    name = D.Location.split(",")[0]
    name_short = os.path.basename(i)[:2]
    time = D.variables["time"][:]
    wdir_deg = D.variables["wdir_deg"][:]
    wspd_m_s = D.variables["wspd_m_s"][:]
    # scale the ground wind speed to the average boundary layer wind speed
    # using the sonde data from Tateno; days without a factor stay unscaled
    time_daydate = [serial_date_to_string(day) for day in time]
    wind_scaling = np.array([scaling_per_day.get(daydate, 1)
                             for daydate in time_daydate])
    wspd_m_s *= wind_scaling

    # check the variability of wind speed over the day
    binned = collections.defaultdict(list)
    toplot = []
    for q in range(int(min(time)), int(max(time))):
        # samples in the first half of serial day q: does a rough japanese
        # UTC adjustment to get all measurements of the same day
        in_first_half = numpy.logical_and(time > q, time < (q + 0.5))
        indexes = np.argwhere(in_first_half)
        if len(indexes) == 0:
            continue
        # fraction of the day of every sample and its wind speed relative to
        # the last sample of the window (the divisor is at least 1)
        day_fractions = [float(t) for t in time[indexes]%1]
        speed_quotients = [float(p) for p in (wspd_m_s[indexes] / max(1, wspd_m_s[indexes][-1]))]
        for p, d in zip(day_fractions, speed_quotients):
            # hourly bins, shifted by half an hour (1/48 day)
            binned[int((p - (1/48.)) * 24)].append(d)
        toplot.append((day_fractions, speed_quotients))
    labels = sorted(binned)
    boxed = [binned[hour] for hour in labels]
    for k, v in toplot:
        # boxplot indexes start at 1 and go in integer values
        pl.plot([1 + (24 * l) for l in k],
                v, "x", alpha=0.008, color="purple")
    pl.boxplot(boxed, labels=labels, notch=True, showmeans=True)
    pl.yscale('log')
    pl.ylim(0.3333, 3.333)
    pl.grid(axis="y", which="both") # which: minor or major or both
    pl.title("variability of ground wind speed over the day at Tsukuba, Japan")
    pl.xlabel("time of day / UTC hours")
    pl.ylabel("wind speed divided by speed closest to 12 UTC / unitless")
    pl.savefig("wind-speed-quotient-tateno.pdf", bbox_inches='tight', transparent=False)
    pl.close()
    # --- read the selected species and mask unusable samples ---
    # The variables keep the "xco2_ppm" names although species / unit select
    # which column is actually read.
    unit = speciesunit[species]
    xco2_ppm = D.variables[species + "_" + unit][:]
    xco2_ppm_error = D.variables[species + "_" + unit + "_error"][:]
    # masked fill values
    xco2_ppm = np.ma.masked_equal(xco2_ppm, -9999.9)
    # if there is a flags variable, use it
    if "flags" in D.variables:
        flags = D.variables["flags"][:]
    elif "mask" in D.variables:
        flags = D.variables["mask"][:]
    else:
        flags = 0
    # every sample with a positive flag is masked in addition to fill values
    xco2_ppm = np.ma.masked_array(
        xco2_ppm, np.ma.mask_or(xco2_ppm.mask, flags > 0))
    xco2_ppm_error = np.ma.masked_array(xco2_ppm_error, mask=xco2_ppm.mask)
        
    # apply the mask of the species data to time and wind as well
    time = np.ma.masked_array(time, mask=xco2_ppm.mask)
    
    wdir_deg = np.ma.masked_array(wdir_deg, mask=xco2_ppm.mask)
    wspd_m_s = np.ma.masked_array(wspd_m_s, mask=xco2_ppm.mask)
        
    # start fitting
    # Fit window: from the first multiple of 365.25 (days) after time[0] to
    # the last multiple of 365.25 not after time[-1]; searchsorted turns
    # these times into indexes.
    minidx = np.searchsorted(time, int(time[0]//365.25 + 1)*365.25)
    maxidx = np.searchsorted(time, int(time[-1]//365.25)*365.25)
    # fall back to all samples when the window holds fewer than two
    # unmasked values
    if len(time[minidx:maxidx].compressed()) < 2:
        minidx, maxidx = 0, len(time)
    # NOTE(review): not read anywhere in the visible part of the script
    should_mask_outside_tokyo = True

    # --- split the samples by wind direction ---
    # The Tokyo sector is widened by 10 degrees on both sides.
    min_angle, max_angle = tsukuba_tokyo_start_stop
    # masked_inside hides the directions within the sector, so the
    # "outside" arrays keep the samples with wind from outside the sector ...
    wdir_deg_outside = pl.ma.masked_inside(wdir_deg, min_angle - 10, max_angle + 10)
    outside_tokyo_unflagged_mask = np.ma.mask_or(
        time.mask, wdir_deg_outside.mask)
    # ... and masked_outside keeps the samples with wind from the sector
    wdir_deg_inside = pl.ma.masked_outside(wdir_deg, min_angle - 10, max_angle + 10)
    inside_tokyo_unflagged_mask = np.ma.mask_or(
        time.mask, wdir_deg_inside.mask)
    # histogram of the wind directions of both groups in 10 degree bins, the
    # outside counts on a second y axis
    pl.hist(wdir_deg_inside.compressed(), bins=list(range(0, 390, 10)), label="From inside Tokyo Area")
    pl.ylim(0, 5000)
    pl.xlabel("incoming wind direction / deg")
    pl.ylabel("count from inside Tokyo")
    pl.legend(loc="upper left")
    pl.twinx()
    pl.ylabel("count from outside Tokyo")
    pl.hist(wdir_deg_outside.compressed(), bins=list(range(0, 390, 10)), label="From outside Tokyo Area", color="g")
    pl.ylim(0, 5000)
    pl.legend(loc="upper right")
    pl.savefig(i[:-3]+"-wind_mask-" + species + ".pdf", bbox_inches='tight', transparent=False)
    pl.close()
    # pl.hist(np.ma.masked_array(wdir_deg, mask=outside_tokyo_unflagged_mask).compressed())
    # pl.axvline(min_angle)
    # pl.axvline(max_angle)
    # pl.show()
    # t / c: time and species data with wind from outside the Tokyo sector
    t = np.ma.masked_array(
        time, mask=outside_tokyo_unflagged_mask)
    c = np.ma.masked_array(
        xco2_ppm, mask=outside_tokyo_unflagged_mask)
    # ti / ci: time and species data with wind from inside the Tokyo sector
    ti = np.ma.masked_array(
        time, mask=inside_tokyo_unflagged_mask)
    ci = np.ma.masked_array(
        xco2_ppm, mask=inside_tokyo_unflagged_mask)





    ### fit with ccgfilt start
    import ccgfilt
    # The time axis is in days; the filter is fed fractional years, hence
    # all times are divided by 365 before they are passed to ccgfilt.

    ### experiment to filter only the trend by background locations, does not work.
    # filt_out_trend_fit = ccgfilt.ccgFilter(t[minidx:maxidx].compressed()/365,
    #                                    c[minidx:maxidx].compressed(),
    #                                    numpolyterms=2, # linear
    #                                    numharmonics=0) # none
    # xco2_ppm_ccgfilt_without_trend = xco2_ppm.compressed() - filt_out_trend_fit.getPolyValue(time.compressed()/365)
    # filt_all_without_out_trend = ccgfilt.ccgFilter(time.compressed()/365,
    #                                                xco2_ppm_ccgfilt_without_trend,
    #                                                numpolyterms=1, # none
    #                                                numharmonics=2) # fit cyclic by year

    # Fit over all unmasked samples. Both arrays are compressed: passing the
    # uncompressed masked y together with the compressed x hands arrays of
    # different length to the filter as soon as any sample is masked.
    filt_all = ccgfilt.ccgFilter(time.compressed()/365,
                                 xco2_ppm.compressed(),
                                 numpolyterms=2,
                                 numharmonics=2) # fit cyclic by year

    # unmasked samples inside the fit window, outside (t, c) and inside
    # (ti, ci) the Tokyo wind sector
    t_fit_years = t[minidx:maxidx].compressed()/365
    c_fit = c[minidx:maxidx].compressed()
    ti_fit_years = ti[minidx:maxidx].compressed()/365
    ci_fit = ci[minidx:maxidx].compressed()

    filt_out = ccgfilt.ccgFilter(t_fit_years,
                                 c_fit,
                                 numpolyterms=2, # linear
                                 numharmonics=2) # avoids artifacts
    filt_inside = ccgfilt.ccgFilter(ti_fit_years,
                                    ci_fit,
                                    numpolyterms=2,
                                    numharmonics=2)

    ## from outside tokyo
    # get x,y data for plotting
    filt_out_x0 = filt_out.xinterp
    filt_out_y1 = filt_out.getFunctionValue(filt_out_x0)
    filt_out_y2 = filt_out.getPolyValue(filt_out_x0)
    filt_out_y3 = filt_out.getSmoothValue(filt_out_x0)
    filt_out_y4 = filt_out.getTrendValue(filt_out_x0)
    # Seasonal Cycle
    # x and y are original data points
    filt_out_trend = filt_out.getTrendValue(t_fit_years)
    filt_out_detrend = c_fit - filt_out_trend
    filt_out_harmonics = filt_out.getHarmonicValue(filt_out_x0)
    filt_out_smooth_cycle = filt_out_harmonics + filt_out.smooth - filt_out.trend
    # residuals from the function
    filt_out_resid_from_func = filt_out.resid
    # smoothed residuals
    filt_out_resid_smooth = filt_out.smooth
    # trend of residuals
    filt_out_resid_trend = filt_out.trend
    # residuals about the smoothed line
    filt_out_resid_from_smooth = filt_out.yp - filt_out.getSmoothValue(t_fit_years)
    # residuals equivalent to the simpler eval that only uses polynomials
    filt_out_residuals = filt_out.yp - filt_out.getPolyValue(t_fit_years) - filt_out.getHarmonicValue(t_fit_years)
    # equally spaced interpolated data with function removed
    filt_out_x1 = filt_out.xinterp
    filt_out_y9 = filt_out.yinterp

    ## from inside tokyo
    filt_in_x0 = filt_inside.xinterp
    filt_in_y1 = filt_inside.getFunctionValue(filt_in_x0)
    filt_in_y2 = filt_inside.getPolyValue(filt_in_x0)
    filt_in_y3 = filt_inside.getSmoothValue(filt_in_x0)
    filt_in_y4 = filt_inside.getTrendValue(filt_in_x0)
    # Seasonal Cycle
    # x and y are original data points
    filt_in_trend = filt_inside.getTrendValue(ti_fit_years)
    filt_in_detrend = ci_fit - filt_in_trend
    filt_in_harmonics = filt_inside.getHarmonicValue(filt_in_x0)
    filt_in_smooth_cycle = filt_in_harmonics + filt_inside.smooth - filt_inside.trend
    # residuals from the function
    filt_in_resid_from_func = filt_inside.resid
    # smoothed residuals
    filt_in_resid_smooth = filt_inside.smooth
    # trend of residuals
    filt_in_resid_trend = filt_inside.trend
    # residuals about the smoothed line
    filt_in_resid_from_smooth = filt_inside.yp - filt_inside.getSmoothValue(ti_fit_years)
    # residuals equivalent to the simpler eval that only uses polynomials
    filt_in_residuals = filt_inside.yp - filt_inside.getPolyValue(ti_fit_years) - filt_inside.getHarmonicValue(ti_fit_years)
    # equally spaced interpolated data with function removed
    filt_in_x1 = filt_inside.xinterp
    filt_in_y9 = filt_inside.yinterp
    ## diff
    # NOTE(review): despite the name this is inside minus outside, both
    # smooth curves evaluated at the times of the inside samples.
    filt_out_minus_in_from_smooth = filt_inside.getSmoothValue(ti_fit_years) - filt_out.getSmoothValue(ti_fit_years)
    ### / ccgfilt end



    ### fit with polyfit start
    # Straight-line fits (degree 1) over the fit window, separately for the
    # samples from outside (t, c) and inside (ti, ci) the Tokyo wind sector.
    # Index 0 of the parameters is the slope per unit of the time axis.
    xco2_ppm_trendfitparams_outside = np.polyfit(t[minidx:maxidx].compressed(),
                                                 c[minidx:maxidx].compressed(), 1)
    xco2_ppm_trendfitparams_inside = np.polyfit(ti[minidx:maxidx].compressed(),
                                                ci[minidx:maxidx].compressed(), 1)
    
    # Figure 1: data of both groups together with their straight-line fits.
    pl.plot(ti.compressed(),ci.compressed(), label="Data from inside Tokyo Area", alpha=0.6, color="royalblue")
    pl.plot(t.compressed(),c.compressed(), label="Data from outside Tokyo Area", alpha=0.6, color="green")
    pl.plot(ti.compressed(),np.poly1d(xco2_ppm_trendfitparams_inside)(ti.compressed()), label="fit from inside Tokyo Area", color="turquoise")
    pl.plot(t.compressed(),np.poly1d(xco2_ppm_trendfitparams_outside)(t.compressed()), label="fit from outside Tokyo Area", color="limegreen")
    
    # slope times 365.25 gives the change per year
    # NOTE(review): the text always says "ppm", also when species selects a
    # column in another unit.
    pl.text(0.98, 0.015, ( 'trend from inside Tokyo: %.3f ppm per year\n' % (365.25*xco2_ppm_trendfitparams_inside[0],) +
                           'trend from outside Tokyo: %.3f ppm per year' % (365.25*xco2_ppm_trendfitparams_outside[0],)),
            verticalalignment='bottom', horizontalalignment='right',
            transform=pl.gca().transAxes, color='gray', fontsize=9)

    pl.legend()
    pl.savefig(i[:-3]+"-data_by_wind_mask-" + species + ".pdf", bbox_inches='tight', transparent=False)
    pl.close()
    
    # Figure 2: data of both groups with the ccgfilt components. The ccgfilt
    # x values are multiplied by 365 to get back to the time axis of the data.
    pl.plot(ti.compressed(),ci.compressed(), label="Data from inside Tokyo Area", alpha=0.6, color="royalblue")
    pl.plot(t.compressed(),c.compressed(), label="Data from outside Tokyo Area", alpha=0.6, color="green")

    # pl.plot(filt_in_x0*365,filt_in_y1, label="ccgfilt y1 from inside Tokyo Area", alpha=0.5)
    # pl.plot(filt_out_x0*365,filt_out_y1, label="ccgfilt y1 from outside Tokyo Area", alpha=0.5)

    # polynomial part of the fits
    pl.plot(filt_in_x0*365,filt_in_y2, label="ccgfilt linear fit from inside Tokyo Area", alpha=0.5)
    pl.plot(filt_out_x0*365,filt_out_y2, label="ccgfilt linear fit from outside Tokyo Area", alpha=0.5)

    # pl.plot(filt_in_x0*365,filt_in_y3, label="ccgfilt y3 from inside Tokyo Area", alpha=0.5)
    # pl.plot(filt_out_x0*365,filt_out_y3, label="ccgfilt y3 from outside Tokyo Area", alpha=0.5)

    # pl.plot(filt_in_x0*365,filt_in_y4, label="ccgfilt y4 from inside Tokyo Area", alpha=0.5)
    # pl.plot(filt_out_x0*365,filt_out_y4, label="ccgfilt y4 from outside Tokyo Area", alpha=0.5)

    # pl.plot(filt_in_x1*365,filt_in_y1, label="ccgfilt function from inside Tokyo Area", alpha=0.5)
    # pl.plot(filt_out_x1*365,filt_out_y1, label="ccgfilt function from outside Tokyo Area", alpha=0.5)

    # pl.plot(ti[minidx:maxidx].compressed(),filt_in_trend, label="ccgfilt trend from inside Tokyo Area", alpha=0.5)
    # pl.plot(t[minidx:maxidx].compressed(),filt_out_trend, label="ccgfilt trend from outside Tokyo Area", alpha=0.5)

    # The detrended data, harmonics and residuals are shifted to the level of
    # the data (value at minidx) minus 10 or 20 so that the curves are drawn
    # below each other.
    # NOTE(review): the inside curves use c[minidx] (outside data) as offset
    # in two places and ci[minidx] in one; c[minidx] may also be masked.
    pl.plot(ti[minidx:maxidx].compressed(),filt_in_detrend+c[minidx] - 10, label="ccgfilt detrended - 10 from inside Tokyo Area", alpha=0.5)
    pl.plot(t[minidx:maxidx].compressed(),filt_out_detrend+c[minidx] - 10, label="ccgfilt detrended -10 from outside Tokyo Area", alpha=0.5)

    pl.plot(filt_in_x1*365,filt_in_harmonics+ci[minidx] - 10, label="ccgfilt harmonics -10 from inside Tokyo Area", alpha=0.5)
    pl.plot(filt_out_x1*365,filt_out_harmonics+c[minidx] - 10, label="ccgfilt harmonics -10 from outside Tokyo Area", alpha=0.5)

    pl.plot(ti[minidx:maxidx].compressed(),filt_in_residuals+c[minidx] - 20, label="ccgfilt residuals -20 from inside Tokyo Area", alpha=0.5)
    pl.plot(t[minidx:maxidx].compressed(),filt_out_residuals+c[minidx] - 20, label="ccgfilt residuals -20 from outside Tokyo Area", alpha=0.5)

    # pl.plot(filt_in_x1*365,filt_in_smooth_cycle+ci[minidx] - 10, label="ccgfilt smooth cycle from inside Tokyo Area - 10", alpha=0.5)
    # pl.plot(filt_out_x1*365,filt_out_smooth_cycle+c[minidx], label="ccgfilt smooth cycle from outside Tokyo Area", alpha=0.5)

    # pl.plot(filt_in_x1*365,filt_in_y9+c[minidx], label="ccgfilt residuals from inside Tokyo Area", color="navy", alpha=0.5)
    # pl.plot(filt_out_x1*365,filt_out_y9+c[minidx], label="ccgfilt residuals from outside Tokyo Area", color="magenta", alpha=0.5)

    pl.text(0.98, 0.015, ( 'trend from inside Tokyo: %.3f ppm per year\n' % (365.25*xco2_ppm_trendfitparams_inside[0],) +
                           'trend from outside Tokyo: %.3f ppm per year' % (365.25*xco2_ppm_trendfitparams_outside[0],)),
            verticalalignment='bottom', horizontalalignment='right',
            transform=pl.gca().transAxes, color='gray', fontsize=9)

    pl.legend(prop={'size': 6})
    pl.savefig(i[:-3]+"-data_by_wind_mask_ccgfilt_all-" + species + ".pdf", bbox_inches='tight', transparent=False)
    pl.close()

    # Figure 3: only the residuals (data minus polynomial and harmonics).
    pl.plot(ti[minidx:maxidx].compressed(),filt_in_residuals, label="ccgfilt residuals from inside Tokyo Area", alpha=0.5)
    pl.plot(t[minidx:maxidx].compressed(),filt_out_residuals, label="ccgfilt residuals from outside Tokyo Area", alpha=0.5)
    pl.legend(prop={'size': 6})
    pl.savefig(i[:-3]+"-data_by_wind_mask_ccgfilt_residuals-" + species + ".pdf", bbox_inches='tight', transparent=False)
    pl.close()


    def plot_daily_cycle_with_fit(t, ti, minidx, maxidx, residuals_inside, residuals_outside, filename):
        """Plot the residuals against the time of day and save the figure.

        The main axes show all residuals from outside the Tokyo sector and,
        shifted by -10, from inside it, each with box plots for two hour
        bins. Two insets repeat the box plots of each group with a narrow
        y range. The figure is saved to ``filename`` and closed.

        ``species``, ``unit``, ``speciesabsrange`` and ``speciestex`` are
        read from the enclosing scope.

        Parameters:
            t: masked times of the samples from outside the sector.
            ti: masked times of the samples from inside the sector.
            minidx, maxidx: index window of the fit; only the unmasked
                samples of ``t[minidx:maxidx]`` / ``ti[minidx:maxidx]`` are
                used.
            residuals_inside: residuals of the unmasked inside samples.
            residuals_outside: residuals of the unmasked outside samples.
            filename: path of the figure file to write.
        """
        # x axis: hour of the day, shifted by +3 hours (see the tick labels)
        hours_outside = 24 * ((t[minidx:maxidx].compressed() + 3./24)%1)
        hours_inside = 24 * ((ti[minidx:maxidx].compressed() + 3./24)%1)

        ## daily cycle fit after correction
        pl.plot(hours_outside, residuals_outside, "x", label="ccgfilt residuals from outside Tokyo Area", alpha=0.2)
        pl.plot(hours_inside, residuals_inside - 10, "+", label="ccgfilt residuals from inside Tokyo Area - 10 ppm", alpha=0.2)

        # add boxplots: one box per two hour bin, drawn at the bin centre
        hours_utc_plus3 = [1, 3, 5, 7, 9, 11, 13]
        hours_utc_labels = ["22", "0", "2", "4", "6", "8", "10"]
        bin_starts = hours_utc_plus3[:-1]
        box_positions = [start + 1 for start in bin_starts]
        box_style = dict(sym="", whis=[2.5, 97.5], notch=True, showmeans=True, bootstrap=1000)
        # The residuals passed as arguments are binned here, not the
        # variables of the enclosing scope.
        boxarrays_outside = [
            residuals_outside[
                np.logical_and(hours_outside > start, hours_outside < start+2)]
            for start in bin_starts]
        boxarrays_inside_minus_10 = [
            residuals_inside[
                np.logical_and(hours_inside > start, hours_inside < start+2)] - 10
            for start in bin_starts]
        pl.boxplot(boxarrays_outside, positions=box_positions, **box_style)
        pl.boxplot(boxarrays_inside_minus_10, positions=box_positions, **box_style)
        # xticks shifted to show UTC, inverse of the +3./24
        pl.xticks(hours_utc_plus3, hours_utc_labels)
        pl.ylim(-19.9 * speciesabsrange[species]/speciesabsrange["xco2"],
                19.9 * speciesabsrange[species]/speciesabsrange["xco2"])

        pl.title("Average daily cycle in Tsukuba, Japan (tk), by ccgfilt")
        # a single legend call is enough: a second call replaces the first
        pl.legend(loc="lower left", fancybox=True, framealpha=0.5)

        pl.xlabel("time of day / hour UTC")
        pl.ylabel(speciestex[species] + " / " + unit)
        pl.grid()

        def add_inset_boxplot(subpos, boxarrays, title):
            """Add an inset axes with the box plots of one group.

            ``subpos`` is [x, y, width, height] of the inset as fractions of
            the axes returned by ``add_subplot(111)``.
            """
            fig = pl.gcf()
            ax = fig.add_subplot(111)
            box = ax.get_position()
            # convert the lower left corner from axes to figure coordinates
            inax_position = ax.transAxes.transform(subpos[0:2])
            infig_position = fig.transFigure.inverted().transform(inax_position)
            width = box.width * subpos[2]
            height = box.height * subpos[3]
            subax = fig.add_axes([infig_position[0], infig_position[1], width, height], facecolor="w")
            # shrink the tick labels with the square root of the inset size
            x_labelsize = subax.get_xticklabels()[0].get_size()
            y_labelsize = subax.get_yticklabels()[0].get_size()
            x_labelsize *= subpos[2]**0.5
            y_labelsize *= subpos[3]**0.5
            subax.xaxis.set_tick_params(labelsize=x_labelsize)
            subax.yaxis.set_tick_params(labelsize=y_labelsize)
            pl.xlim(2, 12)
            pl.ylim(-1.6 * speciesabsrange[species]/speciesabsrange["xco2"],
                    1.6 * speciesabsrange[species]/speciesabsrange["xco2"])
            pl.boxplot(boxarrays, positions=box_positions, **box_style)
            pl.xticks([3, 5, 7, 9, 11], ["0", "2", "4", "6", "8"])
            pl.title(title)

        # add a top left subplot with the mean daily cycle
        add_inset_boxplot([0.07, 0.625, 0.337, 0.3], boxarrays_outside, "from outside")
        # add a top right subplot with the mean daily cycle; the shift of
        # -10 used in the main axes is undone for the inset
        add_inset_boxplot([0.64, 0.625, 0.337, 0.3],
                          [arr + 10 for arr in boxarrays_inside_minus_10],
                          "from inside")

        pl.savefig(filename, bbox_inches='tight', transparent=False)
        pl.close()
    plot_daily_cycle_with_fit(t, ti, minidx, maxidx, filt_in_residuals, filt_out_residuals, filename=i[:-3]+"-data_by_wind_mask_ccgfilt_residuals-daily-start-" + str(min_angle) + "-stop-" + str(max_angle) + "-" + species + ".pdf")
    

    # pl.plot(filt_in_x1,filt_in_y9, label="ccgfilt residuals from inside Tokyo Area", color="navy", alpha=0.5)
    # pl.plot(filt_out_x1,filt_out_y9, label="ccgfilt residuals from outside Tokyo Area", color="magenta", alpha=0.5)
    # pl.plot(ti[minidx:maxidx].compressed(), filt_in_resid_from_func, label="filt_in_resid_from_func", alpha=0.5)
    # pl.plot(filt_in_x1, filt_in_resid_smooth, label="filt_in_resid_smooth", alpha=0.5)
    # pl.plot(filt_in_x1, filt_in_resid_trend, label="filt_in_resid_trend", alpha=0.5)
    pl.plot(t[minidx:maxidx].compressed(), filt_out_resid_from_smooth, label="filt_out_resid_from_smooth", alpha=0.5)
    pl.plot(ti[minidx:maxidx].compressed(), filt_in_resid_from_smooth, label="filt_in_resid_from_smooth", alpha=0.5)
    pl.plot(ti[minidx:maxidx].compressed(), filt_out_minus_in_from_smooth, label="filt_out_minus_in_from_smooth", alpha=0.5)
    pl.legend()
    pl.savefig(i[:-3]+"-data_by_wind_mask_ccgfilt-" + species + ".pdf", bbox_inches='tight', transparent=False)
    pl.close()
    # correlation of out and in?
    pl.scatter(filt_inside.getSmoothValue(ti[minidx:maxidx].compressed()/365), filt_out.getSmoothValue(ti[minidx:maxidx].compressed()/365), label="filt_out (y) vs. filt_in (x)", alpha=0.1)
    pl.legend()
    pl.savefig(i[:-3]+"-data_by_wind_mask_ccgfilt_out_vs_in-" + species + ".pdf", bbox_inches='tight', transparent=False)
    pl.close()

    ### ccgfilt end
    

    

    def concentration2mass(xspecies, prior_date_index, prior_gravity, pressure, speciesmolarmass, molarmassair, concentration_unit="ppm"):
        """Convert X<species> [concentration_unit]
        into total column mass [g/m^2].

        For every time step the air column mass is derived from the
        pressure and the gravity at the surface level (p / g) and then
        multiplied with the mole fraction and the molar mass ratio.

        Keep the mask of xspecies.

        Parameters:
            xspecies: 1D (masked) array of mole fractions, by time.
            prior_date_index: for every time step the index into the
                first axis of prior_gravity.
            prior_gravity: 2D array indexed [prior_date, level] in m/s^2;
                only level 0 (the surface) is used.
            pressure: pressure in hPa, by time.
            speciesmolarmass: molar mass of the species.
            molarmassair: molar mass of air, same unit as speciesmolarmass.
            concentration_unit: one of "ppm", "ppb", "ppt".

        Returns:
            Masked array of the column mass, with the mask of xspecies.

        Raises:
            ValueError: for an unknown concentration_unit, for a first
                pressure value outside of 500..1500 hPa, or for an air
                column mass outside of 5e6..20e6 g/m^2.
        """
        unit_divisors = {"ppm": 1e6, "ppb": 1e9, "ppt": 1e12}
        if concentration_unit not in unit_divisors:
            raise ValueError("unknown concentration_unit: %s" % concentration_unit)
        mass = np.zeros(xspecies.shape) # by time
        # The checks are written as two separate comparisons on purpose:
        # masked or NaN values compare as false and therefore pass.
        # hPa, should be 1000 at surface
        if len(pressure) > 0 and (pressure[0] < 500 or pressure[0] > 1500):
            raise ValueError("pressure[0] is %f, should be around 1000 hPa" % (
                pressure[0]))
        level = 0 # at the surface
        for i in range(mass.shape[0]):
            p = pressure[i] # hPa == N/m^2 / 100 == kg/(m * s^2) / 100
            g = prior_gravity[prior_date_index[i], level] # m/s^2
            column_mass = ((p * 100) / g) * 1000 # g / m^2
            if column_mass < 5e6 or column_mass > 20e6:
                raise ValueError("column_mass is %f g but should be around 10 tons" % column_mass)
            mass[i] = xspecies[i] * column_mass
        mass /= unit_divisors[concentration_unit]
        # adjust the mass following Bannon et. al 1997,
        # 10.1175/1520-0477(1997)078<2637:dtspet>2.0.co;2
        mass /= 0.9975
        mass = mass * (speciesmolarmass / molarmassair)
        # getmask also accepts plain arrays (no mask) instead of failing
        return np.ma.masked_array(mass, mask=np.ma.getmask(xspecies))



    # polyfit start real fit everything
    
    xco2_ppm_trendfit = np.poly1d(xco2_ppm_trendfitparams_outside)
    degree = 6
    degree_day = 3
    fitcycle_days = 365.25

    xco2_ppm_polyfitparams = np.polyfit(time.compressed()%fitcycle_days,
                                        xco2_ppm.compressed() - xco2_ppm_trendfit(
                                            time.compressed()), degree)
    xco2_ppm_polyfit = np.poly1d(xco2_ppm_polyfitparams)
    y_days = xco2_ppm.compressed() - xco2_ppm_trendfit(time.compressed()) - xco2_ppm_polyfit(time.compressed()%fitcycle_days)
    xco2_ppm_polyfitparams_days = np.polyfit((time.compressed() + 3./24)%1,
                                             y_days,
                                             degree_day)
    xco2_ppm_polyfit_days = np.poly1d(xco2_ppm_polyfitparams_days)
    yhat_days = xco2_ppm_polyfit_days((time + 3./24)%1)
    ybar_days = np.sum(y_days)/float(y_days.shape[0])
    ssreg_days = np.sum((yhat_days-ybar_days)**2)
    sstot_days = np.sum((y_days - ybar_days)**2)
    Rsquared_days = ssreg_days / sstot_days
    # here is the real fit
    residuals = xco2_ppm - xco2_ppm_trendfit(time) - xco2_ppm_polyfit(time%fitcycle_days) - yhat_days

    ### fit against background directions
    residuals_ccgfilt_fit = filt_out.getPolyValue(time/365) + filt_out.getHarmonicValue(time/365)
    ### fit against all directions
    residuals_ccgfilt_fit = filt_all.getPolyValue(time/365) + filt_all.getHarmonicValue(time/365)
    ### try a hybrid fit as with polyfit. This does not work,
    ### therefore I do not get ccgfilt to do exactly the same thing as
    ### the polyfit.
    # residuals_ccgfilt_fit = filt_all_without_out_trend.getHarmonicValue(time/365)
    
    residuals_ccgfilt_ignoring_daily_cycle = xco2_ppm - residuals_ccgfilt_fit
    
    # remove daily cycle for ccgfilt
    residuals_ccgfilt_ignoring_daily_cycle_outside_tokyo = np.ma.masked_array(
        residuals_ccgfilt_ignoring_daily_cycle, mask=outside_tokyo_unflagged_mask)
    filt_daily = ccgfilt.ccgFilter(t.compressed()%365., # fit a day as a year
                                   residuals_ccgfilt_ignoring_daily_cycle_outside_tokyo.compressed(),
                                   numpolyterms=2, # linear
                                   numharmonics=2) # harmonic components
    residuals_ccgfilt_daily_fit = filt_daily.getPolyValue(time%365) + filt_daily.getHarmonicValue(time%365)
    residuals_ccgfilt_with_outliers = residuals_ccgfilt_ignoring_daily_cycle - residuals_ccgfilt_daily_fit

    def mask_strong_outliers(residuals):
        """Mask values further than 5 standard deviations from the median.

        The mask of the result is the union of the mask of xco2_ppm
        (taken from the enclosing scope) and of both outlier cuts.
        """
        center = np.median(residuals)
        cutoff = 5*np.std(residuals)
        too_high = np.ma.masked_greater(residuals, center + cutoff)
        too_low = np.ma.masked_less(residuals, center - cutoff)
        combined_mask = np.ma.mask_or(xco2_ppm.mask, too_high.mask)
        combined_mask = np.ma.mask_or(combined_mask, too_low.mask)
        return np.ma.masked_array(residuals, mask=combined_mask)
    
    residuals = mask_strong_outliers(residuals)
    residuals_ccgfilt_inside = np.ma.masked_array(
        residuals_ccgfilt_with_outliers, mask=inside_tokyo_unflagged_mask)
    residuals_ccgfilt_outside = np.ma.masked_array(
        residuals_ccgfilt_with_outliers, mask=outside_tokyo_unflagged_mask)
    residuals_ccgfilt = mask_strong_outliers(residuals_ccgfilt_with_outliers)
    residuals_ccgfilt_ignoring_daily_cycle = mask_strong_outliers(residuals_ccgfilt_ignoring_daily_cycle)
    pl.close()
    pl.ylim(-5, 5)
    pl.ylabel("residuals / XCO$_2$ ppm")
    pl.title("Daily cycle fit for Tsukuba using ccgfilt")
    pl.xticks([3, 5, 7, 9, 11], ["0", "2", "4", "6", "8"])
    pl.xlabel("time of day / hour UTC")
    pl.plot(24 * ((time + 3./24)%1), residuals_ccgfilt_ignoring_daily_cycle, "x", label="residuals ignoring daily cycle", alpha=0.3)
    pl.plot(24 * ((time + 3./24)%1), residuals_ccgfilt_with_outliers, "+", label="residuals", alpha=0.3)
    # pl.plot(24 * ((time + 3./24)%1), residuals_ccgfilt, ",", label="residuals w/o outliers", alpha=0.3)
    pl.plot(24 * ((time + 3./24)%1), residuals_ccgfilt_daily_fit, ".", label="daily cycle fit")
    pl.legend()
    pl.savefig(i[:-3]+"-residuals-ccgfilt-check-daily-cycle.pdf")
    pl.close()
        
    residuals_years = xco2_ppm - xco2_ppm_trendfit(time) - xco2_ppm_polyfit(time%fitcycle_days)
    


    # plot ccgfilt daily cycle after fitting away the daily cycle
    
    pl.plot(24 * ((t.compressed() + 3./24)%1),residuals_ccgfilt_outside.compressed(), "x", label="ccgfilt residuals from outside Tokyo Area", alpha=0.2)
    pl.plot(24 * ((ti.compressed() + 3./24)%1),residuals_ccgfilt_inside.compressed() - 10, "+", label="ccgfilt residuals from inside Tokyo Area - 10 ppm", alpha=0.2)

    # add boxplots
    hours_utc_plus3 = [1, 3, 5, 7, 9, 11, 13]
    hours_utc_labels = ["22", "0", "2", "4", "6", "8", "10"]
    boxarrays_outside = [
        residuals_ccgfilt_outside.compressed()[
            np.logical_and(
                24 * ((t.compressed() + 3./24)%1) > i,
                24 * ((t.compressed() + 3./24)%1) < i+2)]
        for i in hours_utc_plus3[:-1]]
    boxarrays_inside_minus_10 = [
        residuals_ccgfilt_inside.compressed()[
            np.logical_and(
                24 * ((ti.compressed() + 3./24)%1) > i,
                24 * ((ti.compressed() + 3./24)%1) < i+2)] - 10
        for i in hours_utc_plus3[:-1]]
    pl.boxplot(boxarrays_outside, positions=[i + 1 for i in hours_utc_plus3[:-1]], sym="", whis=[2.5, 97.5], notch=True, showmeans=True, bootstrap=1000)
    pl.boxplot(boxarrays_inside_minus_10, positions=[i + 1 for i in hours_utc_plus3[:-1]], sym="", whis=[2.5, 97.5], notch=True, showmeans=True, bootstrap=1000)
    pl.legend(prop={'size': 6})
    # xticks shifted to show UTC, inverse of the +3./24
    pl.xticks(hours_utc_plus3, hours_utc_labels)
    pl.ylim(-19.9 * speciesabsrange[species]/speciesabsrange["xco2"],
            19.9 * speciesabsrange[species]/speciesabsrange["xco2"])

    pl.title("Average daily cycle in Tsukuba, Japan (tk), by ccgfilt, after correction")
    pl.legend(loc="lower left", fancybox=True, framealpha=0.5)

    pl.xlabel("time of day / hour UTC")
    pl.ylabel(speciestex[species] + " / " + unit)
    pl.grid()

    # add a top left subplot with the mean daily cycle
    fig = pl.gcf()
    ax = fig.add_subplot(111)
    subpos = [0.07, 0.625, 0.337, 0.3]
    box = ax.get_position()
    width = box.width
    height = box.height
    inax_position  = ax.transAxes.transform(subpos[0:2])
    transFigure = fig.transFigure.inverted()
    infig_position = transFigure.transform(inax_position)    
    x = infig_position[0]
    y = infig_position[1]
    width *= subpos[2]
    height *= subpos[3]  # <= Typo was here
    subax = fig.add_axes([x,y,width,height],facecolor="w")
    x_labelsize = subax.get_xticklabels()[0].get_size()
    y_labelsize = subax.get_yticklabels()[0].get_size()
    x_labelsize *= subpos[2]**0.5
    y_labelsize *= subpos[3]**0.5
    subax.xaxis.set_tick_params(labelsize=x_labelsize)
    subax.yaxis.set_tick_params(labelsize=y_labelsize)
    pl.xlim(2, 12)
    pl.ylim(-1.6 * speciesabsrange[species]/speciesabsrange["xco2"],
            1.6 * speciesabsrange[species]/speciesabsrange["xco2"])
    pl.boxplot(boxarrays_outside, positions=[i + 1 for i in hours_utc_plus3[:-1]], sym="", whis=[2.5, 97.5], notch=True, showmeans=True, bootstrap=1000)
    pl.xticks([3, 5, 7, 9, 11], ["0", "2", "4", "6", "8"])
    pl.title("from outside")
    
    # add a top right subplot with the mean daily cycle
    fig = pl.gcf()
    ax = fig.add_subplot(111)
    subpos = [0.64, 0.625, 0.337, 0.3]
    box = ax.get_position()
    width = box.width
    height = box.height
    inax_position  = ax.transAxes.transform(subpos[0:2])
    transFigure = fig.transFigure.inverted()
    infig_position = transFigure.transform(inax_position)    
    x = infig_position[0]
    y = infig_position[1]
    width *= subpos[2]
    height *= subpos[3]  # <= Typo was here
    subax = fig.add_axes([x,y,width,height],facecolor="w")
    x_labelsize = subax.get_xticklabels()[0].get_size()
    y_labelsize = subax.get_yticklabels()[0].get_size()
    x_labelsize *= subpos[2]**0.5
    y_labelsize *= subpos[3]**0.5
    subax.xaxis.set_tick_params(labelsize=x_labelsize)
    subax.yaxis.set_tick_params(labelsize=y_labelsize)
    pl.xlim(2, 12)
    pl.ylim(-1.6 * speciesabsrange[species]/speciesabsrange["xco2"],
            1.6 * speciesabsrange[species]/speciesabsrange["xco2"])
    pl.boxplot([i + 10 for i in boxarrays_inside_minus_10], positions=[i + 1 for i in hours_utc_plus3[:-1]], sym="", whis=[2.5, 97.5], notch=True, showmeans=True, bootstrap=1000)
    pl.xticks([3, 5, 7, 9, 11], ["0", "2", "4", "6", "8"])
    pl.title("from inside")

    pl.savefig(i[:-3]+"-data_by_wind_mask_ccgfilt_residuals-corrected-daily-" + species + ".pdf", bbox_inches='tight', transparent=False)
    pl.close()


    
    # plot_windrose_residuals(wdir_deg, residuals)
    # pl.show()
    
    # fit tokyo emissions
    residualstimeswspd = np.ma.masked_array(residuals * wspd_m_s, mask=residuals.mask)
    residualstimeswspd_ccgfilt = np.ma.masked_array(residuals_ccgfilt * wspd_m_s, mask=residuals_ccgfilt.mask)

    prior_date_index = D.variables["prior_date_index"][:]
    prior_gravity = D.variables["prior_gravity"][:] # prior_date, height
    pressure_wetair = D.variables["pout_hPa"][:] # prior_date
    molarmassair = 28.9 # g/mol
    molarmasswatervapor = 18 # g/mol
    water_vapor_fraction_dry_air = D.variables["xh2o_ppm"][:] / 1.e6 # date
    air_plus_water_vapor = 1 + water_vapor_fraction_dry_air
    air_plus_water_vapor_molarmass = (molarmassair + water_vapor_fraction_dry_air * molarmasswatervapor) / air_plus_water_vapor
    wet_air_to_dry_air_pressure_correction = molarmassair / air_plus_water_vapor_molarmass
    # expectation: up to 4% water vapor
    print("water_vapor_fraction_dry_air", water_vapor_fraction_dry_air)
    print("air_plus_water_vapor", air_plus_water_vapor)
    print("wet_air_to_dry_air_pressure_correction", wet_air_to_dry_air_pressure_correction)
    pressure = pressure_wetair * wet_air_to_dry_air_pressure_correction
    # TODO: convert wet air pressure (outer pressure) to dry air pressure
    # measured pressure = dry air column *
    #                       (molarmassair * (1 - water vapor fraction) + molarmasswatervapor * water vapor fraction)
    #                   = dry air column * molarmassair *
    #                       ((1 - water vapor fraction) + molarmasswatervapor / molarmassair * water vapor fraction))
    #                   = dry air pressure *
    #                       ((1 - water vapor fraction) + molarmasswatervapor / molarmassair * water vapor fraction))
    # dry air pressure = measured pressure * 1 / (((1 - water vapor fraction) + molarmasswatervapor / molarmassair * water vapor fraction))


    residualscolumnmasstimeswspd = concentration2mass(
        residualstimeswspd, prior_date_index,
        prior_gravity, pressure, speciesmolarmass[species],
        molarmassair,
        concentration_unit=unit)
    
    residualscolumnmasstimeswspd_ccgfilt = concentration2mass(
        residualstimeswspd_ccgfilt, prior_date_index,
        prior_gravity, pressure, speciesmolarmass[species],
        molarmassair,
        concentration_unit=unit)
    
    wspeed_low = 5
    wspeed_high = 15
    _speedmask1 = pl.where(wspd_m_s > wspeed_low, False, True)
    _speedmask2 = pl.where(wspd_m_s < wspeed_high, False, True)
    speedmask = np.ma.mask_or(_speedmask1, _speedmask2)
    bgmask = np.ma.mask_or(speedmask,
                           np.ma.mask_or(residualscolumnmasstimeswspd.mask,
                                         pl.where((wdir_deg - 30)%360 > (270 - 30), False, True)))
    bgmask_ccgfilt = np.ma.mask_or(speedmask,
                           np.ma.mask_or(residualscolumnmasstimeswspd_ccgfilt.mask,
                                         pl.where((wdir_deg - 30)%360 > (270 - 30), False, True)))
    bgmedian = np.median(np.ma.masked_array(residualscolumnmasstimeswspd, mask=bgmask).compressed())
    bgmedian_ccgfilt = np.median(np.ma.masked_array(residualscolumnmasstimeswspd_ccgfilt, mask=bgmask_ccgfilt).compressed())

    start, stop = tsukuba_tokyo_start_stop
    # calculate outer medians, left side:
    # one median per integer wind direction in [0, start), separately
    # for the polyfit residuals and for the ccgfilt residuals.
    outer_wdirs_left = list(range(0, int(start)))
    medians_outer_left = []
    medians_outer_ccgfilt_left = []
    for wd in outer_wdirs_left:
        # True (masked) wherever the rounded wind direction is not wd
        not_this_direction = pl.where(np.around(wdir_deg) == wd, False, True)
        mask = np.ma.mask_or(speedmask,
                             np.ma.mask_or(residualscolumnmasstimeswspd.mask,
                                           not_this_direction))
        medians_outer_left.append(np.median(np.ma.masked_array(residualscolumnmasstimeswspd, mask=mask).compressed()))
        mask_ccgfilt = np.ma.mask_or(speedmask,
                             np.ma.mask_or(residualscolumnmasstimeswspd_ccgfilt.mask,
                                           not_this_direction))
        # the ccgfilt values have to be selected with their own mask
        medians_outer_ccgfilt_left.append(np.median(np.ma.masked_array(residualscolumnmasstimeswspd_ccgfilt, mask=mask_ccgfilt).compressed()))

        
    # store the data for later json output
    residuals_mean_data = {}
    residuals_mean_data["wspd_m_s"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in wspd_m_s]
    residuals_mean_data["wdir_deg"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in wdir_deg]
    residuals_mean_data["wdirs_outer"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in outer_wdirs_left]
    residuals_mean_data["medians_outer"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in medians_outer_left]
    residuals_mean_data["medians_outer_ccgfilt"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in medians_outer_ccgfilt_left]
    
    # calculate outer medians, right side:
    # one median per integer wind direction in [stop, 360), separately
    # for the polyfit residuals and for the ccgfilt residuals.
    medians_outer_right = []
    medians_outer_ccgfilt_right = []
    outer_wdirs_right = list(range(int(stop), 360))
    for wd in outer_wdirs_right:
        # True (masked) wherever the rounded wind direction is not wd
        not_this_direction = pl.where(np.around(wdir_deg) == wd, False, True)
        mask = np.ma.mask_or(speedmask,
                             np.ma.mask_or(residualscolumnmasstimeswspd.mask,
                                           not_this_direction))
        medians_outer_right.append(np.median(np.ma.masked_array(residualscolumnmasstimeswspd, mask=mask).compressed()))
        mask_ccgfilt = np.ma.mask_or(speedmask,
                             np.ma.mask_or(residualscolumnmasstimeswspd_ccgfilt.mask,
                                           not_this_direction))
        # the ccgfilt values have to be selected with their own mask
        medians_outer_ccgfilt_right.append(np.median(np.ma.masked_array(residualscolumnmasstimeswspd_ccgfilt, mask=mask_ccgfilt).compressed()))
    
    # add the right side of the values
    residuals_mean_data["wdirs_outer"].extend([float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in outer_wdirs_right])
    residuals_mean_data["medians_outer"].extend([float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in medians_outer_right])
    residuals_mean_data["medians_outer_ccgfilt"].extend([float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in medians_outer_ccgfilt_right])
    
    # calculate inside means, medians, stds
    wdirs = list(range(int(start), int(stop)))
    means = []
    medians = []
    stds = []
    means_ccgfilt = []
    medians_ccgfilt = []
    stds_ccgfilt = []
    for wd in wdirs:
        # polyfit
        mask = np.ma.mask_or(speedmask,
                             np.ma.mask_or(residualscolumnmasstimeswspd.mask,
                                           pl.where(np.around(wdir_deg) == wd, False, True)))
        means.append(np.mean(np.ma.masked_array(residualscolumnmasstimeswspd, mask=mask).compressed()))
        medians.append(np.median(np.ma.masked_array(residualscolumnmasstimeswspd, mask=mask).compressed()))
        stds.append(np.std(np.ma.masked_array(residualscolumnmasstimeswspd, mask=mask).compressed()))
        # ccgfilt
        mask_ccgfilt = np.ma.mask_or(speedmask,
                             np.ma.mask_or(residualscolumnmasstimeswspd_ccgfilt.mask,
                                           pl.where(np.around(wdir_deg) == wd, False, True)))
        means_ccgfilt.append(np.mean(np.ma.masked_array(residualscolumnmasstimeswspd_ccgfilt, mask=mask_ccgfilt).compressed()))
        medians_ccgfilt.append(np.median(np.ma.masked_array(residualscolumnmasstimeswspd_ccgfilt, mask=mask_ccgfilt).compressed()))
        stds_ccgfilt.append(np.std(np.ma.masked_array(residualscolumnmasstimeswspd_ccgfilt, mask=mask_ccgfilt).compressed()))
    residuals_mean_data["fromtokyo_wdirs"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in wdirs]
    # polyfit
    residuals_mean_data["fromtokyo_means"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in means]
    residuals_mean_data["fromtokyo_stds"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in stds]
    residuals_mean_data["fromtokyo_medians"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in medians]
    residuals_mean_data["bgmedian"] = bgmedian
    # ccgfilt
    residuals_mean_data["fromtokyo_means_ccgfilt"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in means_ccgfilt]
    residuals_mean_data["fromtokyo_stds_ccgfilt"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in stds_ccgfilt]
    residuals_mean_data["fromtokyo_medians_ccgfilt"] = [float(DUMMY_NAME_USED_NOWHEREELSE) for DUMMY_NAME_USED_NOWHEREELSE in medians_ccgfilt]
    residuals_mean_data["bgmedian_ccgfilt"] = bgmedian_ccgfilt
    
    # plot polyfit
    pl.axhline(bgmedian, color="black", linewidth=4, label="median background")
    pl.axvline(270, color="k", linewidth=3, alpha=0.4, label="background limits")
    pl.axvline(30, color="k", linewidth=3, alpha=0.4)

    pl.scatter(np.ma.masked_array(wdir_deg, mask=speedmask),
               np.ma.masked_array(residualscolumnmasstimeswspd, mask=speedmask), marker="x", alpha=0.1, color="gray")
    pl.plot(outer_wdirs_left, np.array(medians_outer_left), color="k", alpha=0.3, label="median")
    pl.plot(outer_wdirs_right, np.array(medians_outer_right), color="k", alpha=0.3)
    pl.fill_between(wdirs, np.array(means)-np.array(stds), np.array(means)+np.array(stds), label="std", alpha=0.2)
    print(species, "column mass times wspd, sum of means minus median background from", start, "to", stop, ":", sum(means) - bgmedian)
    print(species, "column mass times wspd, mean of means minus median background from", start, "to", stop, ":", np.mean(means) - bgmedian, "+-", np.std(means))
    pl.plot(wdirs, np.array(medians), label="median Tokyo")
    pl.plot(wdirs, np.array(means), label="mean Tokyo")
    pl.legend(loc="upper left", fancybox=True, framealpha=0.5)
    pl.title("%s, $\\bar\Delta$=%4.1f$\pm$%4.1f %s m$^{-1}$s$^{-1}$, %d < wspd ms$^{-1}$ < %d" % (name, np.mean(means) - bgmedian, np.std(means), "g", wspeed_low, wspeed_high))
    pl.xlabel("wind / degree")
    pl.xlim(0, 360)
    pl.ylabel("$\Delta$%s $\\times$ wspd / %s m$^{-2}$ $\\times$ ms$^{-1}$"  % (speciestex[species], "g"))
    pl.grid()
    
    pl.savefig(i[:-3]+"-tsukuba-residuals-" + species + filetype, bbox_inches='tight', transparent=False)
    pl.close()

    # plot ccgfilt: column mass times wind speed by wind direction, with
    # the median background, the medians outside of the Tokyo sector and
    # mean, median and std inside of the Tokyo sector.
    pl.axhline(bgmedian_ccgfilt, color="black", linewidth=4, label="median background")
    # The background mask selects (wdir_deg - 30)%360 > 240, so its
    # limits are 270 and 30 degree, the same as in the polyfit plot.
    pl.axvline(270, color="k", linewidth=2, alpha=0.2, label="background limits")
    pl.axvline(30, color="k", linewidth=2, alpha=0.2)

    pl.scatter(np.ma.masked_array(wdir_deg, mask=speedmask),
               np.ma.masked_array(residualscolumnmasstimeswspd_ccgfilt, mask=speedmask), marker="x", alpha=0.1, color="gray")
    pl.plot(outer_wdirs_left, np.array(medians_outer_ccgfilt_left), color="k", alpha=0.3, label="median")
    pl.plot(outer_wdirs_right, np.array(medians_outer_ccgfilt_right), color="k", alpha=0.3)
    pl.fill_between(wdirs, np.array(means_ccgfilt)-np.array(stds_ccgfilt), np.array(means_ccgfilt)+np.array(stds_ccgfilt), label="std", alpha=0.2)
    print(species, "column mass times wspd, sum of means minus median background from", start, "to", stop, ":", sum(means_ccgfilt) - bgmedian_ccgfilt)
    print(species, "column mass times wspd, mean of means minus median background from", start, "to", stop, ":", np.mean(means_ccgfilt) - bgmedian_ccgfilt, "+-", np.std(means_ccgfilt))
    pl.plot(wdirs, np.array(medians_ccgfilt), label="median Tokyo")
    pl.plot(wdirs, np.array(means_ccgfilt), label="mean Tokyo")
    pl.legend(loc="upper left", fancybox=True, framealpha=0.5)
    # backslashes are escaped explicitly; the resulting strings are unchanged
    pl.title("%s, $\\bar\\Delta$=%4.1f$\\pm$%4.1f %s m$^{-1}$s$^{-1}$, %d < wspd ms$^{-1}$ < %d | ccgfilt" % (name, np.mean(means_ccgfilt) - bgmedian_ccgfilt, np.std(means_ccgfilt), "g", wspeed_low, wspeed_high))
    pl.xlabel("wind / degree")
    pl.xlim(0, 360)
    pl.ylabel("$\\Delta$%s $\\times$ wspd / %s m$^{-2}$ $\\times$ ms$^{-1}$"  % (speciestex[species], "g"))
    pl.grid()
    
    pl.savefig(i[:-3]+"-tsukuba-residuals--ccgfilt-" + species + filetype, bbox_inches='tight', transparent=False)
    pl.close()


    # save the data as json
    with open(i[:-3] + "-tsukuba-residuals" + species + ".json", "w") as f:
        json.dump(residuals_mean_data, f)




    
    # plot the fits: data, trend, detrended data, yearly cycle fit and
    # the shifted residuals over time.
    # NOTE(review): time/365.25 + 1970 is used as the year, so time is
    # presumably in days since 1970 -- confirm.
    pl.plot(time/365.25 + 1970., xco2_ppm, label="data", color="royalblue")
    # pl.plot(time/365.25 + 1970., xco2_ppm_trendfit(time) + xco2_ppm_polyfit(time%fitcycle_days), label="fit trend + yearly")

    # limit the trend line to the range between the first and the last
    # multiple of 365.25 within the data; use all data if fewer than two
    # unmasked values remain in this range.
    minidx = np.searchsorted(time, int(time[0]//365.25 + 1)*365.25)
    maxidx = np.searchsorted(time, int(time[-1]//365.25)*365.25)
    if len(time[minidx:maxidx].compressed()) < 2:
        minidx, maxidx = 0, len(time)
    pl.plot(time[minidx:maxidx]/365.25 + 1970., xco2_ppm_trendfit(time[minidx:maxidx]), label="trend", color="red")
    pl.plot(time/365.25 + 1970., xco2_ppm - xco2_ppm_trendfit(time) + xco2_ppm_trendfit(time[0]), label="detrended (data - trend)", color="deepskyblue")
    
    pl.plot(time/365.25 + 1970., xco2_ppm_polyfit(time%fitcycle_days) + xco2_ppm_trendfit(time[0]), label="yearly cycle fit, degree %d" % degree, color="magenta")
    shift = max(1e-12, xco2_ppm_trendfit(time[0]) - 6 * np.std(residuals))
    # round shift to give clearer output
    if math.log10(shift) < 0:
        shift = np.around(shift, -int(math.log10(shift)) + 2)
    else:
        shift = np.around(shift, -int(math.log10(shift)) + 1)
    pl.plot(time/365.25 + 1970, residuals_years + shift, label="residuals (data - fit), %+3.1f" % shift + unit, color="green")
    pl.legend(loc="best", fancybox=True, framealpha=0.5)
    pl.title("Trend and average seasonal cycle in Tsukuba, Japan (tk)")
    pl.xlabel("time / years")
    pl.xlim(41.5 + 1970, 46.5 + 1970)
    # NOTE(review): the last value shown is xco2_ppm_polyfitparams[0]
    # again; possibly xco2_ppm_polyfitparams[-1] was intended -- confirm.
    pl.text(0.98, 0.015, ( 'trend: %.3f ppm per year\n' % (365.25*xco2_ppm_trendfitparams_outside[0],)) +
            ('fitparams: ') +
            (('%.1e, ' * len(xco2_ppm_polyfitparams)) % tuple([365.25*k for k in xco2_ppm_polyfitparams[:-1]] + [xco2_ppm_polyfitparams[0]]))[:-2],
            verticalalignment='bottom', horizontalalignment='right',
            transform=pl.gca().transAxes, color='gray', fontsize=9)
    pl.axvline(time[minidx]/365.25 + 1970, color="y", linewidth=2, alpha=0.4, label="lower fit limit")
    # maxidx is an exclusive slice bound and is len(time) in the fallback
    # above, so clamp it to the last valid index before using it directly.
    upper_limit_idx = min(maxidx, len(time) - 1)
    pl.axvline(time[upper_limit_idx]/365.25 + 1970, color="m", linewidth=2, alpha=0.4, label="upper fit limit")

    pl.ylabel(speciestex[species] + " / " + unit)
    pl.gca().get_xaxis().get_major_formatter().set_useOffset(False)
    pl.grid()
    pl.savefig(i[:-3]+"-fit-" + species + filetype, bbox_inches='tight', transparent=False)
    pl.close()

    # plot daily cycle
    daily_change = xco2_ppm - xco2_ppm_trendfit(time) - xco2_ppm_polyfit(time%fitcycle_days)
    shift = 6 * np.std(daily_change)
    # round shift to give clearer output
    if math.log10(shift) < 0:
        shift = np.around(shift, -int(math.log10(shift)) + 1)
    else:
        shift = np.around(shift, -int(math.log10(shift)))
    pl.plot(((time + 3./24)%1) * 24, daily_change, ",", label="data (w/o trend, w/o yearly cycle)", alpha=0.3)
    pl.plot(((time + 3./24)%1) * 24, residuals - shift, ",", label="residuals (data - fit) - %3.1f" % shift, alpha=0.3)
    pl.plot(((time + 3./24)%1) * 24, xco2_ppm_polyfit_days((time + 3./24)%1), ".", label="daily cycle fit, degree %d" % (degree_day, ), alpha=0.5)
    pl.xticks([3, 5, 7, 9, 11], ["0", "2", "4", "6", "8"])
    pl.ylim(-19.9 * speciesabsrange[species]/speciesabsrange["xco2"],
            19.9 * speciesabsrange[species]/speciesabsrange["xco2"])
    pl.title("Average daily cycle in Tsukuba, Japan (tk)")
    pl.legend(loc="upper left", fancybox=True, framealpha=0.5)

    pl.xlabel("time of day / hour UTC")
    pl.ylabel(speciestex[species] + " / " + unit)
    pl.grid()
    pl.text(0.98, 0.015, ('\nfitparams: ') +
            (('%.2e, ' * len(xco2_ppm_polyfitparams_days)) % tuple(xco2_ppm_polyfitparams_days))[:-2] + '; R^2: %3.3f' % (Rsquared_days, ),
            verticalalignment='bottom', horizontalalignment='right',
            transform=pl.gca().transAxes, color='gray', fontsize=8)
    # pl.show()

    # add a subplot with the mean daily cycle
    fig = pl.gcf()
    ax = fig.add_subplot(111)
    subpos = [0.64, 0.625, 0.335, 0.34]
    box = ax.get_position()
    width = box.width
    height = box.height
    inax_position  = ax.transAxes.transform(subpos[0:2])
    transFigure = fig.transFigure.inverted()
    infig_position = transFigure.transform(inax_position)
    x = infig_position[0]
    y = infig_position[1]
    width *= subpos[2]
    height *= subpos[3]  # <= Typo was here
    subax = fig.add_axes([x,y,width,height],facecolor="w")
    x_labelsize = subax.get_xticklabels()[0].get_size()
    y_labelsize = subax.get_yticklabels()[0].get_size()
    x_labelsize *= subpos[2]**0.5
    y_labelsize *= subpos[3]**0.5
    
    subax.xaxis.set_tick_params(labelsize=x_labelsize)
    subax.yaxis.set_tick_params(labelsize=y_labelsize)
    pl.xlim(2, 12)
    pl.ylim(-1.2 * speciesabsrange[species]/speciesabsrange["xco2"],
            1.2 * speciesabsrange[species]/speciesabsrange["xco2"])
    pl.plot(((time + 3./24)%1) * 24, daily_change, ",", label="data", alpha=0.3)
    pl.plot(((time + 3./24)%1) * 24, xco2_ppm_polyfit_days((time + 3./24)%1), ".", label="daily cycle fit", alpha=0.5, color="g")

    # add a boxplot
    hours_utc_plus3 = [3, 5, 7, 9, 11]
    hours_utc_labels = ["0", "2", "4", "6", "8"]
    boxarrays = [
        daily_change[
            np.logical_and(
                24 * ((time + 3./24)%1) > i,
                24 * ((time + 3./24)%1) < i+2)]
        for i in hours_utc_plus3[:-1]]
    pl.boxplot(boxarrays, positions=[i + 1 for i in hours_utc_plus3[:-1]], sym="", whis=[2.5, 97.5], notch=True, showmeans=True, bootstrap=1000)

    
    pl.xticks(hours_utc_plus3, hours_utc_labels)
    pl.legend(loc="best", fancybox=True, framealpha=0.7)

    pl.savefig(i[:-3]+"-fit-daily-" + species + filetype, bbox_inches='tight', transparent=False)
    pl.close()

    # radial plot of the residuals by wind direction, colored by year
    ax = pl.subplot(111, projection='polar')
    b = ax.scatter(x=(wdir_deg%360) / 360. * np.pi * 2,
                   y=residuals + speciesabsrange[species],
                   c=((time/365.25) + 1970),
                   s=wspd_m_s*10,
                   alpha=1./math.log10(len(residuals) + 10),
                   cmap=pl.cm.Paired) # marker=safemarkersfilled[n], linestyle='None', alpha=0.1)
    ax.set_rmax(2 * (speciesabsrange[species]))
    ax.set_rticks([t * speciesabsrange[species]/10. for t in range(5, 20, 5)])
    ax.set_yticklabels([str(l * speciesabsrange[species] / 10.) for l in range(-5, 10, 5)])  # less radial ticks
    # ax.set_rlabel_position(0) # show radial labels on zero-line
    ax.grid(True)
    cb = pl.colorbar(b)
    cb.formatter.set_useOffset(False)
    cb.set_alpha(0.5)
    cb.draw_all()
    cb.update_ticks()
    cb.set_label("time / year")
    if time[-1] - 730 > time[0]:
        cb.set_ticks(list(range(1970, 2100))) # for two or more years simply show integers
    else:
        cb.set_ticks([l/4. for l in range(1970*4, 2100*4)]) # for less than two years show quarters
    
    ax.set_title("Residuals by wind direction $\Delta$" + speciestex[species] + " (y - fit) / %s, %s (%s)" % (unit, name, name_short), va='bottom')
    pl.legend(loc="best", fancybox=True, framealpha=0.5)
    # always store this as .png, not as `filetype`, since the pdf would be huge.
    pl.savefig(i[:-3]+"-radial-" + species + ".png", bbox_inches='tight', transparent=False)
    pl.close()

    # radial plot of the residuals by wind direction, colored by month of the year
    ax = pl.subplot(111, projection='polar')
    b = ax.scatter(x=(wdir_deg%360) / 360. * np.pi * 2,
                   y=residuals + speciesabsrange[species],
                   c=(12 * ((time/365.25)%1)),
                   s=wspd_m_s*10,
                   alpha=1./math.log10(len(residuals) + 10),
                   cmap=pl.cm.Paired) # marker=safemarkersfilled[n], linestyle='None', alpha=0.1)
    ax.set_rmax(2 * (speciesabsrange[species]))
    ax.set_rticks([t * speciesabsrange[species]/10. for t in range(5, 20, 5)])
    ax.set_yticklabels([str(l * speciesabsrange[species] / 10.) for l in range(-5, 10, 5)])  # less radial ticks
    # ax.set_rlabel_position(0) # show radial labels on zero-line
    ax.grid(True)
    cb = pl.colorbar(b)
    cb.formatter.set_useOffset(False)
    cb.set_alpha(0.5)
    cb.draw_all()
    cb.update_ticks()
    cb.set_label("time / month")
    cb.set_ticks(list(range(0, 12))) # one colorbar tick per integer month value (0..11)
    ax.set_title("Residuals by wind direction $\Delta$" + speciestex[species] + " (y - fit) / %s, %s (%s)" % (unit, name, name_short), va='bottom')
    pl.legend(loc="best", fancybox=True, framealpha=0.5)
    # always store this as .png, not as `filetype`, since the pdf would be huge.
    pl.savefig(i[:-3]+"-radial-month-" + species + ".png", bbox_inches='tight', transparent=False)
    pl.close()

    
    plot_hexbin_by_direction_composite(wdir_deg, wspd_m_s, residuals, time,
                                       species=species, speciestexstring=speciestex[species],
                                       scale=speciesabsrange[species] / float(speciesabsrange["xco2"]),
                                       unit=unit, name=name, name_short=name_short,
                                       filename=i[:-3]+"-hexbin-wind-speed-" + species + filetype)

    # always store residuals by direction as .png, not as `filetype`, since the pdf would be huge.
    plot_residuals_by_direction_composite(wdir_deg, wspd_m_s, residuals, time,
                                          species=species, speciestexstring=speciestex[species],
                                          scale=speciesabsrange[species] / float(speciesabsrange["xco2"]),
                                          unit=unit, filename=i[:-3]+"-radial-wind-speed-" + species + ".png")
    
    plot_hexbin_by_direction_composite(wdir_deg, wspd_m_s, residuals_ccgfilt, time,
                                       species=species, speciestexstring=speciestex[species],
                                       scale=speciesabsrange[species] / float(speciesabsrange["xco2"]),
                                       unit=unit, name=name, name_short=name_short,
                                       filename=i[:-3]+"-hexbin-wind-speed--ccgfilt-" + species + filetype)

    # always store residuals by direction as .png, not as `filetype`, since the pdf would be huge.
    plot_residuals_by_direction_composite(wdir_deg, wspd_m_s, residuals_ccgfilt, time,
                                          species=species, speciestexstring=speciestex[species],
                                          scale=speciesabsrange[species] / float(speciesabsrange["xco2"]),
                                          unit=unit, filename=i[:-3]+"-radial-wind-speed--ccgfilt-" + species + ".png")
    
    for nbin in range(360 // binsize_deg):
        w = np.ma.masked_outside(wdir_deg, nbin*binsize_deg, (nbin+1)*binsize_deg)
        t = np.ma.masked_array(time, mask=w.mask) # days since 1970
        # dco2peryear_ppm = 1.9
        # c = np.ma.masked_array(xco2_ppm, mask=w.mask) - (t/365.25 * dco2peryear_ppm)
        c = np.ma.masked_array(residuals, mask=w.mask)
        e = np.ma.masked_array(xco2_ppm_error, mask=w.mask)
        if nbin not in labelled:
            # add the label, but without transparency
            pl.plot(np.NaN, np.NaN,
                    marker=safemarkersfilled[n], linestyle='None', color=colors[list(colors.keys())[nbin]],
                    label="{}$^\circ$ < wdir < {}$^\circ$".format(nbin*binsize_deg, (nbin+1)*binsize_deg))
            labelled.add(nbin)
        pl.plot((t%1) * 24, c,
                marker=safemarkersfilled[n], linestyle='None', color=colors[list(colors.keys())[nbin]],
                alpha=1./math.log10(sum(t.mask)))
    leg = pl.legend(loc="best", fancybox=True, framealpha=0.5)
    pl.title("Residuals in %s (%s)" % (name, name_short))
    pl.xlabel("time of day / hour")
    pl.ylabel("$\Delta$" + speciestex[species] + " (measurement minus fit) / " + unit)
    # always store this file as .png, not as `filetype`, since the pdf would be large (3MiB) and I do not need it for publication.
    pl.savefig(i[:-3]+"-daily-by-wdir-" + species + ".png", bbox_inches='tight', transparent=False)
    pl.close()
    labelled = set()
