# SPD2CSV.py for python 3.8
# Dave Typinski, May 22, 2021
#
# Finds all SPD files in and below data root directory.  Converts header
# info to text format and data to CSV format.  Saves CSV and TXT files in
# same directory as source SPD file.
#
# NOTE: Works only with SPD files using UTC time stamps and floating point
# data. SPD files without time stamps and/or SPD files with integer data
# and/or SPD files with non-UTC (i.e., local time) time stamps are NOT
# supported by this routine.
#
# Inputs (configured below)
#   data_path -> string, full path to the directory of SPD files
#   overwrite -> boolean, if TRUE, old CSV files are overwritten
#
# Output
#   One CSV and one TXT file per input SPD file. Output files are named the
#	same as the source SPD file, but with different file extensions.
#       CSV output file format:
#           [Column 00] -> float, Julian date (JDUT) time stamp
#           [Column 01] -> float, 4-digit year time stamp 
#           [Column 02] -> float, 2-digit month time stamp 
#           [Column 03] -> float, 2-digit day time stamp 
#           [Column 04] -> float, 2-digit hour time stamp 
#           [Column 05] -> float, 2-digit minute time stamp 
#           [Column 06] -> float, 2.3-digit second.millisecond time stamp
#           [Column 07] -> float, Channel 1 data 
#           [Column 08] -> float, Channel 2 data 
#           [Column 09] -> float, Channel 3 data 
#           [Column 10] -> float, Channel 4 data 
#           [Column 11] -> float, Channel 5 data 
#           [Column 12] -> float, Channel 6 data 
#           [Column 13] -> float, Channel 7 data 
#           [Column 14] -> float, Channel 8 data

import numpy as np
import csv
from pathlib import Path
import os
import struct
import re
import pandas as pd
from itertools import product
import matplotlib.pyplot as plt
from decimal import Decimal, ROUND_HALF_UP
import datetime

# Start of user-definable constants ---------------------------------------

# Root directory that is searched recursively for *.SPD input files.
datapath = Path('Z:/AJ4CO_OL_GC')

# If True, regenerate CSV output even when a CSV file already exists.
overwrite = False

# End of user-definable constants------------------------------------------

SPDfiles = []    # full paths of SPD files still needing conversion
SPDcounter = 0   # total number of SPD files found (processed or not)

def vbtime2dtime(vb_time):
    """Convert a Visual Basic (OLE automation) date to a UTC datetime.

    VB dates count days since 1899-12-30; day 25569 is the Unix epoch
    (1970-01-01), so whole days map to epoch seconds and the fractional
    part carries the time of day.
    """
    whole_days, day_fraction = divmod(vb_time, 1)
    epoch_seconds = (whole_days - 25569) * 86400.0
    return datetime.datetime.utcfromtimestamp(
        epoch_seconds + day_fraction * 86400)

# Find all SPD files in and below datapath directory------------------------

# Recursively locate every *.SPD file below datapath and queue those whose
# CSV output is missing (or all of them when overwrite is True).
print()
print('Searching for SPD files in', str(datapath)+os.path.sep
      ,'(Overwrite CSV =',str(overwrite)+')')

for path, subdirs, files in os.walk(datapath):
    for name in files:
        # os.path.splitext is safe for names without an extension; the
        # previous rsplit('.',1)[1] raised IndexError on such names.
        if os.path.splitext(name)[1].upper() != '.SPD':
            continue
        SPDcounter += 1
        fname = os.path.join(path, name)
        base = name.rsplit('.', 1)[0]
        if not overwrite:
            # Skip SPD files whose CSV twin already exists.
            if os.path.exists(fname.rsplit('.', 1)[0] + '.csv'):
                print(base + '.csv exists, skipping file', end='\r')
            else:
                print(base + '.csv not found            ', end='\r')
                SPDfiles.append(fname)
        else:
            print(name, ' found', end='\r')
            SPDfiles.append(fname)

print('\nFound',f'{len(SPDfiles):,d}','SPD files to process out of'
      ,f'{SPDcounter:,d}','SPD files total')

# Process all SPD files that need processing--------------------------------
# Process all SPD files that need processing--------------------------------
# For each SPD file: parse the fixed-layout binary header into a .txt file,
# then parse the data records into a .csv file alongside the source.
for ptr, SPDfile in enumerate(SPDfiles, 1):

    print(f'Processing file {ptr:,d} of {len(SPDfiles):,d} -> {SPDfile}'
          ,end='\r')

    # Process SPD file header and write to text file------------------------

    # Generate blank header list of [label, value] pairs, filled in below
    header = [['      Version: ',''],
              ['   Start Time: ',''],
              ['     End Time: ',''],
              ['     Latitude: ',''],
              ['    Longitude: ',''],
              ['        Max Y: ',''],
              ['        Min Y: ',''],
              ['    Time Zone: ',''],
              ['       Source: ',''],
              ['       Author: ',''],
              ['   Local Name: ',''],
              ['     Location: ',''],
              ['Channel Count: ',''],
              ['  Note Length: ',''],
              ['\nNote: ','']]

    filesize = os.stat(SPDfile).st_size

    # Context manager guarantees the SPD file is closed even on error
    # (the original used a bare open()/close() pair).
    with open(SPDfile, 'rb') as fp:

        # Read Version (10-byte string)
        header[0][1] = fp.read(10).decode('utf8', 'ignore')

        # Read Chart Start & End Times and truncate to milliseconds
        header[1][1] = str(vbtime2dtime(
            struct.unpack('d', fp.read(8))[0]))[:-3]
        header[2][1] = str(vbtime2dtime(
            struct.unpack('d', fp.read(8))[0]))[:-3]

        # Read Lat & Lon
        header[3][1] = f'{struct.unpack("d", fp.read(8))[0]:.6f}'
        header[4][1] = f'{struct.unpack("d", fp.read(8))[0]:.6f}'

        # Read Max & Min Y
        header[5][1] = f'{struct.unpack("d", fp.read(8))[0]:.3f}'
        header[6][1] = f'{struct.unpack("d", fp.read(8))[0]:.3f}'

        # Read Time Zone (signed 16-bit hour offset) and tag the start/end
        # times.  abs() is required for the negative branch: the original
        # appended f'{tz:02d}' (which already carries the sign) after
        # 'UTC-', producing e.g. 'UTC--5'.
        tzone = struct.unpack('h', fp.read(2))[0]
        header[7][1] = f'{tzone:02d}'
        if tzone == 0:
            tzlabel = ' UTC'
        elif tzone > 0:
            tzlabel = ' UTC+' + f'{tzone:02d}'
        else:
            tzlabel = ' UTC-' + f'{abs(tzone):02d}'
        header[1][1] += tzlabel
        header[2][1] += tzlabel

        # Read Source, Author, Local Name, Location (fixed-width strings)
        header[8][1] = fp.read(10).decode('utf8', 'ignore')
        header[9][1] = fp.read(20).decode('utf8', 'ignore')
        header[10][1] = fp.read(20).decode('utf8', 'ignore')
        header[11][1] = fp.read(40).decode('utf8', 'ignore')

        # Read Channel count
        channelcount = struct.unpack('h', fp.read(2))[0]
        header[12][1] = str(channelcount)

        # Read Note length (bytes)
        notelength = struct.unpack('i', fp.read(4))[0]
        header[13][1] = str(notelength)

        # Read Note text & collapse runs of newlines to one blank line
        header[14][1] = '\n\n' + fp.read(notelength).decode('utf8', 'ignore')
        header[14][1] = re.sub(r'[\r\n][\r\n]{2,}', '\n\n', header[14][1])

        # Write header data to text file
        headerfile = SPDfile.rsplit('.',1)[0]+'.txt'
        with open(headerfile,'w+') as fh:
            for line in header:
                fh.write(line[0]+line[1]+'\n')

        # Process SPD file data and write to CSV file-----------------------

        # Remaining bytes hold the records: one 8-byte VB time stamp plus
        # channelcount 8-byte doubles per record.
        datasize = filesize - fp.tell()
        recordcount = int(datasize/(8*(channelcount + 1)))

        # Parse each record's time stamp and channel data, append each
        # record as a new row to the 2D data list
        data = []
        for _ in range(recordcount):
            record = []
            for j in range(0, channelcount+1):
                d = struct.unpack('d', fp.read(8))[0]
                if j == 0:
                    jdut = d + 2415018.5  # convert VB date to JDUT
                    record.append(f'{jdut:.8f}')
                    # Decode the time stamp once instead of once per field
                    # (the original called vbtime2dtime seven times here).
                    ts = vbtime2dtime(d)
                    record.append(ts.strftime('%Y'))
                    record.append(ts.strftime('%m'))
                    record.append(ts.strftime('%d'))
                    record.append(ts.strftime('%H'))
                    record.append(ts.strftime('%M'))
                    record.append(ts.strftime('%S')
                                  + '.' + ts.strftime('%f')[:3])
                else:
                    # handle occasional non-integer count
                    record.append(int(d))
            data.append(record)

    # save data list to CSV file
    datafile = SPDfile.rsplit('.',1)[0]+'.csv'
    with open(datafile,'w+', newline='') as data_csv:
        csvWriter = csv.writer(data_csv, delimiter=',')
        csvWriter.writerows(data)

# Final status line; the leading newline (only when files were processed)
# keeps the last '\r'-terminated progress line from being overwritten.
lead = '\n' if SPDfiles else ''
print(lead + 'Done converting SPD files to CSV files.')

# SPDCSV21minSums.py for python 3.8
# Dave Typinski, May 24, 2021
#
# Finds all SPDCSV files in and below data root directory.  For each input
# CSV file, sums data into 1-minute bins and saves a new CSV file and PNG
# and PDF plot files.
#
# NOTE: Works only with CSV files created using SPD2CSV.py.
#
# Inputs (configured below)
#   data_path -> string, full path to the directory of SPDCSV files
#   overwrite -> boolean, if TRUE, old CSV/PNG/PDF files are overwritten
#
# Output
#   One CSV, PNG, and PDF file per input CSV file. Output files are named
#   the same as the source CSV file + '_1_minute_sums'.
#       CSV output file record format:
#           [Column 0] -> float, JDUT
#           [Column 1] -> int, year
#           [Column 2] -> int, month
#           [Column 3] -> int, day
#           [Column 4] -> int, minute of day in hhmm format
#           [Column 5] -> int, number of samples summed in this record
#           [Column 6] -> int, sum of all source data samples in one minute
#       PNG and PDF files contain a plot of the one-minute sums

SPDfiles = []     # reset: full paths of SPD files needing 1-minute sums
SPDcounter = 0    # reset: total count of SPD files found
# Column names matching the CSV layout written by the converter above:
# seven time-stamp fields followed by eight channel-data columns.
CSVheaderlist = ['JDUT','Year','Month','Day','Hour','Minute','Second'
                 ,'CH1Data','CH2Data','CH3Data','CH4Data'
                 ,'CH5Data','CH6Data','CH7Data','CH8Data']

# Find all SPD files in and below datapath directory------------------------

# Recursively locate every *.SPD file below datapath and queue those whose
# 1-minute sums CSV is missing (or all of them when overwrite is True).
print()
print('Searching for SPD files in', str(datapath)+os.path.sep
      ,'(Overwrite 1-minute CSV =',str(overwrite)+')')

for path, subdirs, files in os.walk(datapath):
    for name in files:
        # os.path.splitext is safe for names without an extension; the
        # previous rsplit('.',1)[1] raised IndexError on such names.
        if os.path.splitext(name)[1].upper() != '.SPD':
            continue
        SPDcounter += 1
        fname = os.path.join(path, name)
        base = name.rsplit('.', 1)[0]
        if not overwrite:
            # Skip SPD files whose 1-minute sums CSV already exists.
            if os.path.exists(fname.rsplit('.', 1)[0]
                              + '_1_minute_sums.csv'):
                print(base + '_1_minute_sums.csv exists, skipping file'
                      , end='\r')
            else:
                print(base + '_1_minute_sums.csv not found            '
                      , end='\r')
                SPDfiles.append(fname)
        else:
            print(name, ' found', end='\r')
            SPDfiles.append(fname)

print('\nFound',f'{len(SPDfiles):,d}','SPDCSV files to process out of'
      ,f'{SPDcounter:,d}','SPDCSV files total')

# Process all SPD files that need processing--------------------------------
# For each day's CSV file: sum channel 1 into 1440 one-minute bins, write a
# sums CSV, and render a daily PNG/PDF plot of the bins.
for ptr, SPDfile in enumerate(SPDfiles, 1):

    # Derive all input/output names from the SPD path once.
    base = SPDfile.rsplit('.', 1)[0]
    CSVinfile = base + '.csv'
    CSVoutfile = base + '_1_minute_sums.csv'
    PNGoutfile = base + '_1_minute_sums.png'
    PDFoutfile = base + '_1_minute_sums.pdf'

    print(f'Processing file {ptr:,d} of {len(SPDfiles):,d} -> {CSVoutfile}'
          ,end='\r')

    # Open and read SPDCSV file
    dfin = pd.read_csv(CSVinfile, names=CSVheaderlist)

    # create blank output data list
    data = []

    # iterate over hours and minutes, summing CH1 data within each minute.
    # NOTE(review): datain.iloc[0, ...] assumes every minute of the day
    # has at least one sample; an empty minute raises IndexError — confirm
    # input files always span a full day.
    for h,m in product(range(24),range(60)):
        record = []

        datain = dfin.loc[(dfin.Hour == h) & (dfin.Minute == m)]

        record.append(f'{datain.iloc[0,0]:.8f}') # JDUT of first sample
        record.append(datain.iloc[0,1]) # year
        record.append(datain.iloc[0,2]) # month
        record.append(datain.iloc[0,3]) # day
        record.append(f'{h*100 + m:04}') # minute of day in hhmm form
        record.append(len(datain)) # number of samples in this minute
        record.append(int(datain.iloc[:,7].sum())) # CH1 sum for the minute

        data.append(record)

    # save data list to CSV file
    with open(CSVoutfile,'w+', newline='') as data_csv:
        csvWriter = csv.writer(data_csv, delimiter=',')
        csvWriter.writerows(data)

    # save daily plot to PNG and PDF files

    # y values & stats; x is implicit (one point per minute, 0..1439)
    ys = [row[6] for row in data]
    ymedian = int(Decimal(np.median(ys))
                  .to_integral_value(rounding=ROUND_HALF_UP))
    ymean = np.mean(ys)
    ystdev = np.std(ys)
    yscov = ystdev/ymean   # coefficient of variation

    # create figure and axes
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(ys, color='navy', linewidth=0.7)

    # set plot limits
    plt.xlim([0,1440])
    plt.ylim([0,100])

    # configure y-axis ticks and grid lines
    ax.set_yticks(np.arange(0,101,20), minor=False)
    ax.set_yticks(np.arange(0,101,5), minor=True)
    ax.yaxis.grid(True, which='major', color='gray', lw=0.5)

    # configure x-axis ticks and grid lines (one label every 3 hours)
    xtloc = np.arange(start=0, stop=1441, step=180)
    xtlab = ['0000', '0300', '0600', '0900', '1200',
             '1500', '1800', '2100', '0000']
    ax.tick_params(axis='x', which='major', pad=8)
    plt.xticks(xtloc,xtlab, rotation=0, fontsize="10", va="center")
    ax.vlines(x=xtloc, ymin=0, ymax=100, color='gray', lw=0.5)

    # configure plot title and axis labels
    plt.suptitle('Observed Gamma Counts, AJ4CO, '
              +str(dfin.iat[0,1])+' '
              +f'{dfin.iat[0,2]:02d}' + ' '
              +f'{dfin.iat[0,3]:02d}')
    plt.title('µ = ' + f'{ymean:.1f}' + ' cpm'
              +'      median = ' + f'{ymedian:0d}' + ' cpm'
              +'      σ = ' + f'{ystdev:.1f}' + ' cpm'
              +'      σ/µ = ' + f'{yscov:.0%}',
              fontsize=10, y=1.01)
    plt.xlabel('UTC')
    plt.ylabel('Counts per Minute (cpm)')

    # Save the PNG and PDF plots
    plt.savefig(PNGoutfile, bbox_inches = 'tight', pad_inches = 0.06)
    plt.savefig(PDFoutfile, bbox_inches = 'tight', pad_inches = 0.06)

    plt.close('all')

# Final status line; newline only needed when progress lines were printed.
lead = '\n' if SPDfiles else ''
print(lead + 'Done creating 1-minutes sums files.')

# SPDCSV2DailyMeans.py for python 3.8
# Dave Typinski, May 24, 2021
#
# Finds all SPDCSV files in and below data root directory.  Finds the mean
# of each 1-minute sums CSV file and appends it to a daily means CSV file.
# Saves CSV file and PNG and PDF plot files.
#
# NOTE: Works only with CSV files created using SPD2CSV.py.
#
# Inputs (configured below)
#   data_path -> string, full path to the directory of SPDCSV files
#   overwrite -> boolean, if TRUE, old CSV/PNG/PDF files are overwritten
#
# Output
#   One CSV, PNG, and PDF file per input CSV file. Output files are named
#   the same as the source CSV file + '_daily_means'.
#       CSV output file record format:
#           [Column 0] -> float, JDUT
#           [Column 1] -> int, year
#           [Column 2] -> int, month
#           [Column 3] -> int, day
#           [Column 4] -> int, number of samples in mean for this record
#           [Column 5] -> float, mean of daily one-minute sums file

SPDfiles = []     # reset: full paths of SPD files needing daily means
SPDcounter = 0    # reset: total count of SPD files found
# Column names for the 1-minute sums CSV produced above.  That file has
# exactly seven columns (JDUT, year, month, day, hhmm, sample count, sum);
# the previous list carried a spurious extra 'Hour' name, which silently
# shifted every label one column away from its data when read by pandas.
CSVheaderlist = ['JDUT','Year','Month','Day','hhmm','samples','sum']

# Find all SPD files in and below datapath directory------------------------

# Recursively locate every *.SPD file below datapath, queue those whose
# daily-means CSV is missing, and remember the last existing one so the
# cumulative daily-means list can be seeded from it.
print()
print('Searching for SPD files in', str(datapath)+os.path.sep
      ,'(Overwrite 1-minute CSV =',str(overwrite)+')')

# None means no existing daily-means CSV was found during the walk.
lastCSV = None

for path, subdirs, files in os.walk(datapath):
    for name in files:
        # os.path.splitext is safe for names without an extension; the
        # previous rsplit('.',1)[1] raised IndexError on such names.
        if os.path.splitext(name)[1].upper() != '.SPD':
            continue
        SPDcounter += 1
        fname = os.path.join(path, name)
        base = name.rsplit('.', 1)[0]
        if not overwrite:
            testCSV = fname.rsplit('.',1)[0] + '_daily_means.csv'
            if os.path.exists(testCSV):
                print(base + '_daily_means.csv exists, skipping file'
                      , end='\r')
                lastCSV = testCSV
            else:
                print(base + '_daily_means.csv not found            '
                      , end='\r')
                SPDfiles.append(fname)
        else:
            print(name, ' found', end='\r')
            SPDfiles.append(fname)

print('\nFound',f'{len(SPDfiles):,d}','SPDCSV files to process out of'
      ,f'{SPDcounter:,d}','SPDCSV files total')

# Process all SPD files that need processing--------------------------------
data = []

# When not reprocessing everything, seed the cumulative data list with the
# rows of the last daily-means file found.  The None guard fixes a
# NameError the original raised when no daily-means CSV existed yet.
if not overwrite and lastCSV is not None:
    with open(lastCSV, 'r') as inCSV:
        for row in csv.reader(inCSV):
            data.append(row)
    
# For each day's 1-minute sums file: append one daily-mean record to the
# cumulative list, rewrite the daily-means CSV, and re-plot the series.
for ptr, SPDfile in enumerate(SPDfiles, 1):

    # Derive all input/output names from the SPD path once.
    base = SPDfile.rsplit('.', 1)[0]
    CSVinfile = base + '_1_minute_sums.csv'
    CSVoutfile = base + '_daily_means.csv'
    PNGoutfile = base + '_daily_means.png'
    PDFoutfile = base + '_daily_means.pdf'

    print(f'Processing file {ptr:,d} of {len(SPDfiles):,d} -> {CSVoutfile}'
          ,end='\r')

    # Open and read the day's 1-minute sums CSV file
    dfin = pd.read_csv(CSVinfile, names=CSVheaderlist)

    # Build one record per day: time stamp of the first minute, the number
    # of 1-minute records present, and the mean of the 1-minute sums.
    # (The original comment here was copy-pasted from the per-minute loop.)
    record = []
    record.append(f'{dfin.iloc[0,0]:.8f}') # JDUT
    record.append(dfin.iloc[0,1]) # year
    record.append(dfin.iloc[0,2]) # month
    record.append(dfin.iloc[0,3]) # day
    record.append(len(dfin.index)) # count of day's records (usually 1440)
    record.append(dfin.iloc[:,6].mean()) # day's mean of 1-minute sums
    data.append(record)

    # save cumulative data list to CSV file
    with open(CSVoutfile,'w+', newline='') as data_csv:
        csvWriter = csv.writer(data_csv, delimiter=',')
        csvWriter.writerows(data)

    # save daily-means plot to PNG and PDF files

    # x values are calendar dates built from Y/M/D; y values are the means
    xs = [datetime.datetime(int(row[1]),
                            int(row[2]),
                            int(row[3])) for row in data]
    ys = [float(row[5]) for row in data]
    ymedian = int(Decimal(np.median(ys))
                  .to_integral_value(rounding=ROUND_HALF_UP))
    ymean = np.mean(ys)
    ystdev = np.std(ys)
    yscov = ystdev/ymean   # coefficient of variation

    # create figure and axes
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(xs, ys, color='navy', linewidth=0.7)

    plt.gcf().subplots_adjust(bottom=0.20)

    # set plot limits
    plt.ylim([15,18.5])

    # configure y-axis ticks and grid lines
    ax.set_yticks(np.arange(15,19,0.5), minor=False)
    ax.yaxis.grid(True, which='major', color='gray', lw=0.5)

    # configure x-axis grid lines and date-label rotation
    ax.xaxis.grid(True, which='major', color='gray', lw=0.5)
    plt.xticks(rotation=45, ha='right')

    # configure plot title and axis labels
    plt.suptitle('Observed Daily Mean Gamma Counts, AJ4CO')
    plt.title('µ = ' + f'{ymean:.1f}' + ' cpm'
              +'      median = ' + f'{ymedian:0d}' + ' cpm'
              +'      σ = ' + f'{ystdev:.1f}' + ' cpm'
              +'      σ/µ = ' + f'{yscov:.0%}',
              fontsize=10, y=1.01)
    plt.xlabel('Date')
    plt.ylabel('Mean Counts per Minute (cpm)')

    # save the PNG and PDF plots
    plt.savefig(PNGoutfile, bbox_inches = 'tight', pad_inches = 0.06)
    plt.savefig(PDFoutfile, bbox_inches = 'tight', pad_inches = 0.06)

    plt.close('all')
    
# Final status line; newline only needed when progress lines were printed.
lead = '\n' if SPDfiles else ''
print(lead + 'Done creating daily means files.\n')
