# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 10:46:39 2020

@author: abombelli
"""

import numpy as np
import pandas as pd
import os
import openpyxl
import datetime
import random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
from matplotlib.lines import Line2D
from collections import Counter

# Working directory at launch; every data/figure path below is built from it
cwd = os.getcwd()

# Discard any figure still open from a previous run
plt.close('all')

# Every marker key registered in Line2D.markers (for plotting purposes)
all_markers = list(Line2D.markers)
# Define a list of colors
def hex2rgb(c):
    """Convert a HEX color code to an RGB tuple of floats in [0, 1].

    Parameters
    ----------
    c : str
        Color code such as '#CD6839'. The leading '#' is optional.

    Returns
    -------
    tuple of float
        (red, green, blue), each channel scaled so that 0x00 -> 0.0 and
        0xFF -> 1.0.

    Raises
    ------
    ValueError
        If a channel is not a valid two-digit hexadecimal number.
    """
    digits = c.lstrip('#')
    # An 8-bit channel spans 0..255, so 255 (not 256) is the divisor that
    # maps full intensity to exactly 1.0.
    return tuple(int(digits[i:i+2], 16)/255.0 for i in (0, 2, 4))

# Palette given as HEX codes; converted right below to RGB tuples via hex2rgb
colors = ['#CD6839','#4169E1','#2E8B57','#FFA500','#CD5C5C','#4682B4','#228B22','#191970','#1E90FF',
          '#8B8386','#00008B','#6C7B8B','#008B8B','#FFB90F','#CD853F','#CD3333','#CD0000','#6E6E6E',
          '#BF3EFF','#8B5A2B','#CDBA96','#008B45','#2E8B57','#104E8B','#009ACD','#B03060','#2E8B57',
          '#32CD32','#483D8B','#8B4513','#FF0000','#8B7765','#8B8970','#8B8878','#53868B','#CD9B1D']
colors = [hex2rgb(hex_code) for hex_code in colors]

################################################
### Defining important dates related to COVID-19
################################################
dates    = ['31-December-2019','11-January-2020','31-January-2020','11-March-2020','13-March-2020']

# Pair every date string with a 1-based label (the number later drawn next to
# the corresponding vertical line in the figures).
date_str = np.array([[date, idx] for idx, date in enumerate(dates, start=1)])

# The array above holds strings only, so both columns are converted explicitly.
df_dates = pd.DataFrame(date_str,columns=['Date','Date_idx'])
df_dates['Date']     = pd.to_datetime(df_dates['Date'])
df_dates['Date_idx'] = pd.to_numeric(df_dates['Date_idx'])

# Sort chronologically and renumber the rows 0..n-1 so that positional access
# df_dates_sorted[col][i] follows date order. The index must be reset on the
# SORTED frame; resetting the unsorted one would discard the sort.
df_dates_sorted = df_dates.sort_values(by='Date').reset_index(drop=True)

# Just for plotting purposes    
# offset: horizontal shift, in days, applied to the label of each date in
#         df_dates_sorted (one entry per date) when it is drawn with plt.text
# min_capacity: an O-D pair is plotted only if its mean capacity is >= this value
# font_number_date: font size of the date labels
offset           = [1,1,1,-6,3]
min_capacity     = 1000
font_number_date = 16


#############
### FedEx ###
#############
# Airport table; read with header=None, so the first row is data and the
# three columns are named explicitly.
df_FedEx_airports = pd.read_excel(
    os.path.join(cwd, 'FedEx', 'FedEx_Airports.xlsx'),
    header=None,
    names=['idx', 'IATA_code', 'orig_idx'],
)

# Loading full freighter data
wb = openpyxl.load_workbook(os.path.join(cwd, 'FedEx', 'FedEx_aircraft.xlsx'))
sheet = wb['Sheet1']
eta = sheet.max_row  # last row index reported by the sheet
freighterCode, freighterMaxTonnes = [], []

# Row 1 is skipped; column A feeds freighterCode, column B feeds
# freighterMaxTonnes (converted to float).
for row in range(2, eta + 1):
    thisfreighterCode = sheet[f'A{row}'].value
    thisfreighterMaxTonnes = sheet[f'B{row}'].value
    freighterCode.append(thisfreighterCode)
    freighterMaxTonnes.append(float(thisfreighterMaxTonnes))

full_list           = os.listdir(os.path.join(cwd,'FedEx','FedEx_Network'))
included_extensions = ['xlsx']

airports_to_consider = ['ANC','CDG','EWR','HKG','KIX','LAX','MEM','MIA']

# Aircraft code -> max tonnes. setdefault keeps the FIRST entry of a duplicated
# code; the dict replaces a linear search of the freighter list for every row.
tonnes_by_code = {}
for code, tonnes in zip(freighterCode, freighterMaxTonnes):
    tonnes_by_code.setdefault(code, tonnes)

# Mean capacity over the freighter table: used for 'N/A' rows whenever the
# most common aircraft type of a file is itself 'N/A' or not in the table.
fleet_mean_tonnes = np.mean(freighterMaxTonnes)

# all_data['<origin>_<destination>'] = [[date string, capacity], ...]
all_data = {}

for origin_airport in airports_to_consider:
    for destination_airport in airports_to_consider:
        if origin_airport == destination_airport:
            continue
        # NOTE(review): both codes are matched as substrings anywhere in the
        # file name, not at fixed positions - confirm against the naming scheme.
        instances = [fn for fn in full_list
                     if fn.endswith(tuple(included_extensions))
                     and origin_airport+'_' in fn
                     and '_'+destination_airport in fn]
        # If there are instances, store all dates and capacities
        if not instances:
            continue

        period_capacity = []
        for instance in instances:
            # Month (full name), day and year sit between the 2nd and 5th
            # underscore of the file name.
            location_ = [pos for pos, char in enumerate(instance) if char == '_']
            month     = instance[location_[1]+1:location_[2]]
            day       = instance[location_[2]+1:location_[3]]
            year      = instance[location_[3]+1:location_[4]]
            d = datetime.datetime.strptime(day+' '+month+' '+year, '%d %B %Y').strftime('%d-%B-%Y')

            wb    = openpyxl.load_workbook(os.path.join(cwd,'FedEx','FedEx_Network',instance))
            sheet = wb['Sheet_name_1']
            eta   = sheet.max_row

            # Aircraft type of every row (column C, row 1 skipped)
            all_ac_types = [sheet['C' + str(row)].value for row in range(2, eta+1)]

            # Capacity assumed for 'N/A' rows of THIS file: the capacity of
            # the most common aircraft type. It is recomputed for every file so
            # that no value carries over from the previously processed file
            # (and so that it is always defined). A sheet without data rows
            # has no most common type.
            most_common_ac = Counter(all_ac_types).most_common(1)[0][0] if all_ac_types else None
            if most_common_ac != 'N/A' and most_common_ac in tonnes_by_code:
                avgMaxTonnes = tonnes_by_code[most_common_ac]
            else:
                avgMaxTonnes = fleet_mean_tonnes

            # Sum the capacity of all rows; types that are neither known
            # nor 'N/A' contribute nothing.
            this_capacity = 0
            for thisAircraftType in all_ac_types:
                if thisAircraftType in tonnes_by_code:
                    this_capacity += tonnes_by_code[thisAircraftType]
                elif thisAircraftType == 'N/A':
                    this_capacity += avgMaxTonnes

            period_capacity.append([d,this_capacity])

        all_data[origin_airport+'_'+destination_airport] = period_capacity

# All O-D pair strings
vocab = list(all_data.keys())

# plt.get_cmap works across matplotlib versions; plt.cm.get_cmap was
# deprecated in 3.7 and removed in 3.9.
cmap = plt.get_cmap('PuOr')
mean_values       = []
overall_max_value = []
# Per O-D pair: mean capacity (drives the line color) and max capacity
# (drives the height of the vertical date lines).
for od_pair in vocab:
    # Built straight from the [date, capacity] pairs, so capacities stay
    # numeric instead of passing through a string array.
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)
    mean_values.append(np.mean(df_sorted['Capacity']))
    overall_max_value.append(np.max(df_sorted['Capacity']))

# Largest mean (color normalization) and largest single observation
max_value         = np.max(np.array(mean_values))
overall_max_value = np.max(np.array(overall_max_value))

# Plotting time-series of capacity along major OD airport pair lines.
# Note: the value associated with each observation refers to a 14-day
# period that culminates in the day representative of the observation.
# As example, a data point associated with April 27th, 2020 refers to the
# cumulative Available Freight Tonnes (AFT) for that line in the
# period April 13th-April 27th, 2020
fig, ax = plt.subplots()
for i, od_pair in enumerate(vocab):
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)

    # Only O-D pairs whose mean capacity reaches min_capacity are drawn
    if np.mean(df_sorted['Capacity'])>=min_capacity:
        # Markers are cycled: 8 airports give up to 56 O-D pairs, which may
        # exceed the number of available markers.
        ax.plot(df_sorted['Date'],df_sorted['Capacity'].astype(float),
                marker=all_markers[i % len(all_markers)],markersize=4,
                linestyle='-',linewidth=1.5,
                label=od_pair,color=cmap(mean_values[i]/max_value))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%B-%Y'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=15))

# One red vertical line per COVID-19 date, labelled with its index; the label
# is shifted horizontally by offset[i] days.
for i in range(len(df_dates_sorted)):
    event_date = df_dates_sorted['Date'][i]
    ax.plot([event_date,event_date],[0,overall_max_value],
            linestyle='-',linewidth=2.5,color='r')
    ax.text(event_date+datetime.timedelta(days=offset[i]),overall_max_value,
            df_dates_sorted['Date_idx'][i],fontsize=font_number_date)

ax.grid(True)
ax.tick_params(axis='x', which='major', labelsize=8)
axis_font  = {'fontname':'Arial', 'size':'14'}
ax.set_xlabel('Date',**axis_font)
ax.set_ylabel('AFT [tonnes]',**axis_font)
fig.autofmt_xdate()
ax.legend(loc='upper right', bbox_to_anchor=(1.25, 1.05),
          ncol=1, fancybox=True, shadow=True)

# Save through the figure handle BEFORE plt.show(): once show() returns, the
# current figure may already be closed and a pyplot-level savefig would
# write an empty image.
figures_dir = os.path.join(cwd,'Figures')
os.makedirs(figures_dir, exist_ok=True)
fig.savefig(os.path.join(figures_dir,'FedEx_comparison_capacity.png'),dpi=600,bbox_inches='tight',
            transparent=True,
            pad_inches=0.1)
plt.show()

#%%

###########
### UPS ###
###########
# Airport table; read with header=None, so the first row is data and the
# three columns are named explicitly.
df_UPS_airports = pd.read_excel(
    os.path.join(cwd, 'UPS', 'UPS_Airports.xlsx'),
    header=None,
    names=['idx', 'IATA_code', 'orig_idx'],
)

# Loading full freighter data
wb = openpyxl.load_workbook(os.path.join(cwd, 'UPS', 'UPS_aircraft.xlsx'))
sheet = wb['Sheet1']
eta = sheet.max_row  # last row index reported by the sheet
freighterCode, freighterMaxTonnes = [], []

# Row 1 is skipped; column A feeds freighterCode, column B feeds
# freighterMaxTonnes (converted to float).
for row in range(2, eta + 1):
    thisfreighterCode = sheet[f'A{row}'].value
    thisfreighterMaxTonnes = sheet[f'B{row}'].value
    freighterCode.append(thisfreighterCode)
    freighterMaxTonnes.append(float(thisfreighterMaxTonnes))
    
# Capacity assumed for 'N/A' rows when the most common aircraft type of a
# file is neither 'N/A' nor present in the freighter table.
default_max_tonnes = 30
avgMaxTonnes = default_max_tonnes

full_list           = os.listdir(os.path.join(cwd,'UPS','UPS_Network'))
included_extensions = ['xlsx']

airports_to_consider = ['ANC','CGN','DFW','HKG','HNL','ONT','SDF','SZX']

# Aircraft code -> max tonnes. setdefault keeps the FIRST entry of a duplicated
# code; the dict replaces a linear search of the freighter list for every row.
tonnes_by_code = {}
for code, tonnes in zip(freighterCode, freighterMaxTonnes):
    tonnes_by_code.setdefault(code, tonnes)

# Mean capacity over the freighter table: used for 'N/A' rows when the most
# common aircraft type of a file is itself 'N/A'.
fleet_mean_tonnes = np.mean(freighterMaxTonnes)

# all_data['<origin>_<destination>'] = [[date string, capacity], ...]
all_data = {}

for origin_airport in airports_to_consider:
    for destination_airport in airports_to_consider:
        if origin_airport == destination_airport:
            continue
        # NOTE(review): both codes are matched as substrings anywhere in the
        # file name, not at fixed positions - confirm against the naming scheme.
        instances = [fn for fn in full_list
                     if fn.endswith(tuple(included_extensions))
                     and origin_airport+'_' in fn
                     and '_'+destination_airport in fn]
        # If there are instances, store all dates and capacities
        if not instances:
            continue

        period_capacity = []
        for instance in instances:
            # Month (full name), day and year sit between the 2nd and 5th
            # underscore of the file name.
            location_ = [pos for pos, char in enumerate(instance) if char == '_']
            month     = instance[location_[1]+1:location_[2]]
            day       = instance[location_[2]+1:location_[3]]
            year      = instance[location_[3]+1:location_[4]]
            d = datetime.datetime.strptime(day+' '+month+' '+year, '%d %B %Y').strftime('%d-%B-%Y')

            wb    = openpyxl.load_workbook(os.path.join(cwd,'UPS','UPS_Network',instance))
            sheet = wb['Sheet_name_1']
            eta   = sheet.max_row

            # Aircraft type of every row (column C, row 1 skipped)
            all_ac_types = [sheet['C' + str(row)].value for row in range(2, eta+1)]

            # Capacity assumed for 'N/A' rows of THIS file: the capacity of
            # the most common aircraft type. It is recomputed for every file so
            # that no value carries over from the previously processed file.
            # A sheet without data rows has no most common type.
            most_common_ac = Counter(all_ac_types).most_common(1)[0][0] if all_ac_types else None
            if most_common_ac == 'N/A':
                avgMaxTonnes = fleet_mean_tonnes
            elif most_common_ac in tonnes_by_code:
                avgMaxTonnes = tonnes_by_code[most_common_ac]
            else:
                avgMaxTonnes = default_max_tonnes

            # Sum the capacity of all rows; types that are neither known
            # nor 'N/A' contribute nothing.
            this_capacity = 0
            for thisAircraftType in all_ac_types:
                if thisAircraftType in tonnes_by_code:
                    this_capacity += tonnes_by_code[thisAircraftType]
                elif thisAircraftType == 'N/A':
                    this_capacity += avgMaxTonnes

            period_capacity.append([d,this_capacity])

        all_data[origin_airport+'_'+destination_airport] = period_capacity

# All O-D pair strings
vocab = list(all_data.keys())

# plt.get_cmap works across matplotlib versions; plt.cm.get_cmap was
# deprecated in 3.7 and removed in 3.9.
cmap = plt.get_cmap('PuOr')
mean_values       = []
overall_max_value = []
# Per O-D pair: mean capacity (drives the line color) and max capacity
# (drives the height of the vertical date lines).
for od_pair in vocab:
    # Built straight from the [date, capacity] pairs, so capacities stay
    # numeric instead of passing through a string array.
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)
    mean_values.append(np.mean(df_sorted['Capacity']))
    overall_max_value.append(np.max(df_sorted['Capacity']))

# Largest mean (color normalization) and largest single observation
max_value         = np.max(np.array(mean_values))
overall_max_value = np.max(np.array(overall_max_value))

# Time series of capacity per O-D pair, with the COVID-19 dates overlaid
fig, ax = plt.subplots()
for i, od_pair in enumerate(vocab):
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)

    # Only O-D pairs whose mean capacity reaches min_capacity are drawn
    if np.mean(df_sorted['Capacity'])>=min_capacity:
        # Markers are cycled: 8 airports give up to 56 O-D pairs, which may
        # exceed the number of available markers.
        ax.plot(df_sorted['Date'],df_sorted['Capacity'].astype(float),
                marker=all_markers[i % len(all_markers)],markersize=4,
                linestyle='-',linewidth=1.5,
                label=od_pair,color=cmap(mean_values[i]/max_value))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%B-%Y'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=15))

# One red vertical line per COVID-19 date, labelled with its index; the label
# is shifted horizontally by offset[i] days.
for i in range(len(df_dates_sorted)):
    event_date = df_dates_sorted['Date'][i]
    ax.plot([event_date,event_date],[0,overall_max_value],
            linestyle='-',linewidth=2.5,color='r')
    ax.text(event_date+datetime.timedelta(days=offset[i]),overall_max_value,
            df_dates_sorted['Date_idx'][i],fontsize=font_number_date)

ax.grid(True)
ax.tick_params(axis='x', which='major', labelsize=8)
axis_font  = {'fontname':'Arial', 'size':'14'}
ax.set_xlabel('Date',**axis_font)
ax.set_ylabel('AFT [tonnes]',**axis_font)
fig.autofmt_xdate()
ax.legend(loc='upper right', bbox_to_anchor=(1.25, 1.05),
          ncol=1, fancybox=True, shadow=True)

# Save through the figure handle BEFORE plt.show(): once show() returns, the
# current figure may already be closed and a pyplot-level savefig would
# write an empty image.
figures_dir = os.path.join(cwd,'Figures')
os.makedirs(figures_dir, exist_ok=True)
fig.savefig(os.path.join(figures_dir,'UPS_comparison_capacity.png'),dpi=600,bbox_inches='tight',
            transparent=True,
            pad_inches=0.1)
plt.show()


#%%

###########
### DHL ###
###########
# Airport table; read with header=None, so the first row is data and the
# three columns are named explicitly.
df_DHL_airports = pd.read_excel(
    os.path.join(cwd, 'DHL', 'DHL_Airports.xlsx'),
    header=None,
    names=['idx', 'IATA_code', 'orig_idx'],
)

# Loading full freighter data
wb = openpyxl.load_workbook(os.path.join(cwd, 'DHL', 'DHL_aircraft.xlsx'))
sheet = wb['Sheet1']
eta = sheet.max_row  # last row index reported by the sheet
freighterCode, freighterMaxTonnes = [], []

# Row 1 is skipped; column A feeds freighterCode, column B feeds
# freighterMaxTonnes (converted to float).
for row in range(2, eta + 1):
    thisfreighterCode = sheet[f'A{row}'].value
    thisfreighterMaxTonnes = sheet[f'B{row}'].value
    freighterCode.append(thisfreighterCode)
    freighterMaxTonnes.append(float(thisfreighterMaxTonnes))
    
# Capacity assumed for 'N/A' rows when the most common aircraft type of a
# file is neither 'N/A' nor present in the freighter table.
default_max_tonnes = 30
avgMaxTonnes = default_max_tonnes

full_list           = os.listdir(os.path.join(cwd,'DHL','DHL_Network'))
included_extensions = ['xlsx']

airports_to_consider = ['ANC','CVG','EMA','HKG','ICN','LEJ','MIA','PVG']

# Aircraft code -> max tonnes. setdefault keeps the FIRST entry of a duplicated
# code; the dict replaces a linear search of the freighter list for every row.
tonnes_by_code = {}
for code, tonnes in zip(freighterCode, freighterMaxTonnes):
    tonnes_by_code.setdefault(code, tonnes)

# Mean capacity over the freighter table: used for 'N/A' rows when the most
# common aircraft type of a file is itself 'N/A'.
fleet_mean_tonnes = np.mean(freighterMaxTonnes)

# all_data['<origin>_<destination>'] = [[date string, capacity], ...]
all_data = {}

for origin_airport in airports_to_consider:
    for destination_airport in airports_to_consider:
        if origin_airport == destination_airport:
            continue
        # NOTE(review): both codes are matched as substrings anywhere in the
        # file name, not at fixed positions - confirm against the naming scheme.
        instances = [fn for fn in full_list
                     if fn.endswith(tuple(included_extensions))
                     and origin_airport+'_' in fn
                     and '_'+destination_airport in fn]
        # If there are instances, store all dates and capacities
        if not instances:
            continue

        period_capacity = []
        for instance in instances:
            # Month (full name), day and year sit between the 2nd and 5th
            # underscore of the file name.
            location_ = [pos for pos, char in enumerate(instance) if char == '_']
            month     = instance[location_[1]+1:location_[2]]
            day       = instance[location_[2]+1:location_[3]]
            year      = instance[location_[3]+1:location_[4]]
            d = datetime.datetime.strptime(day+' '+month+' '+year, '%d %B %Y').strftime('%d-%B-%Y')

            wb    = openpyxl.load_workbook(os.path.join(cwd,'DHL','DHL_Network',instance))
            sheet = wb['Sheet_name_1']
            eta   = sheet.max_row

            # Aircraft type of every row (column C, row 1 skipped)
            all_ac_types = [sheet['C' + str(row)].value for row in range(2, eta+1)]

            # Capacity assumed for 'N/A' rows of THIS file: the capacity of
            # the most common aircraft type. It is recomputed for every file so
            # that no value carries over from the previously processed file.
            # A sheet without data rows has no most common type.
            most_common_ac = Counter(all_ac_types).most_common(1)[0][0] if all_ac_types else None
            if most_common_ac == 'N/A':
                avgMaxTonnes = fleet_mean_tonnes
            elif most_common_ac in tonnes_by_code:
                avgMaxTonnes = tonnes_by_code[most_common_ac]
            else:
                avgMaxTonnes = default_max_tonnes

            # Sum the capacity of all rows; types that are neither known
            # nor 'N/A' contribute nothing.
            this_capacity = 0
            for thisAircraftType in all_ac_types:
                if thisAircraftType in tonnes_by_code:
                    this_capacity += tonnes_by_code[thisAircraftType]
                elif thisAircraftType == 'N/A':
                    this_capacity += avgMaxTonnes

            period_capacity.append([d,this_capacity])

        all_data[origin_airport+'_'+destination_airport] = period_capacity

# All O-D pair strings
vocab = list(all_data.keys())

# plt.get_cmap works across matplotlib versions; plt.cm.get_cmap was
# deprecated in 3.7 and removed in 3.9.
cmap = plt.get_cmap('PuOr')
mean_values       = []
overall_max_value = []
# Per O-D pair: mean capacity (drives the line color) and max capacity
# (drives the height of the vertical date lines).
for od_pair in vocab:
    # Built straight from the [date, capacity] pairs, so capacities stay
    # numeric instead of passing through a string array.
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)
    mean_values.append(np.mean(df_sorted['Capacity']))
    overall_max_value.append(np.max(df_sorted['Capacity']))

# Largest mean (color normalization) and largest single observation
max_value         = np.max(np.array(mean_values))
overall_max_value = np.max(np.array(overall_max_value))

# Time series of capacity per O-D pair, with the COVID-19 dates overlaid
fig, ax = plt.subplots()
for i, od_pair in enumerate(vocab):
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)

    # Only O-D pairs whose mean capacity reaches min_capacity are drawn
    if np.mean(df_sorted['Capacity'])>=min_capacity:
        # Markers are cycled: 8 airports give up to 56 O-D pairs, which may
        # exceed the number of available markers.
        ax.plot(df_sorted['Date'],df_sorted['Capacity'].astype(float),
                marker=all_markers[i % len(all_markers)],markersize=4,
                linestyle='-',linewidth=1.5,
                label=od_pair,color=cmap(mean_values[i]/max_value))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%B-%Y'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=15))

# One red vertical line per COVID-19 date, labelled with its index; the label
# is shifted horizontally by offset[i] days.
for i in range(len(df_dates_sorted)):
    event_date = df_dates_sorted['Date'][i]
    ax.plot([event_date,event_date],[0,overall_max_value],
            linestyle='-',linewidth=2.5,color='r')
    ax.text(event_date+datetime.timedelta(days=offset[i]),overall_max_value,
            df_dates_sorted['Date_idx'][i],fontsize=font_number_date)

ax.grid(True)
ax.tick_params(axis='x', which='major', labelsize=8)
axis_font  = {'fontname':'Arial', 'size':'14'}
ax.set_xlabel('Date',**axis_font)
ax.set_ylabel('AFT [tonnes]',**axis_font)
fig.autofmt_xdate()
ax.legend(loc='upper right', bbox_to_anchor=(1.5, 1.05),
          ncol=2, fancybox=True, shadow=True)

# Save through the figure handle BEFORE plt.show(): once show() returns, the
# current figure may already be closed and a pyplot-level savefig would
# write an empty image.
figures_dir = os.path.join(cwd,'Figures')
os.makedirs(figures_dir, exist_ok=True)
fig.savefig(os.path.join(figures_dir,'DHL_comparison_capacity.png'),dpi=600,bbox_inches='tight',
            transparent=True,
            pad_inches=0.1)
plt.show()

#%%

##############
### Cathay ###
##############
# Airport table; read with header=None, so the first row is data and the
# three columns are named explicitly.
df_Cathay_airports = pd.read_excel(
    os.path.join(cwd, 'Cathay', 'Cathay_Airports.xlsx'),
    header=None,
    names=['idx', 'IATA_code', 'orig_idx'],
)

# Loading full freighter data
wb = openpyxl.load_workbook(os.path.join(cwd, 'Cathay', 'Cathay_aircraft.xlsx'))
sheet = wb['Sheet1']
eta = sheet.max_row  # last row index reported by the sheet
freighterCode, freighterMaxTonnes = [], []

# Row 1 is skipped; column A feeds freighterCode, column B feeds
# freighterMaxTonnes (converted to float).
for row in range(2, eta + 1):
    thisfreighterCode = sheet[f'A{row}'].value
    thisfreighterMaxTonnes = sheet[f'B{row}'].value
    freighterCode.append(thisfreighterCode)
    freighterMaxTonnes.append(float(thisfreighterMaxTonnes))
    
# Capacity assumed for 'N/A' rows when the most common aircraft type of a
# file is neither 'N/A' nor present in the freighter table.
default_max_tonnes = 30
avgMaxTonnes = default_max_tonnes

full_list           = os.listdir(os.path.join(cwd,'Cathay','Cathay_Network'))
included_extensions = ['xlsx']

airports_to_consider = ['ANC','BKK','HKG','MNL','NRT','PVG','SIN','TPE']

# Aircraft code -> max tonnes. setdefault keeps the FIRST entry of a duplicated
# code; the dict replaces a linear search of the freighter list for every row.
tonnes_by_code = {}
for code, tonnes in zip(freighterCode, freighterMaxTonnes):
    tonnes_by_code.setdefault(code, tonnes)

# Mean capacity over the freighter table: used for 'N/A' rows when the most
# common aircraft type of a file is itself 'N/A'.
fleet_mean_tonnes = np.mean(freighterMaxTonnes)

# all_data['<origin>_<destination>'] = [[date string, capacity], ...]
all_data = {}

for origin_airport in airports_to_consider:
    for destination_airport in airports_to_consider:
        if origin_airport == destination_airport:
            continue
        # NOTE(review): both codes are matched as substrings anywhere in the
        # file name, not at fixed positions - confirm against the naming scheme.
        instances = [fn for fn in full_list
                     if fn.endswith(tuple(included_extensions))
                     and origin_airport+'_' in fn
                     and '_'+destination_airport in fn]
        # If there are instances, store all dates and capacities
        if not instances:
            continue

        period_capacity = []
        for instance in instances:
            # Month (full name), day and year sit between the 2nd and 5th
            # underscore of the file name.
            location_ = [pos for pos, char in enumerate(instance) if char == '_']
            month     = instance[location_[1]+1:location_[2]]
            day       = instance[location_[2]+1:location_[3]]
            year      = instance[location_[3]+1:location_[4]]
            d = datetime.datetime.strptime(day+' '+month+' '+year, '%d %B %Y').strftime('%d-%B-%Y')

            wb    = openpyxl.load_workbook(os.path.join(cwd,'Cathay','Cathay_Network',instance))
            sheet = wb['Sheet_name_1']
            eta   = sheet.max_row

            # Aircraft type of every row (column C, row 1 skipped)
            all_ac_types = [sheet['C' + str(row)].value for row in range(2, eta+1)]

            # Capacity assumed for 'N/A' rows of THIS file: the capacity of
            # the most common aircraft type. It is recomputed for every file so
            # that no value carries over from the previously processed file.
            # A sheet without data rows has no most common type.
            most_common_ac = Counter(all_ac_types).most_common(1)[0][0] if all_ac_types else None
            if most_common_ac == 'N/A':
                avgMaxTonnes = fleet_mean_tonnes
            elif most_common_ac in tonnes_by_code:
                avgMaxTonnes = tonnes_by_code[most_common_ac]
            else:
                avgMaxTonnes = default_max_tonnes

            # Sum the capacity of all rows; types that are neither known
            # nor 'N/A' contribute nothing.
            this_capacity = 0
            for thisAircraftType in all_ac_types:
                if thisAircraftType in tonnes_by_code:
                    this_capacity += tonnes_by_code[thisAircraftType]
                elif thisAircraftType == 'N/A':
                    this_capacity += avgMaxTonnes

            period_capacity.append([d,this_capacity])

        all_data[origin_airport+'_'+destination_airport] = period_capacity

# All O-D pair strings
vocab = list(all_data.keys())

# plt.get_cmap works across matplotlib versions; plt.cm.get_cmap was
# deprecated in 3.7 and removed in 3.9.
cmap = plt.get_cmap('PuOr')
mean_values       = []
overall_max_value = []
# Per O-D pair: mean capacity (drives the line color) and max capacity
# (drives the height of the vertical date lines).
for od_pair in vocab:
    # Built straight from the [date, capacity] pairs, so capacities stay
    # numeric instead of passing through a string array.
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)
    mean_values.append(np.mean(df_sorted['Capacity']))
    overall_max_value.append(np.max(df_sorted['Capacity']))

# Largest mean (color normalization) and largest single observation
max_value         = np.max(np.array(mean_values))
overall_max_value = np.max(np.array(overall_max_value))

# Time series of capacity per O-D pair, with the COVID-19 dates overlaid
fig, ax = plt.subplots()
for i, od_pair in enumerate(vocab):
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)

    # Only O-D pairs whose mean capacity reaches min_capacity are drawn
    if np.mean(df_sorted['Capacity'])>=min_capacity:
        # Markers are cycled: 8 airports give up to 56 O-D pairs, which may
        # exceed the number of available markers.
        ax.plot(df_sorted['Date'],df_sorted['Capacity'].astype(float),
                marker=all_markers[i % len(all_markers)],markersize=4,
                linestyle='-',linewidth=1.5,
                label=od_pair,color=cmap(mean_values[i]/max_value))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%B-%Y'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=15))

# One red vertical line per COVID-19 date, labelled with its index; the label
# is shifted horizontally by offset[i] days.
for i in range(len(df_dates_sorted)):
    event_date = df_dates_sorted['Date'][i]
    ax.plot([event_date,event_date],[0,overall_max_value],
            linestyle='-',linewidth=2.5,color='r')
    ax.text(event_date+datetime.timedelta(days=offset[i]),overall_max_value,
            df_dates_sorted['Date_idx'][i],fontsize=font_number_date)

ax.grid(True)
ax.tick_params(axis='x', which='major', labelsize=8)
axis_font  = {'fontname':'Arial', 'size':'14'}
ax.set_xlabel('Date',**axis_font)
ax.set_ylabel('AFT [tonnes]',**axis_font)
fig.autofmt_xdate()
ax.legend(loc='upper right', bbox_to_anchor=(1.25, 1.05),
          ncol=1, fancybox=True, shadow=True)

# Save through the figure handle BEFORE plt.show(): once show() returns, the
# current figure may already be closed and a pyplot-level savefig would
# write an empty image.
figures_dir = os.path.join(cwd,'Figures')
os.makedirs(figures_dir, exist_ok=True)
fig.savefig(os.path.join(figures_dir,'Cathay_comparison_capacity.png'),dpi=600,bbox_inches='tight',
            transparent=True,
            pad_inches=0.1)
plt.show()

#%%

################
### Cargolux ###
################
# Airport table; read with header=None, so the first row is data and the
# three columns are named explicitly.
df_Cargolux_airports = pd.read_excel(
    os.path.join(cwd, 'Cargolux', 'Cargolux_Airports.xlsx'),
    header=None,
    names=['idx', 'IATA_code', 'orig_idx'],
)

# Loading full freighter data
wb = openpyxl.load_workbook(os.path.join(cwd, 'Cargolux', 'Cargolux_aircraft.xlsx'))
sheet = wb['Sheet1']
eta = sheet.max_row  # last row index reported by the sheet
freighterCode, freighterMaxTonnes = [], []

# Row 1 is skipped; column A feeds freighterCode, column B feeds
# freighterMaxTonnes (converted to float).
for row in range(2, eta + 1):
    thisfreighterCode = sheet[f'A{row}'].value
    thisfreighterMaxTonnes = sheet[f'B{row}'].value
    freighterCode.append(thisfreighterCode)
    freighterMaxTonnes.append(float(thisfreighterMaxTonnes))
    
# Capacity assumed for 'N/A' rows when the most common aircraft type of a
# file is neither 'N/A' nor present in the freighter table.
default_max_tonnes = 30
avgMaxTonnes = default_max_tonnes

full_list           = os.listdir(os.path.join(cwd,'Cargolux','Cargolux_Network'))
included_extensions = ['xlsx']

airports_to_consider = ['AMS','ANC','HKG','JFK','LUX','MXP','ORD','OVB']

# Aircraft code -> max tonnes. setdefault keeps the FIRST entry of a duplicated
# code; the dict replaces a linear search of the freighter list for every row.
tonnes_by_code = {}
for code, tonnes in zip(freighterCode, freighterMaxTonnes):
    tonnes_by_code.setdefault(code, tonnes)

# Mean capacity over the freighter table: used for 'N/A' rows when the most
# common aircraft type of a file is itself 'N/A'.
fleet_mean_tonnes = np.mean(freighterMaxTonnes)

# all_data['<origin>_<destination>'] = [[date string, capacity], ...]
all_data = {}

for origin_airport in airports_to_consider:
    for destination_airport in airports_to_consider:
        if origin_airport == destination_airport:
            continue
        # NOTE(review): both codes are matched as substrings anywhere in the
        # file name, not at fixed positions - confirm against the naming scheme.
        instances = [fn for fn in full_list
                     if fn.endswith(tuple(included_extensions))
                     and origin_airport+'_' in fn
                     and '_'+destination_airport in fn]
        # If there are instances, store all dates and capacities
        if not instances:
            continue

        period_capacity = []
        for instance in instances:
            # Month (full name), day and year sit between the 2nd and 5th
            # underscore of the file name.
            location_ = [pos for pos, char in enumerate(instance) if char == '_']
            month     = instance[location_[1]+1:location_[2]]
            day       = instance[location_[2]+1:location_[3]]
            year      = instance[location_[3]+1:location_[4]]
            d = datetime.datetime.strptime(day+' '+month+' '+year, '%d %B %Y').strftime('%d-%B-%Y')

            wb    = openpyxl.load_workbook(os.path.join(cwd,'Cargolux','Cargolux_Network',instance))
            sheet = wb['Sheet_name_1']
            eta   = sheet.max_row

            # Aircraft type of every row (column C, row 1 skipped)
            all_ac_types = [sheet['C' + str(row)].value for row in range(2, eta+1)]

            # Capacity assumed for 'N/A' rows of THIS file: the capacity of
            # the most common aircraft type. It is recomputed for every file so
            # that no value carries over from the previously processed file.
            # A sheet without data rows has no most common type.
            most_common_ac = Counter(all_ac_types).most_common(1)[0][0] if all_ac_types else None
            if most_common_ac == 'N/A':
                avgMaxTonnes = fleet_mean_tonnes
            elif most_common_ac in tonnes_by_code:
                avgMaxTonnes = tonnes_by_code[most_common_ac]
            else:
                avgMaxTonnes = default_max_tonnes

            # Sum the capacity of all rows; types that are neither known
            # nor 'N/A' contribute nothing.
            this_capacity = 0
            for thisAircraftType in all_ac_types:
                if thisAircraftType in tonnes_by_code:
                    this_capacity += tonnes_by_code[thisAircraftType]
                elif thisAircraftType == 'N/A':
                    this_capacity += avgMaxTonnes

            period_capacity.append([d,this_capacity])

        all_data[origin_airport+'_'+destination_airport] = period_capacity

# All O-D pair strings
vocab = list(all_data.keys())

# plt.get_cmap works across matplotlib versions; plt.cm.get_cmap was
# deprecated in 3.7 and removed in 3.9.
cmap = plt.get_cmap('PuOr')
mean_values       = []
overall_max_value = []
# Per O-D pair: mean capacity (drives the line color) and max capacity
# (drives the height of the vertical date lines).
for od_pair in vocab:
    # Built straight from the [date, capacity] pairs, so capacities stay
    # numeric instead of passing through a string array.
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)
    mean_values.append(np.mean(df_sorted['Capacity']))
    overall_max_value.append(np.max(df_sorted['Capacity']))

# Largest mean (color normalization) and largest single observation
max_value         = np.max(np.array(mean_values))
overall_max_value = np.max(np.array(overall_max_value))

# Time series of capacity per O-D pair, with the COVID-19 dates overlaid
fig, ax = plt.subplots()
for i, od_pair in enumerate(vocab):
    df = pd.DataFrame(all_data[od_pair],columns=['Date','Capacity'])
    df['Date']     = pd.to_datetime(df['Date'])
    df['Capacity'] = pd.to_numeric(df['Capacity'])
    df_sorted = df.sort_values(by='Date').reset_index(drop=True)

    # Only O-D pairs whose mean capacity reaches min_capacity are drawn
    if np.mean(df_sorted['Capacity'])>=min_capacity:
        # Markers are cycled: 8 airports give up to 56 O-D pairs, which may
        # exceed the number of available markers.
        ax.plot(df_sorted['Date'],df_sorted['Capacity'].astype(float),
                marker=all_markers[i % len(all_markers)],markersize=4,
                linestyle='-',linewidth=1.5,
                label=od_pair,color=cmap(mean_values[i]/max_value))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%B-%Y'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=15))

# One red vertical line per COVID-19 date, labelled with its index; the label
# is shifted horizontally by offset[i] days.
for i in range(len(df_dates_sorted)):
    event_date = df_dates_sorted['Date'][i]
    ax.plot([event_date,event_date],[0,overall_max_value],
            linestyle='-',linewidth=2.5,color='r')
    ax.text(event_date+datetime.timedelta(days=offset[i]),overall_max_value,
            df_dates_sorted['Date_idx'][i],fontsize=font_number_date)

ax.grid(True)
ax.tick_params(axis='x', which='major', labelsize=8)
axis_font  = {'fontname':'Arial', 'size':'14'}
ax.set_xlabel('Date',**axis_font)
ax.set_ylabel('AFT [tonnes]',**axis_font)
fig.autofmt_xdate()
ax.legend(loc='upper right', bbox_to_anchor=(1.25, 1.05),
          ncol=1, fancybox=True, shadow=True)

# Save through the figure handle BEFORE plt.show(): once show() returns, the
# current figure may already be closed and a pyplot-level savefig would
# write an empty image.
figures_dir = os.path.join(cwd,'Figures')
os.makedirs(figures_dir, exist_ok=True)
fig.savefig(os.path.join(figures_dir,'Cargolux_comparison_capacity.png'),dpi=600,bbox_inches='tight',
            transparent=True,
            pad_inches=0.1)
plt.show()
                    
                
            