# -*- coding: utf-8 -*-
"""
Created on Sat Mar  7 15:07:25 2020

@author: Stefano Giani

File containing part of the code in mainscript.py to generate the data for the training from the raw data. 

"""
from obspy import read
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import pickle

# In[1]


events = []
for i in range(1,90):
    events.append(read("../Data/seeddata32/TEST"+str(i)+".mseed"))


eventslist = [ 0, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 
20, 23, 26, 30, 32, 35, 36, 37, 39, 41, 43, 46, 50, 51, 
52, 54,  57, 58, 59, 63, 65, 66, 69 , 81, 87]


# In[2]
#Selecting events as in Section labelled 'Data Collection' in research paper
selectedevents = [events[i] for i in eventslist]


# In[3]
#Extract each channel in selected events
eventdata = []
for event in selectedevents:
    stra = event[0].data
    strb = event[1].data
    strc = event[2].data
    eventdata.append([stra, strb, strc])

# In[4]
import numpy as np
#Concatenate channels for each event
eventdataconcat = []
for event in eventdata:
    sta = np.expand_dims(event[0], axis=-1)
    stb = np.expand_dims(event[1], axis=-1)
    stc = np.expand_dims(event[2], axis=-1)
    eventdataconcat.append(np.concatenate((sta,stb, stc),axis=1))



# In[5]
    
window_length=16384
#Select the final 40000 time steps from each event (all 3 channels) and label as 'precursors' 
#Select the first 40000 time steps from each event (all 3 channels) and label as 'noise' 
def create_precursors(X1, last = 40000):
    precurstr = X1[-last:]
        
    return(precurstr)
    
def create_noise(X1, first = 0, second = 40000):
    precurstr = X1[first:second]
       
    return(precurstr)

precursors = []
noise = []
for event in eventdataconcat:
    precursors.append(create_precursors(event,40000))
    
    
for event in eventdataconcat:
    noise.append(create_noise(event, 0, 40000))

# In[6]
#Make windows in the selected 'noise' and 'precursor' data with an overlap of 650 timesteps
def make_windows(X1, sample_stride = 650):
    X2 = []
    for i in range(len(X1)-window_length):        
        if i % sample_stride == 0:
               X2.append(X1[i:i+window_length])
           
    return(X2)
   
precursor_windows = []
for event in precursors:
    precursor_windows.append(make_windows(event, 650))
    
noise_windows = []
for event in noise:
    noise_windows.append(make_windows(event, 650))
# In[7]
#Normalisation
def normalise(X1):
    #X1 = np.reshape(X1, (window_length,3))
    X2 = []
    for data in X1:
        values = np.zeros((len(data),3))
        #for i in range(3):
        values = data - np.mean(data)
        values = values / np.linalg.norm(values)
        X2.append(values)
    return X2


normpre = []
for event in precursor_windows:
    normpre.append(normalise(event))
    
   
normnoise = []
for event in noise_windows:
    normnoise.append(normalise(event))


# In[9]
#Select events for train and test datasets. These events were randomly selected.

trainlist = [7,8, 3, 4, 5, 6, 9, 10, 11, 16, 17, 20, 21, 22, 24, 25, 27, 28,  
             31,  33, 34, 35, 37, 38]
testlist = [12,  13, 14, 15, 29, 30, 39]    

# In[10]
#Save the data

pickle.dump( normpre, open( "normpre.p", "wb" ) )
pickle.dump( normnoise, open( "normnoise.p", "wb" ) )
pickle.dump( trainlist, open( "trainlist.p", "wb" ) )
pickle.dump( testlist, open( "testlist.p", "wb" ) )
