
import gym
from gym import spaces
import numpy as np
import stable_baselines
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import math
import matplotlib.pyplot as plt
import random

# Simulation parameters which will be used to build the classes
T = 50.1            # simulation time [s]
dt = 0.1          # simulation time step [s]
n = int(T / dt)        # number of simulation steps
tauA = 0.2     # actuator lag, representing driveline dyanmics [s]

# class of the leading vehicle
# Position, speed, and acceleration profiles are recorded in the vehicle object it self.
class leader():
    def __init__(self, speed0, pos0):
        self.speed = [0] * n
        self.pos = [0] * n
        self.a = [0] * n   
        self.speed[0] = speed0
        self.pos[0] = pos0
    def getSpeed(self,k):
        return self.speed[k]
    def getPos(self,k):
        return self.pos[k]
    def getA(self,k):
        return self.a[k]
    def updatePos(self,k):
        self.pos[k] = self.pos[k-1] + self.speed[k-1] * dt + 0.5 * self.a[k-1] * math.pow(dt,2)
    def updateSpeed(self,k):
        self.speed[k] = self.speed[k-1] + self.a[k-1] * dt
    def setA(self,u,k):
        self.a[k] = u

# class of autonomous vehicles
class av():
    def __init__(self, lead1, speed0, pos0):
        self.speed = [0] * n
        self.pos = [0] * n
        self.a = [0] * n   
        self.speed[0] = speed0
        self.pos[0] = pos0
        self.predecessor1 = lead1
    def getSpeed(self,k):
        return self.speed[k]
    def getPos(self,k):
        return self.pos[k]
    def getA(self,k):
        return self.a[k]
    def updatePos(self,k):
        self.pos[k] = self.pos[k-1] + self.speed[k-1] * dt + 0.5 * self.a[k-1] * math.pow(dt,2)
    def updateSpeed(self,k):
        self.speed[k] = self.speed[k-1] + self.a[k-1] * dt
    def updateA(self,u,k):
        self.a[k] = self.a[k-1] + (u - self.a[k-1]) * dt / tauA

# class of the customized simulation environment that follows gym interface
class one_ACC_KF(gym.Env):
    
    metadata = {'render.modes': ['human']}
    
    def __init__(self):
        super(one_ACC_KF, self).__init__()
        # Define the action space
        self.u_max = 3    # maximum acceleration [m/s^2]
        self.u_min = -6     # minimum acceleration [m/s^2]
        self.action_space = spaces.Box(low=self.u_min, high=self.u_max, shape=(1,), dtype='float32')
        
        # Define the observation space: [distance gap, ego-vehicle speed, relative speed, jerk]
        vlow = np.array([0,0,-40,0],dtype = 'f')
        vhigh = np.array([500,40,40,10],dtype = 'f')
        self.observation_space = spaces.Box(low=vlow, high=vhigh, shape=(4,))
        
        self.done = False
        
        # Define simulation parameters
        self.T = 50.1            # simulation time [s]
        self.dt = 0.1          # simulation time step [s]
        self.n = int(self.T / self.dt)        # number of simulation steps
        # Define system parameters
        self.min_gap = 2            # minimum distance gap at standstill [m]
        self.vlength = 4        # vehicle length [m]
        self.td = 1          # desired time gap [s]
        self.tauA = 0.2     # actuator lag, representing driveline dyanmics [s]
        self.tauS = 0.2     # delay in sensor [s]
    
    # Reset all the required variables and initialize every setting to start optimization again
    def reset_test_multi(self):
        self.done = False
        self.current_step = 0
        
        # Initialize the leader
        self.n_l = 1    # Number of the leaders
        self.l = [None] * self.n_l
        v0_leader = 33  # initial speed of the leader
        pos0_leader = 800  # initial position of the leader
        self.l[0] = leader(v0_leader,pos0_leader)
        
        # Intialize the follower
        self.n_f = 19      # number of followers
        v0_follower = 33   # initial speed of the follower
        gap0 = 1  # initial time gap
        s0 = v0_follower * gap0 + self.min_gap
       
        # Reset every variable and state (s,vf,rv,j) for every follower 
        self.f = [None] * self.n_f
        self.u = [0] * self.n_f
        self.f[0] = av(self.l[0],v0_follower,self.l[0].getPos(0)-s0-self.vlength)
        s = [v0_follower * gap0] * self.n_f
        jerk = [0] * self.n_f
        vf = [v0_follower] * self.n_f
        rv = [v0_leader-v0_follower] * self.n_f
        self.observedState = np.empty((0, 4), float)
        
        for v in range(1,self.n_f):
            self.f[v] = av(self.f[v-1],v0_follower,self.f[v-1].getPos(0)-s0-self.vlength)            
        
        # Initiailize the state for each follower
        for v in range(self.n_f):
            self.observedState = np.append(self.observedState, np.array([[s[v],vf[v],rv[v],jerk[v]]]), axis=0)
        
        # Parameters in the filter
        self.F = np.array([[1,self.dt,0.5*self.dt*self.dt],[0,1,self.dt],[0,0,1]])  # state transition matrix
        self.H = np.array([[1,0,0],[0,1,0]])    # observation matrix
        self.Q = []     # process noise covariance matrix
        self.R = []     # measurement noise covariance matrix
        var_a = 0.25    # acceleration noise
        for i in range(n):
            self.Q.append(np.array([[math.pow(self.dt,4)/4,math.pow(self.dt,3)/2,math.pow(self.dt,2)/2],[math.pow(self.dt,3)/2,math.pow(self.dt,2),self.dt],[math.pow(self.dt,2)/2,self.dt,1]]).dot(np.array([[var_a,0,0],[0,var_a,0],[0,0,var_a]])))
            self.R.append(np.array([[0.04,0],[0,0.04]]))
        self.P = []     # error covariance matrix
        self.P_h = []   # predicted error covariance matrix
        self.Z_h = []   # predicted mean of the state
        self.Z = []     # estimated state
        self.y_h = []   # predicted output variables
        self.y = []     # measurements
        self.G = []     # Kalman gain
        for v in range(self.n_f):
            self.P.append([])
            self.P_h.append([])
            self.Z_h.append([])
            self.Z.append([])
            self.y_h.append([])
            self.y.append([])
            self.G.append([])
        for v in range(self.n_f):
            for i in range(n):
                self.P[v].append(np.array([[0.0001,0,0],[0,0.0001,0],[0,0,0.0001]]))
                self.P_h[v].append(np.array([[None,None,None],[None,None,None],[None,None,None]]))
                self.Z_h[v].append(np.array([[None],[None],[None]]))
                self.Z[v].append(np.array([[None],[None],[None]]))
                self.y_h[v].append(np.array([[None],[None]]))
                self.y[v].append(np.array([[None],[None]]))
                self.G[v].append(np.array([[None,None],[None,None],[None,None]]))
        for v in range(self.n_f):
            self.Z[v][int(self.tauS/self.dt)] = np.array([[pos0_leader-v*(v0_leader*self.td+self.vlength+self.min_gap)],[v0_leader],[0]])
        
        return(self.observedState)
    
    # Kalman filtering process
    def KF(self,v_num,distance_m,speed_m):
        #process / prediction of mean and variance
        self.Z_h[v_num][self.current_step+1] = self.F.dot(self.Z[v_num][self.current_step])
        self.P_h[v_num][self.current_step+1] = self.F.dot(self.P[v_num][self.current_step]).dot(self.F.transpose()) + self.Q[self.current_step]
        
        #predict output
        self.y_h[v_num][self.current_step+1] = self.H.dot(self.Z_h[v_num][self.current_step+1])
        
        #compute kalman gain
        self.G[v_num][self.current_step+1] = self.P_h[v_num][self.current_step+1].dot(self.H.transpose()).dot(np.linalg.inv(self.H.dot(self.P_h[v_num][self.current_step+1]).dot(self.H.transpose()) + self.R[self.current_step]))
        
        #collect measurements
        self.y[v_num][self.current_step+1] = np.array([[distance_m],[speed_m]])
        
        #update mean and covariance
        self.Z[v_num][self.current_step+1] = self.Z_h[v_num][self.current_step+1] + self.G[v_num][self.current_step+1].dot(self.y[v_num][self.current_step+1]-self.y_h[v_num][self.current_step+1])
        self.P[v_num][self.current_step+1] = (np.identity(3)-self.G[v_num][self.current_step+1].dot(self.H)).dot(self.P_h[v_num][self.current_step+1])
        #print("z_k+1 ",self.Z[v_num][self.current_step+1][0][0])
        return self.Z[v_num][self.current_step+1][0][0], self.Z[v_num][self.current_step+1][1][0]
    
    # run one time step and make a decision
    def step_test_multi(self, action):
        
        self.done = False
        
        # Receive the actions given by the agents
        # Assert that they are valid actions 
        for a in action:
            assert self.action_space.contains(a), "Invalid Action"
        
        # Calculate the num of time steps of sensor delay
        sensor_delay = int(self.tauS / self.dt)
        
        # Update leader acceleration
        if self.current_step > 30 and self.current_step <= 70:
            self.l[0].setA(-3,self.current_step)
        elif self.current_step > 120 and self.current_step <= 200:
            self.l[0].setA(1.5,self.current_step)
        else:
            self.l[0].setA(0,self.current_step)
        
        # Apply leader behavior
        self.l[0].updateSpeed(self.current_step+1)
        self.l[0].updatePos(self.current_step+1)
        
        # Initialize the variables which will be used
        self.u = [0] * self.n_f
        reward = [0] * self.n_f
        g_previous = [0] * self.n_f
        g_now = [0] * self.n_f
        d_previous = [0] * self.n_f
        d_now = [0] * self.n_f
        lspeed_now = [0] * self.n_f
        
        # Apply followers actions
        # Read the actions
        for v in range(self.n_f):
            if self.current_step < sensor_delay:
                self.u[v] = 0
            else:
                self.u[v] = action[v][0]
            
            # Update the accelerations, speeds, positions, and states
            self.f[v].updateA(self.u[v],self.current_step) 
            
            self.f[v].updatePos(self.current_step+1) 
            self.f[v].updateSpeed(self.current_step+1)
                       
            std_level_d = 0.2   # std. of the random error for distance gap
            std_level_s = 0.2   # std. of the random error for speed
            d_error = np.random.normal(0, std_level_d, None)
            s_error = np.random.normal(0, std_level_s, None)
            
            d_previous[v] = (self.f[v].predecessor1.getPos(self.current_step-sensor_delay) - self.f[v].getPos(self.current_step-sensor_delay) - self.vlength - self.min_gap)
            d_now[v] = (self.f[v].predecessor1.getPos(self.current_step+1-sensor_delay) - self.f[v].getPos(self.current_step+1-sensor_delay) - self.vlength - self.min_gap)
            lspeed_now[v] = self.f[v].predecessor1.getSpeed(self.current_step+1-sensor_delay)
            
            if self.current_step < sensor_delay:
                self.observedState[v] = self.observedState[v]
            else:
                # Call the defined KF function to process the distance gap and speed measurements
                dist_filter, s_filter = self.KF(v,self.f[v].predecessor1.getPos(self.current_step+1-sensor_delay)+d_error,lspeed_now[v]+s_error)
                self.observedState[v] = np.array([dist_filter - self.f[v].getPos(self.current_step+1-sensor_delay) - self.vlength - self.min_gap,
                                        self.f[v].getSpeed(self.current_step+1),
                                        s_filter - self.f[v].getSpeed(self.current_step+1-sensor_delay),
                                        self.f[v].getA(self.current_step) - self.f[v].getA(self.current_step - 1)])
            
            # Calculate rewards (not important for simulation testing)
            g_previous[v] = d_previous[v] / self.f[v].getSpeed(self.current_step)
            g_now[v] = d_now[v] / self.f[v].getSpeed(self.current_step+1)
            
            max_es = self.td * 0.5
            max_jerk = (self.u_max - self.u_min) / 3
            w1 = 0.9
            w2 = 0.1
            reward[v] = w1 * (-1) * abs(g_now[v] - self.td) / max_es + w2 * (-1) * abs(self.observedState[v][3]) / max_jerk
            
            # wrong behavior, negative reward
            if abs(g_previous[v]-self.td) < abs(g_now[v]-self.td):
                reward[v] = reward[v] + (abs(g_previous[v]-self.td) - abs(g_now[v]-self.td)) / max_es
        
        self.current_step += 1
        
        if self.current_step == 500:
            self.done = True
        
        return(self.observedState, reward, self.done, {})
    
    # Function to plot the simulation result of an episode
    def render(self, mode='human', close=False):
        render_num = 5  # Number of followers to show in the plot
        time = [(i*dt) for i in range(self.n)]
        for i in range (self.n):
            time[i] = round(time[i],1)        
        # plot results (acceleration, speed, time gap profiles)
        fig, axs = plt.subplots(3,figsize=(10,6),sharex=True)
        linecolor = ['tab:blue','tab:orange','tab:green','tab:red','tab:purple','tab:brown','tab:pink']
        for j in range(self.n_l):
            axs[0].plot(time,[self.l[j].getA(i) for i in range(self.n)],'--',color=linecolor[j])
            axs[1].plot(time,[self.l[j].getSpeed(i) for i in range(self.n)],'--',color=linecolor[j])
        for j in range(render_num):
            axs[0].plot(time,[self.f[j].getA(i) for i in range(self.n)],color=linecolor[j+1])
            axs[1].plot(time,[self.f[j].getSpeed(i) for i in range(self.n)],color=linecolor[j+1])
            axs[2].plot(time,[(self.f[j].predecessor1.getPos(i) - self.f[j].getPos(i) - self.vlength - self.min_gap) / self.f[j].getSpeed(i) for i in range(self.n)],color=linecolor[j+1])
        axs[0].set(xlabel='time (s)',ylabel='acceleration (m/s\u00b2)')
        axs[0].set_ylim([-6, 3])
        axs[1].set(xlabel='time (s)',ylabel='velocity (m/s)')
        axs[1].set_ylim([15, 40])
        axs[2].set(xlabel='time (s)',ylabel='gap 1 (s)')
        axs[2].set_ylim([0.85, 1.15])
        axs[0].legend(['leader1','follower1','follower2','follower3','follower4','follower5','follower6'],loc='lower right',fontsize='medium', ncol=2)
        
        fig.tight_layout()
        
        plt.show()

# Create and load the model
def make_env():
    def _init():
        env = one_ACC_KF()
        return env
    return _init

env = DummyVecEnv([make_env()])

model = PPO2.load("best_model_ACC1", env=env)

# Test trained agent
obs = env.env_method('reset_test_multi')
action = [None] * env.get_attr('n_f')[0]
state = [None] * env.get_attr('n_f')[0]

for i in range(500):
    for j in range(env.get_attr('n_f')[0]):
        action[j], state[j] = model.predict([obs[0][j]], deterministic=True)
    obs, rewards, done, info = env.env_method('step_test_multi',[action[j][0] for j in range(env.get_attr('n_f')[0])])[0]
    obs = [obs]
    rewards = [rewards]
    done = [done]
    info = [info]

env.render()

