import gym
import numpy as np

import time

import myenv
from controllers.mpc.free_push_mpc_controller_casadi import Free_Push_mpc_controller
from general_problem_setup import Problem, Index


if __name__ == "__main__":
    env = gym.make('cylinder_robot_pushing_recBox-v0')
    env.init_path_candidates_both(1)

    pr = Problem()
    index = Index()
    obj_pos_goal = pr.obj_pos_goal
    obj_theta_goal = pr.obj_theta_goal

    # Initializing robot mpc casadi
    mpc_controller = Free_Push_mpc_controller()
    recompile = True
    solver_build_name = 'free_push_mpc_controller_casadi'    
    mpc_controller.generate_solver(recompile, solver_build_name)

    for i_episode in range(20):
        system_state_current = env.reset()   # state = np.concatenate([rob_state, obj_state], 0)

        for t in range(10000):
            env.render()
            print("observation: ", system_state_current)
            # action = env.action_space.sample()
            # compute control action 
            # Run mpc
            mpc_controller.set_system_state_current(system_state_current)
            mpc_controller.set_obj_pos_theta_goal(obj_pos_goal, obj_theta_goal)
            mpc_controller.run_mpc()
            env.show_paths_candidates_both(mpc_controller.system_state_plan)

            # retrieve info
            robot_control_current = mpc_controller.robot_control_current
            robot_acc_omega_acc_current = mpc_controller.robot_acc_omega_acc_current

            # action = np.array([0., 0.])  # action is the acceleration, even equals to zero. It will still move with the initial velocity
            print("action: ", robot_acc_omega_acc_current)
            system_state_current, reward, done, info = env.step(robot_acc_omega_acc_current)
            # print("mpc_solve_time: ", mpc_controller.mpc_solve_time)

            time. sleep(0.1)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
    env.close()