import os
from .base import get_run_ids_by_exp_name

import fire
import yaml
import sys

class Experimenter:

    def test_policies(
            self,
            env_name,
            exp_name,
            model_steps,
            num_episodes,
            host="local",
            mode="test_first",
            update_config=False,
            enable_log=False,
            hours=1,
            minutes=0,
            cpus=2,
            mem=1024,
            gpus=0,
            start_seed=0):
        """Write policy-evaluation configs and launch ``policy_evaluation.test_policies``.

        One YAML config is written to ``policy_evaluation/configs/test_policies/``
        for every (run id, model step) pair, where the run ids are obtained
        from ``get_run_ids_by_exp_name(exp_name)``. Each config lists six
        fixed policy variants (raw / MCTS, with / without exploration noise,
        sampled / greedy). Every resulting command is printed; ``mode``
        decides which of them are actually executed.

        Args:
            env_name: Stored under ``env_name`` in every config.
            exp_name: Experiment name used to look up the run ids.
            model_steps: Iterable of model steps; each one gets its own config.
            num_episodes: Stored under ``num_episodes`` in every config.
            host: ``"local"``, ``"cluster"`` or ``"delftblue"``. For the two
                remote hosts the command is prefixed with ``./run_cluster_pc``
                or ``./run_delftblue`` followed by the resource flags.
            mode: ``"test_first"`` executes only the first command,
                ``"normal"`` executes all of them, ``"dry_run"`` executes none.
            update_config: When true and ``host`` is remote, each config is
                rsynced to ``<host>:EfficientZero/<config folder>``.
            enable_log: Adds ``--enable_log`` to the resource flags.
            hours: Value of the ``--hours`` resource flag.
            minutes: Value of the ``--minutes`` resource flag.
            cpus: Value of the ``--cpus`` resource flag.
            mem: Value of the ``--mem`` resource flag.
            gpus: Value of the ``--gpus`` flag; only added when ``gpus > 0``.
            start_seed: Offset added to the run index to form the config seed.

        Raises:
            ValueError: If ``host`` is unsupported, or if ``mode`` is
                unsupported and there is at least one command to run.
            SystemExit: With a non-zero status when an rsync fails.
        """
        test_name = "test_policies"
        config_folder_path = f"policy_evaluation/configs/{test_name}/"
        os.makedirs(config_folder_path, exist_ok=True)

        run_ids = get_run_ids_by_exp_name(exp_name)

        commands = []
        for run_i, run_id in enumerate(run_ids):
            for model_step in model_steps:
                # Key order is significant: the file is dumped with
                # sort_keys=False, so insertion order is the on-disk order.
                config = {
                    "env_name": env_name,
                    "run_id": run_id,
                    "model_steps": [model_step],
                    "num_episodes": num_episodes,
                    "policies": {
                        "MuZeroRaw": {
                            "use_MCTS": False,
                            "deterministic": False,
                        },
                        "MuZeroRawGreedy": {
                            "use_MCTS": False,
                            "deterministic": True,
                        },
                        "MuZeroMCTS_with_dirichlet_noise": {
                            "use_MCTS": True,
                            "with_exploration_noise": True,
                            "deterministic": False,
                        },
                        "MuZeroMCTSGreedy_with_dirichlet_noise": {
                            "use_MCTS": True,
                            "with_exploration_noise": True,
                            "deterministic": True,
                        },
                        "MuZeroMCTS_without_dirichlet_noise": {
                            "use_MCTS": True,
                            "with_exploration_noise": False,
                            "deterministic": False,
                        },
                        "MuZeroMCTSGreedy_without_dirichlet_noise": {
                            "use_MCTS": True,
                            "with_exploration_noise": False,
                            "deterministic": True,
                        },
                    },
                    "seed": run_i + start_seed,
                }

                config_name = f"run_id_{run_id}_step_{model_step}_num_episodes_{num_episodes}.yaml"
                config_path = os.path.join(config_folder_path, config_name)

                with open(config_path, "w") as f:
                    yaml.dump(config, f, sort_keys=False, default_flow_style=False)

                # The ssh alias of each remote host equals the host name.
                if update_config and host in ("delftblue", "cluster"):
                    status = os.system(f"rsync {config_path} {host}:EfficientZero/{config_folder_path}")
                    if status != 0:
                        # Abort with a non-zero exit status so callers and
                        # shell scripts can detect the failed sync.
                        sys.exit(f"rsync of {config_path} to {host} failed")

                python_command = f"python3 -m policy_evaluation.test_policies policy_evaluation/configs/{test_name}/{config_name}"

                job_name = f"test_policies_{run_id}_{model_step}"

                resource_flags = [
                    f"--cpus={cpus}",
                    f"--mem={mem}",
                    f"--hours={hours}",
                    f"--minutes={minutes}",
                    f"--job_name={job_name}",
                ]
                if gpus > 0:
                    resource_flags.append(f"--gpus={gpus}")
                if enable_log:
                    resource_flags.append("--enable_log")
                resources = " ".join(resource_flags)

                if host == "local":
                    prefix = ""
                elif host == "cluster":
                    prefix = f"./run_cluster_pc {resources} "
                elif host == "delftblue":
                    prefix = f"./run_delftblue {resources} "
                else:
                    raise ValueError(f"host {host} not supported")

                command = prefix + python_command
                print(command)
                commands.append(command)

        # Nothing to execute (and, as before, nothing to validate) when no
        # command was generated.
        if not commands:
            return
        if mode == "test_first":
            os.system(commands[0])
        elif mode == "normal":
            for command in commands:
                os.system(command)
        elif mode != "dry_run":
            raise ValueError(f"mode {mode} not supported")

    def test_self_value_prediction_error(
            self,
            env_name,
            exp_name,
            model_steps,
            n_states,
            n_rollouts_per_state,
            horizon,
            host="local",
            mode="test_first",
            update_config=False,
            enable_log=False,
            hours=1,
            minutes=0,
            cpus=2,
            mem=1024,
            gpus=0,
            start_seed=0):
        """Write configs and launch ``policy_evaluation.test_value_prediction_error``.

        One YAML config is written to
        ``policy_evaluation/configs/test_self_prediction_error/`` for every
        (run id, model step) pair, where the run ids are obtained from
        ``get_run_ids_by_exp_name(exp_name)``. Each config holds a single
        evaluation in which the model step and the policy step are the same
        and the state distribution is ``"model"``; actions use the
        ``"sample"`` method. Every resulting command is printed; ``mode``
        decides which of them are actually executed.

        Args:
            env_name: Stored under ``env_name`` in every config.
            exp_name: Experiment name used to look up the run ids.
            model_steps: Iterable of model steps; each one gets its own config.
            n_states: Stored under ``num_states_to_sample``.
            n_rollouts_per_state: Stored under
                ``action_sampling.N_rollouts_per_state``.
            horizon: Stored under ``evaluation_horizon``.
            host: ``"local"``, ``"cluster"`` or ``"delftblue"``. For the two
                remote hosts the command is prefixed with ``./run_cluster_pc``
                or ``./run_delftblue`` followed by the resource flags.
            mode: ``"test_first"`` executes only the first command,
                ``"normal"`` executes all of them, ``"dry_run"`` executes none.
            update_config: When true and ``host`` is remote, each config is
                rsynced to ``<host>:EfficientZero/<config folder>``.
            enable_log: Adds ``--enable_log`` to the resource flags.
            hours: Value of the ``--hours`` resource flag.
            minutes: Value of the ``--minutes`` resource flag.
            cpus: Value of the ``--cpus`` resource flag.
            mem: Value of the ``--mem`` resource flag.
            gpus: Value of the ``--gpus`` flag; only added when ``gpus > 0``.
            start_seed: Offset added to the run index to form the config seed.

        Raises:
            ValueError: If ``host`` is unsupported, or if ``mode`` is
                unsupported and there is at least one command to run.
            SystemExit: With a non-zero status when an rsync fails.
        """
        test_name = "test_self_prediction_error"
        config_folder_path = f"policy_evaluation/configs/{test_name}/"
        os.makedirs(config_folder_path, exist_ok=True)

        run_ids = get_run_ids_by_exp_name(exp_name)

        commands = []
        for i, run_id in enumerate(run_ids):
            for model_step in model_steps:
                # Key order is significant: the file is dumped with
                # sort_keys=False, so insertion order is the on-disk order.
                config = {
                    "env_name": env_name,
                    "run_id": run_id,
                    "evaluation_horizon": horizon,
                    "num_states_to_sample": n_states,
                    "action_sampling": {
                        "method": "sample",
                        "noise_rate": 0.0,
                        "N_rollouts_per_state": n_rollouts_per_state,
                    },
                    "seed": i + start_seed,
                    "evaluations": [
                        {
                            "model_step": model_step,
                            "policy_step": model_step,
                            "state_distribution": "model",
                        }
                    ],
                }

                config_name = (
                    f"run_id_{run_id}_step_{model_step}_N_states_{n_states}"
                    f"_N_rollouts_{n_rollouts_per_state}_horizon_{horizon}.yaml"
                )
                config_path = os.path.join(config_folder_path, config_name)

                with open(config_path, "w") as f:
                    yaml.dump(config, f, sort_keys=False, default_flow_style=False)

                # The ssh alias of each remote host equals the host name.
                if update_config and host in ("delftblue", "cluster"):
                    status = os.system(f"rsync {config_path} {host}:EfficientZero/{config_folder_path}")
                    if status != 0:
                        # Abort with a non-zero exit status so callers and
                        # shell scripts can detect the failed sync.
                        sys.exit(f"rsync of {config_path} to {host} failed")

                python_command = f"python3 -m policy_evaluation.test_value_prediction_error policy_evaluation/configs/{test_name}/{config_name}"

                job_name = f"SVPE_{run_id}_{model_step}"

                resource_flags = [
                    f"--cpus={cpus}",
                    f"--mem={mem}",
                    f"--hours={hours}",
                    f"--minutes={minutes}",
                    f"--job_name={job_name}",
                ]
                if gpus > 0:
                    resource_flags.append(f"--gpus={gpus}")
                if enable_log:
                    resource_flags.append("--enable_log")
                resources = " ".join(resource_flags)

                if host == "local":
                    prefix = ""
                elif host == "cluster":
                    prefix = f"./run_cluster_pc {resources} "
                elif host == "delftblue":
                    prefix = f"./run_delftblue {resources} "
                else:
                    raise ValueError(f"host {host} not supported")

                command = prefix + python_command
                print(command)
                commands.append(command)

        # Nothing to execute (and, as before, nothing to validate) when no
        # command was generated.
        if not commands:
            return
        if mode == "test_first":
            os.system(commands[0])
        elif mode == "normal":
            for command in commands:
                os.system(command)
        elif mode != "dry_run":
            raise ValueError(f"mode {mode} not supported")
            
    def test_value_prediction_error_enumerate(
            self,
            env_name,
            exp_name,
            model_steps,
            n_states,
            horizon,
            host="local",
            mode="test_first",
            update_config=False,
            enable_log=False,
            hours=1,
            minutes=0,
            cpus=2,
            mem=1024,
            gpus=0,
            start_seed=0):
        """Write configs and launch ``policy_evaluation.test_value_prediction_error``.

        One YAML config is written to
        ``policy_evaluation/configs/test_value_prediction_error_enumerate/``
        for every (run id, model step) pair, where the run ids are obtained
        from ``get_run_ids_by_exp_name(exp_name)``. Each config holds a single
        evaluation in which the model step and the policy step are the same
        and the state distribution is ``"model"``; the action sampling method
        is ``"enumerate2"``. Every resulting command is printed; ``mode``
        decides which of them are actually executed.

        Args:
            env_name: Stored under ``env_name`` in every config.
            exp_name: Experiment name used to look up the run ids.
            model_steps: Iterable of model steps; each one gets its own config.
            n_states: Stored under ``num_states_to_sample``.
            horizon: Stored under ``evaluation_horizon``.
            host: ``"local"``, ``"cluster"`` or ``"delftblue"``. For the two
                remote hosts the command is prefixed with ``./run_cluster_pc``
                or ``./run_delftblue`` followed by the resource flags.
            mode: ``"test_first"`` executes only the first command,
                ``"normal"`` executes all of them, ``"dry_run"`` executes none.
            update_config: When true and ``host`` is remote, each config is
                rsynced to ``<host>:EfficientZero/<config folder>``.
            enable_log: Adds ``--enable_log`` to the resource flags.
            hours: Value of the ``--hours`` resource flag.
            minutes: Value of the ``--minutes`` resource flag.
            cpus: Value of the ``--cpus`` resource flag.
            mem: Value of the ``--mem`` resource flag.
            gpus: Value of the ``--gpus`` flag; only added when ``gpus > 0``.
            start_seed: Offset added to the run index to form the config seed.

        Raises:
            ValueError: If ``host`` is unsupported, or if ``mode`` is
                unsupported and there is at least one command to run.
            SystemExit: With a non-zero status when an rsync fails.
        """
        test_name = "test_value_prediction_error_enumerate"
        config_folder_path = f"policy_evaluation/configs/{test_name}/"
        os.makedirs(config_folder_path, exist_ok=True)

        run_ids = get_run_ids_by_exp_name(exp_name)

        commands = []
        for i, run_id in enumerate(run_ids):
            for model_step in model_steps:
                # Key order is significant: the file is dumped with
                # sort_keys=False, so insertion order is the on-disk order.
                config = {
                    "env_name": env_name,
                    "run_id": run_id,
                    "evaluation_horizon": horizon,
                    "num_states_to_sample": n_states,
                    "action_sampling": {
                        "method": "enumerate2",
                    },
                    "seed": i + start_seed,
                    "evaluations": [
                        {
                            "model_step": model_step,
                            "policy_step": model_step,
                            "state_distribution": "model",
                        }
                    ],
                }

                config_name = f"run_id_{run_id}_step_{model_step}_N_states_{n_states}_horizon_{horizon}.yaml"
                config_path = os.path.join(config_folder_path, config_name)

                with open(config_path, "w") as f:
                    yaml.dump(config, f, sort_keys=False, default_flow_style=False)

                # The ssh alias of each remote host equals the host name.
                if update_config and host in ("delftblue", "cluster"):
                    status = os.system(f"rsync {config_path} {host}:EfficientZero/{config_folder_path}")
                    if status != 0:
                        # Abort with a non-zero exit status so callers and
                        # shell scripts can detect the failed sync.
                        sys.exit(f"rsync of {config_path} to {host} failed")

                python_command = f"python3 -m policy_evaluation.test_value_prediction_error policy_evaluation/configs/{test_name}/{config_name}"

                job_name = f"VPEE_{run_id}_{model_step}"

                resource_flags = [
                    f"--cpus={cpus}",
                    f"--mem={mem}",
                    f"--hours={hours}",
                    f"--minutes={minutes}",
                    f"--job_name={job_name}",
                ]
                if gpus > 0:
                    resource_flags.append(f"--gpus={gpus}")
                if enable_log:
                    resource_flags.append("--enable_log")
                resources = " ".join(resource_flags)

                if host == "local":
                    prefix = ""
                elif host == "cluster":
                    prefix = f"./run_cluster_pc {resources} "
                elif host == "delftblue":
                    prefix = f"./run_delftblue {resources} "
                else:
                    raise ValueError(f"host {host} not supported")

                command = prefix + python_command
                print(command)
                commands.append(command)

        # Nothing to execute (and, as before, nothing to validate) when no
        # command was generated.
        if not commands:
            return
        if mode == "test_first":
            os.system(commands[0])
        elif mode == "normal":
            for command in commands:
                os.system(command)
        elif mode != "dry_run":
            raise ValueError(f"mode {mode} not supported")
            
    def test_cross_model_policy_value_prediction_error(
            self,
            env_name,
            exp_name,
            model_steps,
            policy_steps,
            state_distribution,
            n_states,
            n_rollouts_per_state,
            horizon,
            host="local",
            mode="test_first",
            update_config=False,
            enable_log=False,
            hours=1,
            minutes=0,
            cpus=2,
            mem=1024,
            gpus=0,
            start_seed=0):
        """Write configs and launch ``policy_evaluation.test_value_prediction_error``.

        One YAML config is written to
        ``policy_evaluation/configs/test_cross_model_policy_value_prediction_error/``
        for every (run id, model step, policy step) combination, where the run
        ids are obtained from ``get_run_ids_by_exp_name(exp_name)``. Each
        config holds a single evaluation pairing that model step with that
        policy step; actions use the ``"sample"`` method. The number of run
        ids and every resulting command are printed; ``mode`` decides which
        commands are actually executed.

        Args:
            env_name: Stored under ``env_name`` in every config.
            exp_name: Experiment name used to look up the run ids.
            model_steps: Iterable of model steps.
            policy_steps: Iterable of policy steps; iterated once per model
                step, so it must be re-iterable (e.g. a list).
            state_distribution: Stored under ``state_distribution`` of the
                evaluation and embedded in the config and job names.
            n_states: Stored under ``num_states_to_sample``.
            n_rollouts_per_state: Stored under
                ``action_sampling.N_rollouts_per_state``.
            horizon: Stored under ``evaluation_horizon``.
            host: ``"local"``, ``"cluster"`` or ``"delftblue"``. For the two
                remote hosts the command is prefixed with ``./run_cluster_pc``
                or ``./run_delftblue`` followed by the resource flags.
            mode: ``"test_first"`` executes only the first command,
                ``"normal"`` executes all of them, ``"dry_run"`` executes none.
            update_config: When true and ``host`` is remote, each config is
                rsynced to ``<host>:EfficientZero/<config folder>``.
            enable_log: Adds ``--enable_log`` to the resource flags.
            hours: Value of the ``--hours`` resource flag.
            minutes: Value of the ``--minutes`` resource flag.
            cpus: Value of the ``--cpus`` resource flag.
            mem: Value of the ``--mem`` resource flag.
            gpus: Value of the ``--gpus`` flag; only added when ``gpus > 0``.
            start_seed: Offset added to the run index to form the config seed.

        Raises:
            ValueError: If ``host`` is unsupported, or if ``mode`` is
                unsupported and there is at least one command to run.
            SystemExit: With a non-zero status when an rsync fails.
        """
        test_name = "test_cross_model_policy_value_prediction_error"
        config_folder_path = f"policy_evaluation/configs/{test_name}/"
        os.makedirs(config_folder_path, exist_ok=True)

        run_ids = get_run_ids_by_exp_name(exp_name)

        commands = []
        print(len(run_ids))
        for i, run_id in enumerate(run_ids):
            for model_step in model_steps:
                for policy_step in policy_steps:
                    # Key order is significant: the file is dumped with
                    # sort_keys=False, so insertion order is the on-disk order.
                    config = {
                        "env_name": env_name,
                        "run_id": run_id,
                        "evaluation_horizon": horizon,
                        "num_states_to_sample": n_states,
                        "action_sampling": {
                            "method": "sample",
                            "noise_rate": 0.0,
                            "N_rollouts_per_state": n_rollouts_per_state,
                        },
                        "seed": i + start_seed,
                        "evaluations": [
                            {
                                "model_step": model_step,
                                "policy_step": policy_step,
                                "state_distribution": state_distribution,
                            }
                        ],
                    }

                    config_name = (
                        f"run_id_{run_id}_model_step_{model_step}_policy_step_{policy_step}"
                        f"_N_states_{n_states}_N_rollouts_{n_rollouts_per_state}"
                        f"_horizon_{horizon}_sd_{state_distribution}.yaml"
                    )
                    config_path = os.path.join(config_folder_path, config_name)

                    with open(config_path, "w") as f:
                        yaml.dump(config, f, sort_keys=False, default_flow_style=False)

                    python_command = f"python3 -m policy_evaluation.test_value_prediction_error policy_evaluation/configs/{test_name}/{config_name}"

                    job_name = f"VPE_{run_id}_{model_step}_{policy_step}_{state_distribution}"

                    resource_flags = [
                        f"--cpus={cpus}",
                        f"--mem={mem}",
                        f"--hours={hours}",
                        f"--minutes={minutes}",
                        f"--job_name={job_name}",
                    ]
                    if gpus > 0:
                        resource_flags.append(f"--gpus={gpus}")
                    if enable_log:
                        resource_flags.append("--enable_log")
                    resources = " ".join(resource_flags)

                    if host == "local":
                        prefix = ""
                    elif host == "cluster":
                        prefix = f"./run_cluster_pc {resources} "
                    elif host == "delftblue":
                        prefix = f"./run_delftblue {resources} "
                    else:
                        raise ValueError(f"host {host} not supported")

                    command = prefix + python_command
                    print(command)
                    commands.append(command)

                    # The ssh alias of each remote host equals the host name.
                    if update_config and host in ("delftblue", "cluster"):
                        status = os.system(f"rsync {config_path} {host}:EfficientZero/{config_folder_path}")
                        if status != 0:
                            # Abort with a non-zero exit status so callers and
                            # shell scripts can detect the failed sync.
                            sys.exit(f"rsync of {config_path} to {host} failed")

        # Nothing to execute (and, as before, nothing to validate) when no
        # command was generated.
        if not commands:
            return
        if mode == "test_first":
            os.system(commands[0])
        elif mode == "normal":
            for command in commands:
                os.system(command)
        elif mode != "dry_run":
            raise ValueError(f"mode {mode} not supported")
    
    def test_policy_improvement(
            self,
            env_name,
            exp_name,
            model_steps,
            num_episodes,
            horizons,
            sims,
            temperature,
            host="local",
            mode="test_first",
            update_config=False,
            enable_log=False,
            hours=1,
            minutes=0,
            cpus=2,
            mem=1024,
            gpus=0,
            start_seed=0):
        """Write policy-improvement configs and launch ``policy_evaluation.test_policies``.

        One YAML config is written to
        ``policy_evaluation/configs/test_policy_improvement/`` for every
        (run id, model step) pair, where the run ids are obtained from
        ``get_run_ids_by_exp_name(exp_name)``. Each config contains the
        ``MuZeroRaw`` policy plus, for every (horizon, sim) combination, four
        MCTS policies covering learned vs. true model and with vs. without a
        uniform prior. Every resulting command is printed; ``mode`` decides
        which of them are actually executed.

        Args:
            env_name: Stored under ``env_name`` in every config.
            exp_name: Experiment name used to look up the run ids.
            model_steps: Iterable of model steps; each one gets its own config.
            num_episodes: Stored under ``num_episodes`` in every config.
            horizons: Rollout horizons; iterated several times, so it must be
                re-iterable (e.g. a list).
            sims: Simulation counts; must be re-iterable as well.
            temperature: Stored under ``temperature`` of every MCTS policy.
            host: ``"local"``, ``"cluster"`` or ``"delftblue"``. For the two
                remote hosts the command is prefixed with ``./run_cluster_pc``
                or ``./run_delftblue`` followed by the resource flags.
            mode: ``"test_first"`` executes only the first command,
                ``"normal"`` executes all of them, ``"dry_run"`` executes none.
            update_config: When true and ``host`` is remote, each config is
                rsynced to ``<host>:EfficientZero/<config folder>``.
            enable_log: Adds ``--enable_log`` to the resource flags.
            hours: Value of the ``--hours`` resource flag.
            minutes: Value of the ``--minutes`` resource flag.
            cpus: Value of the ``--cpus`` resource flag.
            mem: Value of the ``--mem`` resource flag.
            gpus: Value of the ``--gpus`` flag; only added when ``gpus > 0``.
            start_seed: Offset added to the run index to form the config seed.

        Raises:
            ValueError: If ``host`` is unsupported, or if ``mode`` is
                unsupported and there is at least one command to run.
            SystemExit: With a non-zero status when an rsync fails.
        """
        test_name = "test_policy_improvement"
        config_folder_path = f"policy_evaluation/configs/{test_name}/"
        os.makedirs(config_folder_path, exist_ok=True)

        run_ids = get_run_ids_by_exp_name(exp_name)

        commands = []
        for i, run_id in enumerate(run_ids):
            for model_step in model_steps:
                policies = {
                    "MuZeroRaw": {
                        "use_MCTS": False,
                        "deterministic": False,
                    },
                }

                # NOTE: the key order inside each policy dict is deliberately
                # left exactly as it was, because the config is dumped with
                # sort_keys=False and reordering would change the YAML files.
                for horizon in horizons:
                    for sim in sims:
                        suffix = f"sim_{sim}_horizon_{horizon}_temperature_{temperature}"
                        policies[f"MuZeroMCTS_with_uniform_prior_{suffix}"] = {
                            "use_MCTS": True,
                            "deterministic": False,
                            "with_exploration_noise": True,
                            "rollout_horizon": horizon,
                            "temperature": temperature,
                            "num_simulations": sim,
                            "use_true_model": False,
                            "use_uniform_prior": True,
                        }
                        policies[f"MuZeroMCTS_without_uniform_prior_{suffix}"] = {
                            "use_MCTS": True,
                            "deterministic": False,
                            "rollout_horizon": horizon,
                            "with_exploration_noise": True,
                            "temperature": temperature,
                            "num_simulations": sim,
                            "use_true_model": False,
                            "use_uniform_prior": False,
                        }
                        policies[f"TrueMCTS_with_uniform_prior_{suffix}"] = {
                            "use_MCTS": True,
                            "deterministic": False,
                            "rollout_horizon": horizon,
                            "with_exploration_noise": True,
                            "temperature": temperature,
                            "num_simulations": sim,
                            "use_true_model": True,
                            "use_uniform_prior": True,
                        }
                        policies[f"TrueMCTS_without_uniform_prior_{suffix}"] = {
                            "use_MCTS": True,
                            "deterministic": False,
                            "with_exploration_noise": True,
                            "rollout_horizon": horizon,
                            "temperature": temperature,
                            "num_simulations": sim,
                            "use_true_model": True,
                            "use_uniform_prior": False,
                        }

                config = {
                    "env_name": env_name,
                    "run_id": run_id,
                    "seed": i + start_seed,
                    "model_steps": [model_step],
                    "num_episodes": num_episodes,
                    "policies": policies,
                }

                horizons_tag = ",".join(str(x) for x in horizons)
                sims_tag = ",".join(str(x) for x in sims)
                config_name = (
                    f"run_id_{run_id}_step_{model_step}_num_episodes_{num_episodes}"
                    f"_temperature_{temperature}_horizons_{horizons_tag}_sims_{sims_tag}.yaml"
                )

                config_path = os.path.join(config_folder_path, config_name)
                with open(config_path, "w") as f:
                    yaml.dump(config, f, sort_keys=False, default_flow_style=False)

                # The ssh alias of each remote host equals the host name.
                if update_config and host in ("delftblue", "cluster"):
                    status = os.system(f"rsync {config_path} {host}:EfficientZero/{config_folder_path}")
                    if status != 0:
                        # Abort with a non-zero exit status so callers and
                        # shell scripts can detect the failed sync.
                        sys.exit(f"rsync of {config_path} to {host} failed")

                python_command = f"python3 -m policy_evaluation.test_policies policy_evaluation/configs/{test_name}/{config_name}"

                job_name = f"PI_{run_id}_{model_step}"

                resource_flags = [
                    f"--cpus={cpus}",
                    f"--mem={mem}",
                    f"--hours={hours}",
                    f"--minutes={minutes}",
                    f"--job_name={job_name}",
                ]
                if gpus > 0:
                    resource_flags.append(f"--gpus={gpus}")
                if enable_log:
                    resource_flags.append("--enable_log")
                resources = " ".join(resource_flags)

                if host == "local":
                    prefix = ""
                elif host == "cluster":
                    prefix = f"./run_cluster_pc {resources} "
                elif host == "delftblue":
                    prefix = f"./run_delftblue {resources} "
                else:
                    raise ValueError(f"host {host} not supported")

                command = prefix + python_command
                print(command)
                commands.append(command)

        # Nothing to execute (and, as before, nothing to validate) when no
        # command was generated.
        if not commands:
            return
        if mode == "test_first":
            os.system(commands[0])
        elif mode == "normal":
            for command in commands:
                os.system(command)
        elif mode != "dry_run":
            raise ValueError(f"mode {mode} not supported")
            
    def test_policy_improvement_c1(
            self,
            env_name,
            exp_name,
            model_steps,
            num_episodes,
            horizons,
            sims,
            pb_c_inits,
            temperature,
            host="local",
            mode="test_first",
            update_config=False,
            enable_log=False,
            hours=1,
            minutes=0,
            cpus=2,
            mem=1024,
            gpus=0,
            start_seed=0):
        """Write ``pb_c_init`` sweep configs and launch ``policy_evaluation.test_policies``.

        One YAML config is written to
        ``policy_evaluation/configs/test_policy_improvement_c1/`` for every
        (run id, model step, pb_c_init) combination, where the run ids are
        obtained from ``get_run_ids_by_exp_name(exp_name)``. Each config
        contains the ``MuZeroRaw`` policy plus, for every (horizon, sim)
        combination, four MCTS policies covering learned vs. true model and
        with vs. without a uniform prior, all carrying that ``pb_c_init``.

        Unlike the sibling methods, each command is handled right after its
        config is written: in ``"test_first"`` mode the method stops after the
        first command, so only one config is generated.

        Args:
            env_name: Stored under ``env_name`` in every config.
            exp_name: Experiment name used to look up the run ids.
            model_steps: Iterable of model steps.
            num_episodes: Stored under ``num_episodes`` in every config.
            horizons: Rollout horizons; iterated several times, so it must be
                re-iterable (e.g. a list).
            sims: Simulation counts; must be re-iterable as well.
            pb_c_inits: Values stored under ``pb_c_init``; one config each.
                Must be re-iterable.
            temperature: Stored under ``temperature`` of every MCTS policy.
            host: ``"local"``, ``"cluster"`` or ``"delftblue"``. For the two
                remote hosts the command is prefixed with ``./run_cluster_pc``
                or ``./run_delftblue`` followed by the resource flags.
            mode: ``"test_first"`` executes the first command and stops,
                ``"normal"`` executes every command, ``"dry_run"`` executes
                none.
            update_config: When true and ``host`` is remote, each config is
                rsynced to ``<host>:EfficientZero/<config folder>``.
            enable_log: Adds ``--enable_log`` to the resource flags.
            hours: Value of the ``--hours`` resource flag.
            minutes: Value of the ``--minutes`` resource flag.
            cpus: Value of the ``--cpus`` resource flag.
            mem: Value of the ``--mem`` resource flag.
            gpus: Value of the ``--gpus`` flag; only added when ``gpus > 0``.
            start_seed: Offset added to the run index to form the config
                seed. Defaults to 0, which keeps the seed equal to the run
                index.

        Raises:
            ValueError: If ``host`` or ``mode`` is unsupported (checked while
                processing the first generated config).
            SystemExit: With a non-zero status when an rsync fails.
        """
        test_name = "test_policy_improvement_c1"
        config_folder_path = f"policy_evaluation/configs/{test_name}/"
        os.makedirs(config_folder_path, exist_ok=True)

        run_ids = get_run_ids_by_exp_name(exp_name)

        for i, run_id in enumerate(run_ids):
            for model_step in model_steps:
                for pb_c_init in pb_c_inits:
                    policies = {
                        "MuZeroRaw": {
                            "use_MCTS": False,
                            "deterministic": False,
                        },
                    }

                    # NOTE: the key order inside each policy dict is
                    # deliberately left exactly as it was, because the config
                    # is dumped with sort_keys=False and reordering would
                    # change the YAML files.
                    for horizon in horizons:
                        for sim in sims:
                            suffix = f"sim_{sim}_horizon_{horizon}_temperature_{temperature}_c1_{pb_c_init}"
                            policies[f"MuZeroMCTS_without_uniform_prior_{suffix}"] = {
                                "use_MCTS": True,
                                "deterministic": False,
                                "rollout_horizon": horizon,
                                "with_exploration_noise": True,
                                "temperature": temperature,
                                "num_simulations": sim,
                                "use_true_model": False,
                                "use_uniform_prior": False,
                                "pb_c_init": pb_c_init,
                            }
                            policies[f"TrueMCTS_without_uniform_prior_{suffix}"] = {
                                "use_MCTS": True,
                                "deterministic": False,
                                "with_exploration_noise": True,
                                "rollout_horizon": horizon,
                                "temperature": temperature,
                                "num_simulations": sim,
                                "use_true_model": True,
                                "use_uniform_prior": False,
                                "pb_c_init": pb_c_init,
                            }
                            policies[f"MuZeroMCTS_with_uniform_prior_{suffix}"] = {
                                "use_MCTS": True,
                                "deterministic": False,
                                "rollout_horizon": horizon,
                                "with_exploration_noise": True,
                                "temperature": temperature,
                                "num_simulations": sim,
                                "use_true_model": False,
                                "use_uniform_prior": True,
                                "pb_c_init": pb_c_init,
                            }
                            policies[f"TrueMCTS_with_uniform_prior_{suffix}"] = {
                                "use_MCTS": True,
                                "deterministic": False,
                                "with_exploration_noise": True,
                                "rollout_horizon": horizon,
                                "temperature": temperature,
                                "num_simulations": sim,
                                "use_true_model": True,
                                "use_uniform_prior": True,
                                "pb_c_init": pb_c_init,
                            }

                    config = {
                        "env_name": env_name,
                        "run_id": run_id,
                        "seed": i + start_seed,
                        "model_steps": [model_step],
                        "num_episodes": num_episodes,
                        "policies": policies,
                    }

                    horizons_tag = ",".join(str(x) for x in horizons)
                    sims_tag = ",".join(str(x) for x in sims)
                    config_name = (
                        f"run_id_{run_id}_step_{model_step}_num_episodes_{num_episodes}"
                        f"_temperature_{temperature}_horizons_{horizons_tag}"
                        f"_sims_{sims_tag}_pbcinit_{pb_c_init}.yaml"
                    )

                    config_path = os.path.join(config_folder_path, config_name)
                    with open(config_path, "w") as f:
                        yaml.dump(config, f, sort_keys=False, default_flow_style=False)

                    python_command = f"python3 -m policy_evaluation.test_policies policy_evaluation/configs/{test_name}/{config_name}"

                    job_name = f"PIc_{run_id}_{model_step}_{pb_c_init}"

                    resource_flags = [
                        f"--cpus={cpus}",
                        f"--mem={mem}",
                        f"--hours={hours}",
                        f"--minutes={minutes}",
                        f"--job_name={job_name}",
                    ]
                    if gpus > 0:
                        resource_flags.append(f"--gpus={gpus}")
                    if enable_log:
                        resource_flags.append("--enable_log")
                    resources = " ".join(resource_flags)

                    if host == "local":
                        prefix = ""
                    elif host == "cluster":
                        prefix = f"./run_cluster_pc {resources} "
                    elif host == "delftblue":
                        prefix = f"./run_delftblue {resources} "
                    else:
                        raise ValueError(f"host {host} not supported")

                    command = prefix + python_command
                    print(command)

                    # The ssh alias of each remote host equals the host name.
                    if update_config and host in ("delftblue", "cluster"):
                        status = os.system(f"rsync {config_path} {host}:EfficientZero/{config_folder_path}")
                        if status != 0:
                            # Abort with a non-zero exit status so callers and
                            # shell scripts can detect the failed sync.
                            sys.exit(f"rsync of {config_path} to {host} failed")

                    if mode == "test_first":
                        os.system(command)
                        break
                    elif mode == "normal":
                        os.system(command)
                    elif mode != "dry_run":
                        raise ValueError(f"mode {mode} not supported")

                # In test_first mode, unwind the remaining loops as well.
                if mode == "test_first":
                    break

            if mode == "test_first":
                break
            
    def test_policy_improvement_log_prob_error(
            self,
            env_name,
            exp_name,
            model_steps,
            num_episodes,
            horizons,
            sims,
            pb_c_inits,
            temperature,
            host="local",
            mode="test_first",
            update_config=False,
            enable_log=False,
            hours=1,
            minutes=0,
            partition="mmll,insy,influence,general,st",
            cpus=2,
            mem=1024,
            gpus=0):
        """Write log-prob-error evaluation configs and launch one job per config.

        One YAML config is written for every (run id, model step, pb_c_init)
        combination. Each config holds two MCTS policies (without / with a
        uniform prior) for every (horizon, sim) pair. The matching
        ``policy_evaluation.test_policies`` command is printed and, depending
        on ``mode``, executed through ``os.system``.

        Args:
            env_name: Stored under ``env_name`` in every config.
            exp_name: Passed to ``get_run_ids_by_exp_name`` to obtain the run ids.
            model_steps: Iterable of model steps; each config holds exactly one.
            num_episodes: Stored under ``num_episodes`` in every config.
            horizons: Iterable of values used as ``rollout_horizon``.
            sims: Iterable of values used as ``num_simulations``.
            pb_c_inits: Iterable of values used as ``pb_c_init``.
            temperature: Stored under ``temperature`` in every policy.
            host: ``"local"`` runs the python command directly, ``"cluster"``
                prefixes it with ``./run_cluster_pc`` and ``"delftblue"`` with
                ``./run_delftblue``.
            mode: ``"test_first"`` runs only the first command and returns,
                ``"normal"`` runs every command, ``"dry_run"`` runs none
                (configs are still written and commands still printed).
            update_config: When true and ``host`` is ``"cluster"`` or
                ``"delftblue"``, rsync each config file to that host.
            enable_log: Adds ``--enable_log`` to the launcher flags.
            hours, minutes, partition, cpus, mem: Forwarded as launcher flags.
            gpus: Forwarded as ``--gpus`` only when greater than zero.

        Raises:
            ValueError: If ``host`` or ``mode`` is not one of the supported
                values (raised while processing the first combination).
            SystemExit: With a non-zero status if an rsync invocation fails.
        """
        test_name = "test_policy_improvement_log_prob_error"
        config_folder_path = f"policy_evaluation/configs/{test_name}/"
        os.makedirs(config_folder_path, exist_ok=True)

        run_ids = get_run_ids_by_exp_name(exp_name)

        # These parts of the config file name do not depend on the loop variables.
        horizons_tag = ",".join(str(x) for x in horizons)
        sims_tag = ",".join(str(x) for x in sims)

        # The index of the run id within run_ids doubles as the seed.
        for seed, run_id in enumerate(run_ids):
            for model_step in model_steps:
                for pb_c_init in pb_c_inits:
                    # Insertion order matters: the YAML is dumped with
                    # sort_keys=False, so keys appear in the order added here.
                    policies = {}
                    for horizon in horizons:
                        for sim in sims:
                            for prior_tag, use_uniform_prior in (("without", False), ("with", True)):
                                policy_name = f"MuZeroMCTS_{prior_tag}_uniform_prior_sim_{sim}_horizon_{horizon}_temperature_{temperature}_c1_{pb_c_init}_log_prob_error_fixed"
                                policies[policy_name] = {
                                    "use_MCTS": True,
                                    "deterministic": False,
                                    "rollout_horizon": horizon,
                                    "with_exploration_noise": False,
                                    "temperature": temperature,
                                    "num_simulations": sim,
                                    "use_true_model": False,
                                    "use_uniform_prior": use_uniform_prior,
                                    "pb_c_init": pb_c_init,
                                    "log_prob_error": True,
                                }

                    config = {
                        "env_name": env_name,
                        "run_id": run_id,
                        "seed": seed,
                        "model_steps": [model_step],
                        "num_episodes": num_episodes,
                        "policies": policies,
                        "log_prob_error": True,
                    }

                    config_name = "run_id_{}_step_{}_num_episodes_{}_temperature_{}_horizons_{}_sims_{}_pbcinit_{}_log_prob_error_fixed.yaml".format(
                        run_id,
                        model_step,
                        num_episodes,
                        temperature,
                        horizons_tag,
                        sims_tag,
                        pb_c_init,
                    )

                    config_path = os.path.join(config_folder_path, config_name)
                    with open(config_path, "w") as f:
                        yaml.dump(config, f, sort_keys=False, default_flow_style=False)

                    # The rsync destination name is the same as the host name.
                    if update_config and host in ("delftblue", "cluster"):
                        remote = f"{host}:muzero-general/{config_folder_path}"
                        if os.system(f"rsync {config_path} {remote}") != 0:
                            # Exit non-zero so callers can detect the failed
                            # sync; the message is written to stderr.
                            sys.exit(f"rsync of {config_path} to {remote} failed")

                    python_command = f"python3 -m policy_evaluation.test_policies policy_evaluation/configs/{test_name}/{config_name}"

                    job_name = f"PIerror_{run_id}_{model_step}_{pb_c_init}"

                    resources = f"--cpus={cpus} --mem={mem} --hours={hours} --minutes={minutes} --job_name={job_name} --partition={partition}"
                    if gpus > 0:
                        resources += f" --gpus={gpus}"
                    if enable_log:
                        resources += " --enable_log"

                    if host == "local":
                        prefix = ""
                    elif host == "cluster":
                        prefix = f"./run_cluster_pc {resources} "
                    elif host == "delftblue":
                        prefix = f"./run_delftblue {resources} "
                    else:
                        raise ValueError("host {} not supported".format(host))

                    command = prefix + python_command
                    print(command)

                    if mode not in ("test_first", "normal", "dry_run"):
                        raise ValueError("mode {} not supported".format(mode))
                    if mode != "dry_run":
                        os.system(command)
                    if mode == "test_first":
                        # Only the very first command is launched in this mode.
                        return

# Script entry point: hand the Experimenter class to python-fire so it can be
# driven from the command line.
if __name__ == "__main__":
    fire.Fire(component=Experimenter)