{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "### Import cell\n", "import os \n", "import os.path as pt\n", "from os import path\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import math\n", "\n", "import array\n", "import random\n", "from datetime import datetime, timedelta\n", "import shutil" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0StationDateMeasurement
00TA000962020-01-01 03:00:000
11TA000962020-01-01 06:00:000
22TA000962020-01-01 09:00:000
33TA000962020-01-01 12:00:000
44TA000962020-01-01 15:00:000
...............
111221111221TA006872020-12-31 09:00:000
111222111222TA006872020-12-31 12:00:000
111223111223TA006872020-12-31 15:00:000
111224111224TA006872020-12-31 18:00:000
111225111225TA006872020-12-31 21:00:000
\n", "

111226 rows × 4 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 Station Date Measurement\n", "0 0 TA00096 2020-01-01 03:00:00 0\n", "1 1 TA00096 2020-01-01 06:00:00 0\n", "2 2 TA00096 2020-01-01 09:00:00 0\n", "3 3 TA00096 2020-01-01 12:00:00 0\n", "4 4 TA00096 2020-01-01 15:00:00 0\n", "... ... ... ... ...\n", "111221 111221 TA00687 2020-12-31 09:00:00 0\n", "111222 111222 TA00687 2020-12-31 12:00:00 0\n", "111223 111223 TA00687 2020-12-31 15:00:00 0\n", "111224 111224 TA00687 2020-12-31 18:00:00 0\n", "111225 111225 TA00687 2020-12-31 21:00:00 0\n", "\n", "[111226 rows x 4 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2019 = pd.read_csv('./Output_files/samples_2020.csv')\n", "data2019" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0StationDateMeasurement
7609776097TA004572020-12-31 09:00:000
7609876098TA004572020-12-31 12:00:000
7609976099TA004572020-12-31 15:00:000
7610076100TA004572020-12-31 18:00:000
7610176101TA004572020-12-31 21:00:000
\n", "
" ], "text/plain": [ " Unnamed: 0 Station Date Measurement\n", "76097 76097 TA00457 2020-12-31 09:00:00 0\n", "76098 76098 TA00457 2020-12-31 12:00:00 0\n", "76099 76099 TA00457 2020-12-31 15:00:00 0\n", "76100 76100 TA00457 2020-12-31 18:00:00 0\n", "76101 76101 TA00457 2020-12-31 21:00:00 0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2019.loc[(data2019['Station']=='TA00457')].tail()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import time\n", "from multiprocessing import cpu_count\n", "from multiprocessing.pool import ThreadPool\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "def sequence_date(iterable):\n", " date, output_parent_dir, msg_parent_dir = iterable\n", "\n", " data = pd.read_csv('./Output_files/samples_' + str(date.year) + '.csv', parse_dates=['Date'])\n", "\n", " date_start = pd.Timestamp(date) # Starting at the time of the image\n", " date_end = pd.Timestamp(date) + timedelta(hours=3) # Finishing2h 45 min later\n", " \n", " #1. Extract all stations that took data in a day and create folders\n", " len_sts_unique = len(data.loc[data['Date']==date, 'Station'].unique())\n", "\n", " print('DATE:', date)\n", " print('stations:', data.loc[data['Date']==date, 'Station'].unique())\n", " \n", " for ix, st in enumerate(data.loc[data['Date']==date, 'Station'].unique()):\n", " name_folder = st + '_' + str(date_start.strftime('%Y.%m.%d_%H'))\n", " \n", " # MAKING THE FOLDERS TO STORE THE IMAGES\n", " # if (pt.exists(output_parent_dir + name_folder) == 0): os.mkdir(output_parent_dir + name_folder)\n", " \n", " # POPULATE EACH FOLDER WITH THE RESPECTIVE FILE\n", " \n", " # # Timestamps of all 15 min files\n", " # for minu in np.arange(12):\n", " # timestamp = date_start + timedelta(minutes=int(minu*15))\n", " \n", " # # Convert that datetime to the format in the filenames of the MSG images\n", " # str_search = str(timestamp.strftime('%Y%m%d_%H'))\n", " \n", " # dir_st = msg_parent_dir+st\n", " # print('dir_st1:', dir_st)\n", " # directory = os.listdir(dir_st)\n", " # print('dir_st2:', dir_st)\n", " # ## Look for the files that have that string and keep them in a folder (different per hour)\n", " # for fname in directory:\n", " # if str_search in fname:\n", " # print('fname', fname)\n", " # #Copy the file to an output directory\n", " # shutil.copy(dir_st+'/'+fname, output_parent_dir + name_folder)\n", "\n", "\n", "def sequences_parallel(iterables):\n", " print('Running in parallel')\n", " cpus = cpu_count()\n", " results = ThreadPool(cpus - 1).imap_unordered(sequence_date, iterables, chunksize=100)\n", "\n", "def sequences_create(year, channel):\n", " data = pd.read_csv('./Output_files/samples_' + str(year) + '.csv', parse_dates=['Date'])\n", "\n", " # Directory where the MSG images are (from here, they are divided in subfolders per station)\n", " output_parent_dir = r'Q:/InputData/SAVANNA-MSG-32pix-' +str(year) + '-' + channel + '/'\n", " if (pt.exists(output_parent_dir) == 0): os.mkdir(output_parent_dir)\n", " msg_parent_dir = r'Q:/' + str(year) + '-MSG-32pix-' + channel + '/'\n", "\n", " iterables = pd.DataFrame()\n", " iterables['dates'] = data['Date'].unique()\n", " iterables['output_parent_dir'] = [output_parent_dir] * len(iterables)\n", " iterables['msg_parent_dir'] = [msg_parent_dir] * len(iterables)\n", " \n", " sequences_parallel(iterables.to_numpy())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "YEAR: 2020\n", "CHANNEL: WV_073\n", "\t FINISHED CHANNEL WV_073\n", " -------------------------\n", "\t FINISHED YEAR 2020\n", " -------------------------\n" ] } ], "source": [ "for year in [2020]:\n", " print('YEAR: ', year)\n", " for channel in ['WV_073']:\n", " print('CHANNEL:', channel)\n", " \n", " data = pd.read_csv('./Output_files/samples_' + str(year) + '.csv', parse_dates=['Date'])\n", " msg_parent_dir = r'E:/InputData/' + str(year) + '-MSG-32pix-' + channel + '/'\n", " output_parent_dir = r'E:/InputData/SAVANNA-MSG-32pix-' + str(year) + '-' + channel + '/'\n", " \n", " if (pt.exists(output_parent_dir) == 0): os.mkdir(output_parent_dir)\n", " \n", " for ix, row in data.iterrows():\n", " if ix>76100:\n", " \n", " st = row.Station\n", " date_start = row.Date\n", "\n", " # The folder from which we will take the images\n", " source_dir = msg_parent_dir + st \n", " if(pt.exists(source_dir)):\n", " # Folder where the images will be stored - one per row (identified with sequence and station). \n", " name_folder = st + '_' + str(date_start.strftime('%Y.%m.%d_%H'))\n", " target_dir = output_parent_dir + name_folder \n", " if (pt.exists(target_dir) == 0): \n", " #if the sequence has not been processed yet, i.e. there is no folder, make the folder and take the files\n", " os.mkdir(target_dir)\n", "\n", " # Locate all the files in the source directory and copy them to the target directory\n", "\n", " for hrs in np.arange(3):\n", " timestamp = date_start + timedelta(hours=int(hrs))\n", " # Convert that datetime to the format in the filenames of the MSG images\n", " str_search = str(timestamp.strftime('%Y%m%d_%H'))\n", "\n", " ## Look for the files that have that string and keep them in a folder (different per hour)\n", " for fname in os.listdir(source_dir):\n", " if str_search in fname:\n", " #Copy the file to an output directory\n", " shutil.copy(source_dir+'/'+fname, target_dir)\n", " \n", " print(f'\\t FINISHED CHANNEL {channel}\\n -------------------------')\n", " \n", " print(f'\\t FINISHED YEAR {year}\\n -------------------------')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 4 }