#!/bin/bash

# Input parameters
# Every setting keeps its previous hard-coded value as the default, but can
# now be overridden from the environment without editing this file, e.g.
#   INPUT_PBF=../data/raw/osm/canary-islands-latest.osm.pbf bash <this script>
# The default paths are relative, so they resolve against the directory the
# script is started from.
INPUT_PBF="${INPUT_PBF:-../data/raw/osm/europe-190101.osm.pbf}"    # Your source PBF file
#INPUT_PBF="../data/raw/osm/canary-islands-latest.osm.pbf" # Canary Islands
POLY_ROOT_DIR="${POLY_ROOT_DIR:-../data/processed/perimeter}"     # Directory containing country subdirectories
FRONTIERS_DIR="${FRONTIERS_DIR:-../data/processed/frontiers}"         # Where to save the frontiers
BUILDINGS_DIR="${BUILDINGS_DIR:-../data/processed/buildings}"         # Where to save the buildings


# Function to extract and filter fields
#
# Prints the subset of the requested property names that actually occur in
# the "properties" objects of a GeoJSON file, keeping the requested order.
#
# Arguments:
#   $1 - path to the GeoJSON file (handed to jq as its input file)
#   $2 - comma-separated list of requested field names
# Outputs:
#   the matching field names joined by commas on stdout; an empty line when
#   none of the requested fields is present
filter_fields() {
    local geojson_file=$1        # Input GeoJSON file
    local requested_fields=$2    # Comma-separated list of requested fields
    # Everything below is local so that calling the function never clobbers
    # caller variables of the same name.
    local available_fields filtered_fields="" field
    local -a fields

    # Extract all unique keys from the "properties" object across all rows
    available_fields=$(jq -r 'select(.properties != null) | .properties | keys[]' "$geojson_file" | sort -u)

    # Keep only the requested fields that are present in the data
    IFS=',' read -ra fields <<< "$requested_fields"
    for field in "${fields[@]}"; do
        # An empty entry (e.g. from "a,,b") is not a field name
        [ -n "$field" ] || continue
        # -F -x: compare the whole line literally, so characters such as '.'
        # in a field name are not interpreted as regex operators;
        # '--' protects names that start with a dash.
        if grep -Fxq -- "$field" <<< "$available_fields"; then
            filtered_fields+="${field},"
        fi
    done

    # Remove trailing comma, if any
    filtered_fields=${filtered_fields%,}

    # Return the filtered fields
    echo "$filtered_fields"
}

# Main processing loop.
# For every sub-directory of POLY_ROOT_DIR (one per country) and every .poly
# file directly inside it:
#   1. cut a spatial extract out of INPUT_PBF with the .poly boundary,
#   2. filter the extract for frontier features (selected highway, railway,
#      waterway and natural values) and for buildings,
#   3. export both to GeoJSON sequences and convert them to GeoPackage files
#      with ogr2ogr (-t_srs EPSG:3035), keeping only the attributes listed
#      below that are actually present in the data.
# A .poly file is skipped when its buildings GeoPackage already exists.

# Attribute lists are the same for every file, so define them once.
fields_frontiers="highway,waterway,railway,natural"
fields_buildings="building,start_date,construction:date,built"

for country_path in "$POLY_ROOT_DIR"/* ; do
    if [ ! -d "$country_path" ]; then
        continue
    fi

    country_dir=$(basename "$country_path")
    echo "Processing country: $country_dir"

    frontiers_out="$FRONTIERS_DIR/$country_dir"
    buildings_out="$BUILDINGS_DIR/$country_dir"

    # Create country directory in output
    mkdir -p "$frontiers_out" "$buildings_out"

    # Process all .poly files in the country directory.
    # NUL-delimited names plus 'IFS= read -r' keep paths with spaces,
    # backslashes or newlines intact.
    find "$country_path" -maxdepth 1 -name "*.poly" -print0 | while IFS= read -r -d '' poly_file; do
        # Get the base name without extension (e.g., DEU53_0)
        base_name=$(basename "$poly_file" .poly)
        # If the output files already exist, skip
        if [ -f "$buildings_out/${base_name}_buildings.gpkg" ]; then
            echo "Skipping: $base_name"
            continue
        fi

        temp_file="$frontiers_out/${base_name}_temp.pbf"

        echo "Processing: $base_name"
        # If initial extract does not exist, create it
        if [ ! -f "$temp_file" ]; then
            echo "Creating spatial extract..."
            if ! osmium extract --strategy complete_ways -p "$poly_file" "$INPUT_PBF" -o "$temp_file"; then
                echo "Error: spatial extract failed for $base_name, skipping" >&2
                # Drop any partial output so the next run does not reuse it
                # through the existence check above.
                rm -f "$temp_file"
                continue
            fi
        fi

        echo "Filtering highways..."
        osmium tags-filter -t "$temp_file" \
            highway=motorway,motorway_junction,motorway_link,trunk,trunk_link,primary,primary_link \
            railway=light_rail,rail \
            waterway=river,canal,fairway,rapids \
            natural=bay,strait,water,wetland \
            -o "$frontiers_out/${base_name}.pbf"

        # Then export to separate GeoJSON files by geometry
        osmium export --geometry-types=polygon,multipolygon --output-format geojsonseq \
            -o "$frontiers_out/${base_name}_poly.geojson" \
            "$frontiers_out/${base_name}.pbf"

        osmium export --geometry-types=linestring --output-format geojsonseq \
            -o "$frontiers_out/${base_name}_line.geojson" \
            "$frontiers_out/${base_name}.pbf"

        rm "$frontiers_out/${base_name}.pbf"

        echo "Filtering buildings..."
        osmium tags-filter -t "$temp_file" building=* -o "$buildings_out/${base_name}_buildings.pbf"

        # Remove temporary file
        rm "$temp_file"

        # Then export to GeoJSON
        osmium export --geometry-types=polygon,multipolygon --output-format geojsonseq \
            -o "$buildings_out/${base_name}_buildings.geojson" \
            "$buildings_out/${base_name}_buildings.pbf"

        rm "$buildings_out/${base_name}_buildings.pbf"

        # Frontier polygons: export only when at least one requested
        # attribute is present in the data.
        filtered_fields_frontiers_poly=$(filter_fields "$frontiers_out/${base_name}_poly.geojson" "$fields_frontiers")
        if [ -n "$filtered_fields_frontiers_poly" ]; then
            echo "Exporting polygons frontiers to GPKG..."
            ogr2ogr -f "GPKG" "$frontiers_out/${base_name}_poly.gpkg" "$frontiers_out/${base_name}_poly.geojson" \
                -select "$filtered_fields_frontiers_poly" \
                -t_srs EPSG:3035
        fi

        # Frontier lines: same rule as for the polygons.
        filtered_fields_frontiers_line=$(filter_fields "$frontiers_out/${base_name}_line.geojson" "$fields_frontiers")
        if [ -n "$filtered_fields_frontiers_line" ]; then
            echo "Exporting lines frontiers to GPKG..."
            ogr2ogr -f "GPKG" "$frontiers_out/${base_name}_line.gpkg" "$frontiers_out/${base_name}_line.geojson" \
                -select "$filtered_fields_frontiers_line" \
                -t_srs EPSG:3035
        fi

        # Removing temporary file
        rm "$frontiers_out/${base_name}_poly.geojson" "$frontiers_out/${base_name}_line.geojson"

        # Buildings: guarded like the frontier exports. With an empty list,
        # an unquoted -select would take "-t_srs" as its field list and
        # break the ogr2ogr command line.
        filtered_fields_buildings=$(filter_fields "$buildings_out/${base_name}_buildings.geojson" "$fields_buildings")
        if [ -n "$filtered_fields_buildings" ]; then
            echo "$filtered_fields_buildings"
            echo "Exporting polygons buildings to GPKG..."
            ogr2ogr -f "GPKG" "$buildings_out/${base_name}_buildings.gpkg" "$buildings_out/${base_name}_buildings.geojson" \
                -select "$filtered_fields_buildings" \
                -t_srs EPSG:3035
        else
            echo "No building attributes found for $base_name, skipping buildings GPKG export" >&2
        fi

        # Removing temporary file
        rm "$buildings_out/${base_name}_buildings.geojson"

        echo "Completed: $base_name"
        echo "------------------------"
    done
done

echo "All extracts completed!"

# Print summary
#
# Prints one line per country sub-directory of the given root with the
# number of GeoPackage (.gpkg) files it contains. The .pbf files written to
# these directories are removed again during processing, so the .gpkg files
# are what is left to count.
#
# Arguments:
#   $1 - root directory holding one sub-directory per country
# Outputs:
#   a header line followed by "<dir>: <n> extracts" lines on stdout
print_summary() {
    local root=$1
    local country_dir gpkg count

    echo "Summary of created extracts:"
    for country_dir in "$root"/*/ ; do
        # With no sub-directories the glob stays literal; skip it
        [ -d "$country_dir" ] || continue

        # Count with a glob instead of parsing ls output
        count=0
        for gpkg in "$country_dir"*.gpkg; do
            if [ -e "$gpkg" ]; then
                count=$((count + 1))
            fi
        done
        echo "$country_dir: $count extracts"
    done
}

print_summary "$FRONTIERS_DIR"