{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data preparation and infrastructure exposure to flooding\n",
    "\n",
    "This notebook forms the basis of \"Hands-On 5\" in the CCG course.\n",
    "\n",
    "1. Extract infrastructure data from OpenStreetMap\n",
    "2. Extract flood hazard data from Aqueduct\n",
    "3. Intersect floods with roads to calculate exposure\n",
    "4. Open QGIS to look at the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The os and subprocess modules are built into Python\n",
    "# see https://docs.python.org/3/library/os.html\n",
    "import os\n",
    "\n",
    "# see https://docs.python.org/3/library/subprocess.html\n",
    "import subprocess\n",
    "\n",
    "# see https://docs.python.org/3/library/time.html\n",
    "import time\n",
    "\n",
    "# see https://docs.python.org/3/library/pathlib.html\n",
    "from pathlib import Path"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Activity 1: Extract infrastructure data\n",
    "\n",
    "#### Step 1) In the parent directory of this package, create a folder called `ghana_tutorial`\n",
    "\n",
    "#### Step 2) Create a variable to store the folder location"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir = (\n",
    "    Path(os.getcwd()).resolve().parents[3]\n",
    ")  # get parent directory of snail package\n",
    "new_folder = os.path.join(dir, \"ghana_tutorial\")\n",
    "if not os.path.exists(new_folder):\n",
    "    os.makedirs(new_folder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# edit this if using a Mac (otherwise delete)\n",
    "data_folder = Path(new_folder)  # Path(\"YOUR_PATH/ghana_tutorial\")\n",
    "\n",
    "# edit this if using Windows (otherwise delete)\n",
    "# data_folder = Path(\"C:YOUR_PATH/ghana_tutorial\")\n",
    "\n",
    "# delete this line\n",
    "# data_folder = Path(\"../data\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3) Load Python libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pandas and GeoPandas are libraries for working with datasets\n",
    "# see https://geopandas.org/\n",
    "import geopandas as gpd\n",
    "\n",
    "gpd._compat.USE_PYGEOS = False\n",
    "# see https://pandas.pydata.org/\n",
    "import pandas as pd\n",
    "\n",
    "# This package interacts with a risk data extract service, also accessible at\n",
    "# https://global.infrastructureresilience.org/downloads\n",
    "import irv_autopkg_client\n",
    "\n",
    "# We'll use snail to intersect roads with flooding\n",
    "import snail.intersection\n",
    "import snail.io\n",
    "\n",
    "# snkit helps generate connected networks from lines and nodes\n",
    "# see https://snkit.readthedocs.io/\n",
    "import snkit\n",
    "import snkit.network\n",
    "\n",
    "# PyPROJ is a library for working with geographic projections\n",
    "# see https://pyproj4.github.io/\n",
    "from pyproj import Geod\n",
    "\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "from urllib.request import urlretrieve\n",
    "import zipfile"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 4) and 5) Download and save data"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Download the `ghana-latest-free.shp.zip` dataset from http://download.geofabrik.de/africa/ghana.html, extract the zip folder and save the extracted folder within your new folder `ghana_tutorial`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "download_url = \"https://download.geofabrik.de/africa/ghana-latest-free.shp.zip\"\n",
    "file_path = os.path.join(data_folder, \"ghana-osm.zip\")\n",
    "# check if directory already exists\n",
    "if not os.path.exists(Path(os.path.splitext(file_path)[0])):\n",
    "    # check if data already exists in this directory\n",
    "    # if len(os.listdir(Path(os.path.splitext(file_path)[0]))) == 0:\n",
    "    urlretrieve(download_url, file_path)  # note: this can take a few minutes\n",
    "    with zipfile.ZipFile(file_path, \"r\") as zip_ref:\n",
    "        zip_ref.extractall(os.path.splitext(file_path)[0])\n",
    "else:\n",
    "    print(\"data already exists\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 6) Load the road dataset you've just downloaded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roads = gpd.read_file(\n",
    "    os.path.join(os.path.splitext(file_path)[0], \"gis_osm_roads_free_1.shp\")\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 7) To take a look at the data and the attribute table fill in and run the next two cells"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roads.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roads.fclass.unique()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 8) Next we want to make a couple of changes to the data"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Filter out minor and residential roads, tracks and paths."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the specified columns\n",
    "roads = roads[[\"osm_id\", \"fclass\", \"name\", \"geometry\"]]\n",
    "# Keep only the roads whose \"fclass\" is in the list\n",
    "roads = roads[\n",
    "    roads.fclass.isin(\n",
    "        [\n",
    "            \"motorway\",\n",
    "            \"motorway_link\",\n",
    "            \"trunk\",\n",
    "            \"trunk_link\",\n",
    "            \"primary\",\n",
    "            \"primary_link\",\n",
    "            \"secondary\",\n",
    "            \"secondary_link\",\n",
    "            \"tertiary\",\n",
    "            \"tertiary_link\",\n",
    "        ]\n",
    "    )\n",
    "]\n",
    "# Rename some columns\n",
    "roads = roads.rename(\n",
    "    columns={\n",
    "        \"fclass\": \"road_type\",\n",
    "    }\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create topological network information - this adds information that will let us find routes over the road network.\n",
    "- add nodes at the start and end of each road segment\n",
    "- split roads at junctions, so each segment goes from junction to junction\n",
    "- add ids to each node and edge, and add `from_id` and `to_id` to each edge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "road_network = snkit.Network(edges=roads)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(road_network)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with_endpoints = snkit.network.add_endpoints(road_network)\n",
    "split_edges = snkit.network.split_edges_at_nodes(with_endpoints)\n",
    "with_ids = snkit.network.add_ids(\n",
    "    split_edges, id_col=\"id\", edge_prefix=\"roade\", node_prefix=\"roadn\"\n",
    ")\n",
    "connected = snkit.network.add_topology(with_ids)\n",
    "roads = connected.edges\n",
    "road_nodes = connected.nodes"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Calculate the length of each road segment in meters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "geod = Geod(ellps=\"WGS84\")\n",
    "roads[\"length_m\"] = roads.geometry.apply(geod.geometry_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roads.tail(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roads.set_crs(4326, inplace=True)\n",
    "road_nodes.set_crs(4326, inplace=True)\n",
    "road_nodes.crs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "main_roads = roads[\n",
    "    roads[\"road_type\"].isin(\n",
    "        [\n",
    "            \"trunk\",\n",
    "            \"secondary\",\n",
    "        ]\n",
    "    )\n",
    "]\n",
    "\n",
    "f, ax = plt.subplots()\n",
    "\n",
    "main_roads.plot(\n",
    "    ax=ax,\n",
    "    alpha=1,\n",
    "    linewidth=0.5,\n",
    ")\n",
    "\n",
    "ax.grid()\n",
    "ax.set_title(\"Main roads of Ghana\")\n",
    "ax.set_xlabel(\"Longitude [deg]\")\n",
    "ax.set_ylabel(\"Latitude [deg]\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 9) Save the pre-processed dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roads.to_file(\n",
    "    data_folder / \"GHA_OSM_roads.gpkg\",\n",
    "    layer=\"edges\",\n",
    "    driver=\"GPKG\",\n",
    ")\n",
    "road_nodes.to_file(\n",
    "    data_folder / \"GHA_OSM_roads.gpkg\",\n",
    "    layer=\"nodes\",\n",
    "    driver=\"GPKG\",\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Activity 2: Extract hazard data\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The full [Aqueduct dataset](https://www.wri.org/resources/data-sets/aqueduct-floods-hazard-maps) is available to download openly. \n",
    "\n",
    "Country-level extracts are available through the [Global Systemic Risk Assessment Tool (G-SRAT)](https://global.infrastructureresilience.org/downloads/). This section uses that service to download an extract for Ghana."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Alternative: download flood hazard data from Aqueduct"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The full [Aqueduct dataset](https://www.wri.org/resources/data-sets/aqueduct-floods-hazard-maps) is available to download. There are some scripts and summary of the data you may find useful at [nismod/aqueduct](https://github.com/nismod/aqueduct).\n",
    "\n",
    "There are almost 700 files in the full Aqueduct dataset, of up to around 100MB each, so we don't recommend downloading all of them unless you intend to do further analysis.\n",
    "\n",
    "The next steps show how to clip a region out of the global dataset, in case you prefer to work from the original global Aqueduct files.\n",
    "\n",
    "To follow this step, we suggest downloading [inunriver_historical_000000000WATCH_1980_rp00100.tif](http://wri-projects.s3.amazonaws.com/AqueductFloodTool/download/v2/inunriver_historical_000000000WATCH_1980_rp00100.tif) to work through the next steps. Save the downloaded file in a new folder titled `flood_layer` under your data_folder."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "country_iso = \"gha\""
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a client to connect to the data API:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = irv_autopkg_client.Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# client.dataset_list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# client.dataset(\"wri_aqueduct.version_2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client.extract_download(\n",
    "    country_iso,\n",
    "    data_folder / \"flood_layer\",\n",
    "    # there may be other datasets available, but only download the following\n",
    "    dataset_filter=[\"wri_aqueduct.version_2\"],\n",
    "    overwrite=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "xmin = \"-3.262509\"\n",
    "ymin = \"4.737128\"\n",
    "xmax = \"1.187968\"\n",
    "ymax = \"11.162937\"\n",
    "\n",
    "for root, dirs, files in os.walk(os.path.join(data_folder, \"flood_layer\")):\n",
    "    print(\"Looking in\", root)\n",
    "    for file_ in sorted(files):\n",
    "        if file_.endswith(\".tif\") and not file_.endswith(\n",
    "            f\"-{country_iso}.tif\"\n",
    "        ):\n",
    "            print(\"Found tif file\", file_)\n",
    "            stem = file_[:-4]\n",
    "            input_file = os.path.join(root, file_)\n",
    "\n",
    "            # Clip file to bounds\n",
    "            clip_file = os.path.join(\n",
    "                root,\n",
    "                \"gha\",\n",
    "                \"wri_aqueduct_version_2\",\n",
    "                f\"{stem}-{country_iso}.tif\",\n",
    "            )\n",
    "            try:\n",
    "                os.remove(clip_file)\n",
    "            except FileNotFoundError:\n",
    "                pass\n",
    "            cmd = [\n",
    "                \"gdalwarp\",\n",
    "                \"-te\",\n",
    "                xmin,\n",
    "                ymin,\n",
    "                xmax,\n",
    "                ymax,\n",
    "                input_file,\n",
    "                clip_file,\n",
    "            ]\n",
    "            print(cmd)\n",
    "            p = subprocess.run(cmd, capture_output=True)\n",
    "            print(p.stdout.decode(\"utf8\"))\n",
    "            print(p.stderr.decode(\"utf8\"))\n",
    "            print(clip_file)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Activity 3: Intersect hazard "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let us now intersect the hazard and the roads, starting with one hazard initially so we save time."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 1) Specify your input and output path as well as the name of the intersection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flood_path = Path(\n",
    "    data_folder,\n",
    "    \"flood_layer\",\n",
    "    \"gha\",\n",
    "    \"wri_aqueduct_version_2\",\n",
    "    \"wri_aqueduct-version_2-inunriver_historical_000000000WATCH_1980_rp00100-gha.tif\",\n",
    ")\n",
    "\n",
    "output_path = Path(\n",
    "    data_folder,\n",
    "    \"results\",\n",
    "    \"inunriver_historical_000000000WATCH_1980_rp00100__roads_exposure.gpkg\",\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read in pre-processed road edges, as created earlier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roads = gpd.read_file(data_folder / \"GHA_OSM_roads.gpkg\", layer=\"edges\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 2) Run the intersection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid, bands = snail.io.read_raster_metadata(flood_path)\n",
    "\n",
    "prepared = snail.intersection.prepare_linestrings(roads)\n",
    "flood_intersections = snail.intersection.split_linestrings(prepared, grid)\n",
    "flood_intersections = snail.intersection.apply_indices(\n",
    "    flood_intersections, grid\n",
    ")\n",
    "flood_data = snail.io.read_raster_band_data(flood_path)\n",
    "flood_intersections[\"inunriver__epoch_historical__rcp_baseline__rp_100\"] = (\n",
    "    snail.intersection.get_raster_values_for_splits(\n",
    "        flood_intersections, flood_data\n",
    "    )\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Calculate the exposed length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "geod = Geod(ellps=\"WGS84\")\n",
    "flood_intersections[\"flood_length_m\"] = flood_intersections.geometry.apply(\n",
    "    geod.geometry_length\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flood_intersections.tail(2)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Calculate the proportion of roads in our dataset which are exposed to >=1m flood depths in this scenario"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exposed_1m = flood_intersections[\n",
    "    flood_intersections.inunriver__epoch_historical__rcp_baseline__rp_100 >= 1\n",
    "]\n",
    "exposed_length_km = exposed_1m.flood_length_m.sum() * 1e-3\n",
    "exposed_length_km"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_roads_in_dataset_length_km = roads.length_m.sum() * 1e-3\n",
    "all_roads_in_dataset_length_km"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "proportion = exposed_length_km / all_roads_in_dataset_length_km\n",
    "proportion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "f\"{proportion:.1%} of roads in this dataset are exposed to flood depths of >= 1m in a historical 1-in-100 year flood\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output_path.parent.mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save to file (with spatial data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flood_intersections.to_file(output_path, driver=\"GPKG\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save to CSV (without spatial data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flood_intersections.drop(columns=\"geometry\").to_csv(\n",
    "    output_path.parent / output_path.name.replace(\".gpkg\", \".csv\")\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}