Constraining layers¶
Among the layers of data available, some have high granularity and will be used as a rescaling variables to distribute values that would otherwise be uniform in space at a coarser resolution. We to link certain economic activity related variables to some infrastructural and geographic layers, and assuming some form of homogeneity, distribute the coarse resolutions layers proportionally to the the distribution, say, of infrastrucutre in a location. This notebook will lay some basis for this and show the scalenav functionalities developped for that purpose.
import numpy as np
# scalenav modules
import scalenav.scale_nav as sd
import scalenav.data as dt
from scalenav.plotting import cmap
import scalenav.oop as snoo
# data engineering
import pandas as pd
import h3
import ibis as ib
from ibis import _
ib.options.interactive = True
### Visualisation
import pydeck as pdk
import pypalettes as pypal
H3 + DuckDB scale up¶
Connect to a temporary duckdb database in which we will be running most of the computations. The output of the sn_connect function is a connection object to the duckdb instance with the necessary dependencies installed and ready to use.
# For a detailed explanation of what goes on here, refer to the oop guide.
conn = snoo.sn_connect()
Connecting to a temporary in-memory DB instance.
H3 and grid parameters¶
Reading data layers¶
To read layers into our newly created database, we call the sn_table function, which will receive as parameter the connection object representing the database into which the table should be loaded, the name that we want to assign to the table in the backend and the path ro find the data. It will return an ibis.Table object.
dose_wdi_geo_file = "/Users/cenv1069/Documents/data/datasets/local_data/dose-wdi/0_3/dose_wdi_geo.parquet"
mapspam_file = "/Users/cenv1069/Documents/data/datasets/mapspam/processed/spam_2020_yield_v2.parquet"
dwg = snoo.sn_table(conn=conn,name="dwg",path=dose_wdi_geo_file)
spam = snoo.sn_table(conn,name="spam",path=mapspam_file)
# Have a look at the data:
dwg.head()
┏━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓ ┃ gid_0 ┃ country ┃ gid_1 ┃ grp_usd_2015 ┃ services_usd_2015 ┃ manufacturing_usd_2015 ┃ agriculture_usd_2015 ┃ centr ┃ geometry ┃ color ┃ radius ┃ x ┃ y ┃ ┡━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩ │ string │ string │ string │ float64 │ float64 │ float64 │ float64 │ binary │ binary │ array<int64> │ float64 │ float64 │ float64 │ ├────────┼─────────┼─────────┼──────────────┼───────────────────┼────────────────────────┼──────────────────────┼─────────────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┼───────────────────┼───────────┼───────────┼───────────┤ │ ALB │ Albania │ ALB.1_1 │ 4.341915e+08 │ 2.009631e+08 │ 9.449606e+07 │ 8.588401e+07 │ b'\x01\x01\x00\x00\x00@\xa0\x88\x02=\x174@\x9d\xa3\xc5\xb3rPD@' │ b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xbe\x02\x00\x00\x07h\x11@ a4@\xd3\xa9\x02 \xb47D@\xcd\x15\x10 d`4@\xc4G\xfe?\xd27D@\xa0*\xf0_j_4@\xd8\x98\xf8\x1f\xf07D@$\xb3\xfe\xbf\x7f^4@.:\xf6?\x028D@\x88\xce\xf8'+11165 │ [238, 97, ... +2] │ 19.888996 │ 20.090775 │ 40.628500 │ │ ALB │ Albania │ ALB.2_1 │ 3.719427e+08 │ 1.721516e+08 │ 8.094841e+07 │ 7.357106e+07 │ b'\x01\x01\x00\x00\x00\xe6M\xcan\xe1?4@\x87\xf1\xed\xcdg\xcbD@' │ b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xbb\x02\x00\x00\xa0\x16\x07\xc0\rl4@\xf5\x0e\xf9_\x15\xa4D@\xdfC\x15@)l4@u\xe3\xfe\x9f\x1a\xa4D@\x1f\x99\xf5\xff\xfdk4@:\xa9\xf5\x9f1\xa4D@\xb5O\t\x80\xd8k4@\xab\xc4\x02\x80_\xa4D@\x9fY\x12'+11117 │ [238, 97, ... +2] │ 19.734250 │ 20.249534 │ 41.589105 │ │ ALB │ Albania │ ALB.3_1 │ 1.113524e+09 │ 5.153885e+08 │ 2.423439e+08 │ 2.202575e+08 │ b'\x01\x01\x00\x00\x00\xb5?\xc3\x1e`\xa13@\x14\x15\xcd2B\xb9D@' │ b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x8f\x0c\x00\x00\xb3@\xfd_\\\xa33@9\xad\x04 \xa8\xabD@\x85\x1a\xf0\xbf\xb6\xa23@\x13U\t`\x14\xabD@~r\x14 \n\xa33@+\xf0\xfc?\xda\xaaD@\xa8O\xf6\xbf\xf1\xa33@\xd1\x9a\xf6\xff7\xaaD@\xc0@\x10'+51373 │ [238, 97, ... +2] │ 20.830796 │ 19.630373 │ 41.447333 │ │ ALB │ Albania │ ALB.4_1 │ 7.954835e+08 │ 3.681851e+08 │ 1.731265e+08 │ 1.573483e+08 │ b'\x01\x01\x00\x00\x00\x17\xb66{\xbb/4@m\xda[\xa0\x1f\x85D@' │ b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xc5\x02\x00\x00\xe3\xe0\xf3?\xd0o4@\xe3\xa1\xf7\xdf\xadlD@ti\x15`\xcco4@\xb0\xb3\xf8\x7f\x9blD@A\x93\x0e \x8dp4@\x1f\x03\x05 flD@\xd8E\x13 \xf1p4@\xd5\x0f\x03\xc0\xa5kD@\xa6,\x01'+11277 │ [238, 97, ... +2] │ 20.494461 │ 20.186454 │ 41.040028 │ │ ALB │ Albania │ ALB.5_1 │ 1.345159e+09 │ 6.225993e+08 │ 2.927561e+08 │ 2.660753e+08 │ b"\x01\x01\x00\x00\x00\xee\x16g\xe7\xda\x9e3@Y\x90\xfc/'cD@" │ b'\x01\x03\x00\x00\x00\x01\x00\x00\x00,\n\x00\x00x\x06\xf4_s\xbe3@\xfc\x0c\xfb\x7f\xcd9D@+\xb1\x00\xe0\xb7\xbe3@\x8c\xc1\xfd_y:D@\xf2.\x0f\x80U\xbe3@\x159\x02\xc0\xca:D@\xdf\xed\xee\xbf\x9d\xbd3@\xf6\xb7\x04\xe0\x1f;D@,B\x14'+41597 │ [238, 97, ... +2] │ 21.019778 │ 19.620528 │ 40.774633 │ └────────┴─────────┴─────────┴──────────────┴───────────────────┴────────────────────────┴──────────────────────┴─────────────────────────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴───────────────────┴───────────┴───────────┴───────────┘
spam.head()
┏━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ ┃ lon ┃ lat ┃ band_var ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ │ float32 │ float32 │ float32 │ ├────────────┼───────────┼─────────────┤ │ 120.041664 │ 49.375000 │ 857.500000 │ │ 119.791664 │ 49.291668 │ 1027.000000 │ │ 119.875000 │ 49.291668 │ 1050.800049 │ │ 120.041664 │ 49.291668 │ 1065.599976 │ │ 120.125000 │ 49.291668 │ 1005.900024 │ └────────────┴───────────┴─────────────┘
# have a look at the table in the backend :
conn.list_tables()
['dwg', 'spam']
Data¶
# known grid parameter of the data.
grid_param = 10_000
Some typical recommended H3 resolution values for projecting raster grids based on the size were precomputed and provided in a dictionary :
h3_res = dt.rast_to_h3[str(grid_param)]["h3_res"]
print(h3_res)
8
Then apply the sn_project function, it will try to automatically detect the coordinates columns, but they can be given as parameter in case it fails.
print("Using base H3 resolution : ", h3_res)
spam_h3 = snoo.sn_project(spam,res=h3_res)
Using base H3 resolution : 8
Assuming coordinates columns ('lon','lat')
This will result in a new table expression containing the newly created h3_id column.
snoo.sn_head(spam_h3)
┌──────────┐
│ 19459073 │
└──────────┘
┏━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ lon ┃ lat ┃ band_var ┃ h3_id ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ float32 │ float32 │ float32 │ string │
├────────────┼───────────┼─────────────┼─────────────────┤
│ 120.041664 │ 49.375000 │ 857.500000 │ 88159c7aa3fffff │
│ 119.791664 │ 49.291668 │ 1027.000000 │ 88159c4e03fffff │
│ 119.875000 │ 49.291668 │ 1050.800049 │ 88159c4ea3fffff │
│ 120.041664 │ 49.291668 │ 1065.599976 │ 88159c793bfffff │
│ 120.125000 │ 49.291668 │ 1005.900024 │ 88159c6b4bfffff │
└────────────┴───────────┴─────────────┴─────────────────┘
# check the ids
spam_h3.h3_id.nunique()
┌────────┐
│ 514471 │
└────────┘
spam_h3 = spam_h3.group_by("h3_id").agg(band=_.band_var.sum(),
x=_.lon.first(),
y=_.lat.first(),)
spam_h3.count()
┌────────┐
│ 514471 │
└────────┘
conn.list_tables()
['dwg', 'spam']
One very important aspect of this workflow is that apart from the original data sets that we read into the database, no other data frames exist in the environment. The table expressions that ibis creates in the background and shows us in python are unrealised SQL queries. They only compute the minimum required for us to work with in the environment, say when we call the sn_head(n) function, it only needs to know the first n elements and will not bother computing all of them just to show us how the head of the table looks like. In SQL language, the table expressions correspond to views. The table expressions that we end up working with on the python side a just unrealised queries.
Let us now have a look at the data, for that we execute the table expression and set a limit to keep things smaller. This will result in a traditional pandas.DataFrame.
spam_h3_df = spam_h3.execute(limit=10_000)
spam_h3_df.head()
| h3_id | band | x | y | |
|---|---|---|---|---|
| 0 | 886b690a39fffff | 5359.599915 | 34.291668 | 15.291667 |
| 1 | 88521b18c9fffff | 56822.800415 | 43.791668 | 15.291667 |
| 2 | 8852e6d965fffff | 57313.799438 | 44.375000 | 15.291667 |
| 3 | 8852e6c73bfffff | 54263.199533 | 44.708332 | 15.291667 |
| 4 | 88525228b3fffff | 58432.401428 | 47.958332 | 15.291667 |
Coordinates to plot only points¶
# h3_layer_ghsl_h3 = pdk.Layer(
# "H3HexagonLayer",
# spam_h3_df,
# pickable=True,
# stroked=True,
# filled=True,
# opacity=1,
# extruded=False,
# get_hexagon= "h3_id",
# get_fill_color = [200,200,100,255],
# get_line_color=[0, 0, 0, 100],
# line_width_min_pixels=0,
# line_width_max_pixels=1,
# )
points_layer = pdk.Layer(
"ScatterplotLayer",
spam_h3_df,
pickable=False,
opacity=0.8,
stroked=False,
filled=True,
radius_scale=1,
radius_min_pixels=1,
radius_max_pixels=20,
line_width_min_pixels=1,
get_position = ["x","y"],
get_radius=50,
get_fill_color=[255, 140, 0, 255],
get_line_color=[0, 0, 0, 0],
)
# view_state_global = pdk.data_utils.compute_view(spam_h3_df[["x","y"]])
# # Create mapdeck object
# deck_map_ghsl_h3 = pdk.Deck(layers=[
# points_layer,
# ]
# ,initial_view_state=view_state_global
# ,tooltip= {"text": "Value : {band}"}
# ,map_style="road"
# )
# deck_map_ghsl_h3.to_html("../../deck_maps/deck_ghsl_h3_grid.html",
# iframe_height=800,
# )
Selecting an area¶
Let’s now restrict our area of interest :
dwg.filter(_.country.re_search("Nigeria")).select("gid_0","country")
┏━━━━━━━━┳━━━━━━━━━┓ ┃ gid_0 ┃ country ┃ ┡━━━━━━━━╇━━━━━━━━━┩ │ string │ string │ ├────────┼─────────┤ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ NGA │ Nigeria │ │ … │ … │ └────────┴─────────┘
selected_country = "NGA"
Rescaling approach to econ data¶
We will now restrict the area of interest and combine the layers at hand, namely the economic output per regions and the mapspam agricultural yield, into one table. For that, we need to do a bit of sql and create the corresponding table expression.
country = conn.sql(f"""
SELECT sel_country.* EXCLUDE (color,centr,geom,radius,x,y,grp_usd_2015,services_usd_2015,manufacturing_usd_2015),
spam.*
FROM spam AS spam
JOIN (SELECT * EXCLUDE geometry, ST_GeomFromWKB(geometry) as geom FROM dwg where gid_0='{selected_country}') AS sel_country
ON ST_Intersects(ST_Point(spam.lon,spam.lat),sel_country.geom);
""")
print(country.count())
country.head(3)
┌────────┐
│ 292275 │
└────────┘
┏━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓ ┃ gid_0 ┃ country ┃ gid_1 ┃ agriculture_usd_2015 ┃ lon ┃ lat ┃ band_var ┃ ┡━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩ │ string │ string │ string │ float64 │ float32 │ float32 │ float32 │ ├────────┼─────────┼──────────┼──────────────────────┼──────────┼───────────┼────────────┤ │ NGA │ Nigeria │ NGA.19_1 │ 4.318394e+09 │ 7.458333 │ 11.208333 │ 269.200012 │ │ NGA │ Nigeria │ NGA.19_1 │ 4.318394e+09 │ 7.541667 │ 11.208333 │ 269.100006 │ │ NGA │ Nigeria │ NGA.19_1 │ 4.318394e+09 │ 7.625000 │ 11.208333 │ 269.200012 │ └────────┴─────────┴──────────┴──────────────────────┴──────────┴───────────┴────────────┘
country = snoo.sn_project(country,h3_res)
Assuming coordinates columns ('lon','lat')
The previous cell runs for a long time compared due to the fact that it has to actually perform the relatively compelex query in order to give us the head of the data. Generally, we will avoid calling this kind of functions in the middle of a workflow. If we want the results of a query to be available right away, we can either ‘cache’ the output, or promote the table expression to an actual backend table.
Summaries¶
Let’s compute some simple summary statistics on our data, for example the yield per region :
gid_1_band = country.group_by("gid_1").agg(total_band=_.band_var.sum())
gid_1_band
┏━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┃ gid_1 ┃ total_band ┃ ┡━━━━━━━━━━╇━━━━━━━━━━━━━━┩ │ string │ float64 │ ├──────────┼──────────────┤ │ NGA.31_1 │ 5.000652e+07 │ │ NGA.12_1 │ 4.038842e+07 │ │ NGA.19_1 │ 7.312160e+07 │ │ NGA.23_1 │ 6.812528e+07 │ │ NGA.13_1 │ 1.131993e+07 │ │ NGA.37_1 │ 4.579285e+07 │ │ NGA.16_1 │ 3.300971e+07 │ │ NGA.30_1 │ 2.064093e+07 │ │ NGA.9_1 │ 4.657446e+07 │ │ NGA.20_1 │ 4.123441e+07 │ │ … │ … │ └──────────┴──────────────┘
Now that we have restricted our analysis to a smaller region of interest and have significantly decreased the size of the data that we work with, we can switch to python for the rest fo the analysis. Let us export the data for the selected country into a pandas dataFrame. But before, we will perform the rescaling operation with our regional outputs.
country_gdf = (country
.join(gid_1_band,"gid_1",how="left")
.mutate(band_frac=_.band_var/_.total_band)
# .mutate(h3_id_gdp=_.band_frac*_.build_gdp)
).to_pandas()
The previous step has added a variable band_frac which corresponds to the fractions of the agricultural yield that each h3 cell contributes to the regional total. In other words we have rescaled the yield variable to be a density other the the raster (or h3) cells whithin each regions.
print(country_gdf.shape)
country_gdf.head()
(292275, 11)
| gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | h3_id | gid_1_right | total_band | band_frac | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 |
| 1 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.708330 | 12.791668 | 8120.799805 | 88580348abfffff | NGA.37_1 | 4.579285e+07 | 0.000177 |
| 2 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.791664 | 12.791668 | 8120.799805 | 8858034d1bfffff | NGA.37_1 | 4.579285e+07 | 0.000177 |
| 3 | NGA | Nigeria | NGA.18_1 | 2.143236e+09 | 8.458330 | 12.791668 | 8120.799805 | 88580a9a1dfffff | NGA.18_1 | 3.557801e+07 | 0.000228 |
| 4 | NGA | Nigeria | NGA.18_1 | 2.143236e+09 | 8.541664 | 12.791668 | 8120.799805 | 88580a91abfffff | NGA.18_1 | 3.557801e+07 | 0.000228 |
Check that things add up¶
The previous rescaling operations has resulted in density distributions whithin each region. If we add up the density values for each region, we should get 1. This density has the same behaviour as a standard probability distribution.
country_gdf[["gid_1","band_frac"]].groupby("gid_1").agg("sum")
| band_frac | |
|---|---|
| gid_1 | |
| NGA.10_1 | 1.0 |
| NGA.11_1 | 1.0 |
| NGA.12_1 | 1.0 |
| NGA.13_1 | 1.0 |
| NGA.15_1 | 1.0 |
| NGA.16_1 | 1.0 |
| NGA.18_1 | 1.0 |
| NGA.19_1 | 1.0 |
| NGA.20_1 | 1.0 |
| NGA.23_1 | 1.0 |
| NGA.27_1 | 1.0 |
| NGA.28_1 | 1.0 |
| NGA.29_1 | 1.0 |
| NGA.30_1 | 1.0 |
| NGA.31_1 | 1.0 |
| NGA.33_1 | 1.0 |
| NGA.37_1 | 1.0 |
| NGA.3_1 | 1.0 |
| NGA.4_1 | 1.0 |
| NGA.5_1 | 1.0 |
| NGA.6_1 | 1.0 |
| NGA.9_1 | 1.0 |
Mapping¶
Let’s have a look at this. To help us, the cmap function from the scalenav plotting module will transform the values into RGB code that pydeck will read :
col_pal = "viridis"
col_pal = pypal.load_cmap(col_pal)
cols = cmap((country_gdf.band_var),palette=col_pal,log=True) #pd.Series([[255*j for j in x] for x in bmlunge(compact_geo_downscaled["log_value"])])
country_gdf["color"] = cols
country_gdf.head(5)
Max input : 11.06, palette colors : 10
| gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | h3_id | gid_1_right | total_band | band_frac | color | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] |
| 1 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.708330 | 12.791668 | 8120.799805 | 88580348abfffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] |
| 2 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.791664 | 12.791668 | 8120.799805 | 8858034d1bfffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] |
| 3 | NGA | Nigeria | NGA.18_1 | 2.143236e+09 | 8.458330 | 12.791668 | 8120.799805 | 88580a9a1dfffff | NGA.18_1 | 3.557801e+07 | 0.000228 | [134, 213, 73, 255] |
| 4 | NGA | Nigeria | NGA.18_1 | 2.143236e+09 | 8.541664 | 12.791668 | 8120.799805 | 88580a91abfffff | NGA.18_1 | 3.557801e+07 | 0.000228 | [134, 213, 73, 255] |
view_state = pdk.data_utils.compute_view(points=country_gdf[["lon","lat"]])
# # this map is not really usefull at high resolutions
# h3_layer_down = pdk.Layer(
# "H3HexagonLayer",
# country_gdf[["gid_0","gid_1","h3_id","total_band","color"]],
# pickable=True,
# stroked=True,
# filled=True,
# opacity=.8,
# extruded=False,
# get_hexagon= "h3_id",
# get_fill_color = "color",
# get_line_color= [0, 0, 0, 100],
# line_width_min_pixels=0,
# line_width_max_pixels=1,
# )
layer_down = pdk.Layer(
"ScatterplotLayer",
country_gdf[["lon","lat"]],
pickable=False,
opacity=0.4,
stroked=False,
filled=True,
radius_scale=6,
radius_min_pixels=1,
radius_max_pixels=10,
line_width_min_pixels=1,
get_position = ["lon","lat"],
get_radius=50,
get_fill_color=[255, 140, 0, 255],
get_line_color=[0, 0, 0, 0],
)
# # Create mapdeck object
# deck_map_down = pdk.Deck(layers=[
# # h3_layer_down,
# layer_down
# ]
# ,initial_view_state=view_state
# # ,tooltip= {"text": "Value : {h3_id_gdp}"}
# ,height=800
# ,map_style="road"
# )
# deck_map_down.to_html("../../deck_maps/deck_ghsl_h3_down_grid.html")
Gridding¶
At this point, we come to a limitation. The developed process to convert raster grids into their centroids, and then projecting the centroids into h3 only gives the index values for the centroids. Ideally, we want to have an H3 grid covering the area that was covered by the rater in which the cells have an assigned value from the original. For this, a process referred to as gridding was implemented. It consists in filling the empty space between cell centroids with other cells, and then redistributing the variable on them based on the proximity to the centroid.
The following function assists us with that, it computes the neighbourhood (set of h3 neighbouring cells) needed to fill the space around a given centre cell, covering an area roughly equivalent to the original raster grid cell.
rast_to_h3 = dt.rast_to_h3_map(x=country_gdf["lon"].mean(),y = country_gdf["lat"].mean())
Using angles for meter grid.
# ideally, this is precomputed, but for some reason it has not worked...
neighbs = rast_to_h3[str(grid_param)]["nn"]
country_gdf["h3_gridded"] = pd.Series(country_gdf.apply(lambda row: dt.centre_cell_to_square(h3_cell = row["h3_id"],neighbs=neighbs),axis=1).tolist())
country_gdf.dropna(subset=["h3_gridded"],inplace=True)
As a result of this process, a new variable is added to the dataframe containing a list of cells that are asigned to the centroid. Similarly to a voronoi process.
country_gdf.head()
| gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | h3_id | gid_1_right | total_band | band_frac | color | h3_gridded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | [8858034839fffff, 8858034911fffff, 88581c86c1f... |
| 1 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.708330 | 12.791668 | 8120.799805 | 88580348abfffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | [8858034d4dfffff, 88580348b5fffff, 88581cb265f... |
| 2 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.791664 | 12.791668 | 8120.799805 | 8858034d1bfffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | [8858034cedfffff, 8858034dc5fffff, 88581cb2d5f... |
| 3 | NGA | Nigeria | NGA.18_1 | 2.143236e+09 | 8.458330 | 12.791668 | 8120.799805 | 88580a9a1dfffff | NGA.18_1 | 3.557801e+07 | 0.000228 | [134, 213, 73, 255] | [88580a9131fffff, 88580a9a17fffff, 88580a9847f... |
| 4 | NGA | Nigeria | NGA.18_1 | 2.143236e+09 | 8.541664 | 12.791668 | 8120.799805 | 88580a91abfffff | NGA.18_1 | 3.557801e+07 | 0.000228 | [134, 213, 73, 255] | [88580a824dfffff, 88580a91b5fffff, 88580a8365f... |
Gridded data set¶
TO get the full data, we ‘explode’ the variable :
spam_gridded = country_gdf.explode("h3_gridded")
print(spam_gridded.shape)
spam_gridded.head(3)
(44133525, 13)
| gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | h3_id | gid_1_right | total_band | band_frac | color | h3_gridded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 8858034839fffff |
| 0 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 8858034911fffff |
| 0 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 88581c86c1fffff |
We further remove the potential duplicates that could have been created :
spam_gridded = spam_gridded[~spam_gridded.duplicated(subset=["h3_gridded"])]
print(spam_gridded.shape)
spam_gridded.reset_index(inplace=True,drop=True)
(569505, 13)
spam_gridded.head(3)
| gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | h3_id | gid_1_right | total_band | band_frac | color | h3_gridded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 8858034839fffff |
| 1 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 8858034911fffff |
| 2 | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | 8858034957fffff | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 88581c86c1fffff |
spam_gridded.set_index("h3_id",inplace=True,drop=True)
We count the number of cells that were added to each centroid cell:
spam_gridded["count"] = spam_gridded.groupby("h3_id")["gid_1"].transform("count")
Rescale the values to distribute them across the cells :
# ghsl_gridded["h3_id_gdp_gridded"] = np.round(ghsl_gridded["h3_id_gdp"]/ghsl_gridded["count"])
spam_gridded["h3_id_band_gridded"] = np.round(spam_gridded["band_var"]/spam_gridded["count"],2)
spam_gridded.reset_index(inplace=True,drop=False)
print(spam_gridded.shape)
spam_gridded.head()
(569505, 15)
| h3_id | gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | gid_1_right | total_band | band_frac | color | h3_gridded | count | h3_id_band_gridded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8858034957fffff | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 8858034839fffff | 151 | 53.78 |
| 1 | 8858034957fffff | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 8858034911fffff | 151 | 53.78 |
| 2 | 8858034957fffff | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 88581c86c1fffff | 151 | 53.78 |
| 3 | 8858034957fffff | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 885803482dfffff | 151 | 53.78 |
| 4 | 8858034957fffff | NGA | Nigeria | NGA.37_1 | 1.815483e+09 | 6.624997 | 12.791668 | 8120.799805 | NGA.37_1 | 4.579285e+07 | 0.000177 | [134, 213, 73, 255] | 885803480bfffff | 151 | 53.78 |
# view_state = pdk.data_utils.compute_view(points=ghsl_gridded.sample(10000)["h3_gridded"].apply(lambda cell: h3.cell_to_latlng(cell)[::-1]))
# lagos: latitude = 6.450151,longitude = 3.531193 !!!! DOES NOT WORK BECAUSE IT IS EXCLUDED FROM DOSE!!!
# Abidjan : latitude = 5.367, longitude = -4.009
# Kano : latitude = 11.985, longitude = 8.533
zoom_lat=11.985
zoom_lon=8.533
zoom_width=70_000
view_state_zoomed = pdk.ViewState(latitude = zoom_lat,longitude = zoom_lon,zoom=11,pitch=30,bearing=0)
# extracting data for the plotting region :
plotting_region = dt.square_poly(lat=zoom_lat,lon=zoom_lon,distance=zoom_width)
limits = [np.round(x,3) for x in plotting_region.bounds]
limits
[8.212, 11.664, 8.854, 12.306]
Another beauty of ibis is that we can run sql on top of tables inside out python environment :
# ghsl_gridded
spam_gridded_zoom = conn.sql(
f"""
Select * from
(select *,
array_extract(h3_cell_to_latlng(h3_gridded),2) as x_h3,
array_extract(h3_cell_to_latlng(h3_gridded),1) as y_h3
from spam_gridded)
where (lon <= {limits[2]}) and (lon >= {limits[0]}) and (lat <= {limits[3]}) and (lat >= {limits[1]});
""").execute()
print(spam_gridded_zoom.shape)
spam_gridded_zoom.head()
(6509, 17)
| h3_id | gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | gid_1_right | total_band | band_frac | color | h3_gridded | count | h3_id_band_gridded | x_h3 | y_h3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [82, 197, 105, 255] | 88580ac8b9fffff | 120 | 40.95 | 7.870442 | 12.315062 |
| 1 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [82, 197, 105, 255] | 88580ac991fffff | 120 | 40.95 | 7.884474 | 12.267223 |
| 2 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [82, 197, 105, 255] | 88580a5341fffff | 120 | 40.95 | 7.911435 | 12.251724 |
| 3 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [82, 197, 105, 255] | 88580ac8adfffff | 120 | 40.95 | 7.912636 | 12.273132 |
| 4 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [82, 197, 105, 255] | 88580ac88bfffff | 120 | 40.95 | 7.845809 | 12.322583 |
spam_gridded_zoom["color"] = cmap(spam_gridded_zoom["h3_id_band_gridded"],palette=col_pal)
Max input : 45.50, palette colors : 10
h3_layer_ghsl_h3_gridded = pdk.Layer(
"H3HexagonLayer",
# ghsl_gridded.loc[0:200_000],
spam_gridded_zoom,
pickable=True,
stroked=True,
filled=True,
opacity=.5,
extruded=False,
get_hexagon= "h3_gridded",
get_fill_color = "color",
get_line_color= [0, 0, 0, 100],
line_width_min_pixels=1,
line_width_max_pixels=1,
)
layer_down = pdk.Layer(
"ScatterplotLayer",
country_gdf[["lon","lat"]],
pickable=False,
opacity=0.8,
stroked=False,
filled=True,
radius_scale=10,
radius_min_pixels=1,
radius_max_pixels=20,
line_width_min_pixels=0,
line_width_max_pixels=0,
get_position = ["lon","lat"],
get_radius=25,
get_fill_color=[255, 140, 0, 255],
get_line_color=[0, 0, 0, 0],
)
And we can see the result of gridding overlayed with the original centroids of cells. Note that the value of the yield from a raster grid centroid is distributed across all the cells covering the area nearest to it.
# # Create mapdeck object
# deck_map_ghsl_h3_gridded = pdk.Deck(layers=[h3_layer_ghsl_h3_gridded,
# layer_down
# ]
# ,initial_view_state=view_state_zoomed
# ,tooltip= {"text": "Value : {h3_id_band_gridded}"}
# ,height=800
# ,map_style="road"
# )
# deck_map_ghsl_h3_gridded.to_html("../../deck_maps/deck_ghsl_nres_h3_gridded.html",
# # open_browser=True,
# iframe_height=800
# )
# spam_rescaled = spam_gridded_zoom.rename(columns={"h3_id_gdp_gridded" : "h3_id_gdp_gridded_var",
# "h3_id_band_gridded" : "h3_id_band_gridded_var",
# "h3_id" : "h3_gridded_parent"
# },inplace=False)
# spam_rescaled.rename(columns={"h3_gridded" : "h3_id"
# },inplace=True)
# spam_rescaled.head()
# h3_gridded_new_scale = sd.change_res(spam_rescaled,level=-1)
# print(h3_gridded_new_scale.shape)
# h3_gridded_new_scale.head()
# h3_gridded_new_scale["color"] = cmap((h3_gridded_new_scale.h3_id_band_gridded_var),palette=col_pal)
# #####
# h3_layer_ghsl_h3_gridded_new_scale = pdk.Layer(
# "H3HexagonLayer",
# h3_gridded_new_scale,
# pickable=True,
# stroked=True,
# filled=True,
# opacity=.5,
# extruded=False,
# get_hexagon= "h3_id",
# get_fill_color = "color",
# get_line_color= [0, 0, 0, 100],
# line_width_min_pixels=1,
# line_width_max_pixels=1,
# )
# # Create mapdeck object
# deck_map_ghsl_h3_gridded_rescaled = pdk.Deck(layers=[h3_layer_ghsl_h3_gridded_new_scale]
# ,initial_view_state=view_state_zoomed
# ,tooltip= {"text": "Value : {h3_id_gdp_gridded_var}"}
# ,map_style="road"
# )
# deck_map_ghsl_h3_gridded_rescaled.to_html("../../deck_maps/deck_ghsl_nres_h3_gridded_new_scale.html",
# iframe_height=800,
# # open_browser=True
# )
Adding constraint layer¶
Let’s now use GHSL Human settlement layer data to hint at where there is no agricultural lands. Start with loading in the ghsl layer just like a base layer
grid_ghsl_param = 100
ghsl_file = f"/Users/cenv1069/Documents/data/datasets/JRC/processed_samples/S_100_R8_C19/*.parquet"
ghsl_file
# tiffs = [x for x in glob.glob(ghsl_file,recursive=True) if re.search(pattern=".tif$",string=x)]
# print(len(tiffs))
# tiffs
'/Users/cenv1069/Documents/data/datasets/JRC/processed_samples/S_100_R8_C19/*.parquet'
raw_grid_constraint = snoo.sn_table(conn=conn,name="raw_grid_constraint",path=ghsl_file,overwrite=True, bbox=limits)
overwriting existing
reading bbox
This has added a new table into the backend :
conn.list_tables()
['dwg', 'raw_grid_constraint', 'spam']
snoo.sn_head(raw_grid_constraint)
┌────────┐
│ 172826 │
└────────┘
┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓
┃ lon ┃ lat ┃ band_var ┃
┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩
│ float32 │ float32 │ uint16 │
├──────────┼───────────┼──────────┤
│ 8.216865 │ 12.305492 │ 1530 │
│ 8.217780 │ 12.305492 │ 680 │
│ 8.219611 │ 12.305492 │ 50 │
│ 8.220527 │ 12.305492 │ 85 │
│ 8.225102 │ 12.305492 │ 339 │
└──────────┴───────────┴──────────┘
ghsl_constrain_res = 12
ghsl_constraint = snoo.sn_project(raw_grid_constraint,res=ghsl_constrain_res)
Assuming coordinates columns ('lon','lat')
print(ghsl_constraint.count())
ghsl_constraint.head()
┌────────┐
│ 172826 │
└────────┘
┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ lon ┃ lat ┃ band_var ┃ h3_id ┃ ┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ float32 │ float32 │ uint16 │ string │ ├──────────┼───────────┼──────────┼─────────────────┤ │ 8.216865 │ 12.305492 │ 1530 │ 8c580ac9090edff │ │ 8.217780 │ 12.305492 │ 680 │ 8c580ac90909dff │ │ 8.219611 │ 12.305492 │ 50 │ 8c580ac90943dff │ │ 8.220527 │ 12.305492 │ 85 │ 8c580ac90948dff │ │ 8.225102 │ 12.305492 │ 339 │ 8c580ac954d43ff │ └──────────┴───────────┴──────────┴─────────────────┘
constraint = ghsl_constraint.execute()
Matching Resolution¶
bringing the variable layer and the constrain layer to a common resolution
spam_gridded_zoom.head(
)
| h3_id | gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | gid_1_right | total_band | band_frac | color | h3_gridded | count | h3_id_band_gridded | x_h3 | y_h3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 88580ac8b9fffff | 120 | 40.95 | 7.870442 | 12.315062 |
| 1 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 88580ac991fffff | 120 | 40.95 | 7.884474 | 12.267223 |
| 2 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 88580a5341fffff | 120 | 40.95 | 7.911435 | 12.251724 |
| 3 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 88580ac8adfffff | 120 | 40.95 | 7.912636 | 12.273132 |
| 4 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 4914.5 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 88580ac88bfffff | 120 | 40.95 | 7.845809 | 12.322583 |
constraint_res = h3.get_resolution(constraint.h3_id[0])
layer_res = h3.get_resolution(spam_gridded_zoom.h3_gridded[0])
print("Constraint res : ",constraint_res,", Layer res: ",layer_res)
Constraint res : 12 , Layer res: 8
Let’s meet somewhere in between.
spam_gridded_zoom.rename(columns={"h3_id" : "h3_native"
,"h3_gridded" : "h3_id"},inplace=True)
final_res = 10
Use the scalenav set_res function to set a desired resolution to both layers
constraint_gridded_zoom_rescaled = sd.set_res(constraint,final=final_res)
spam_gridded_zoom_rescaled = sd.set_res(spam_gridded_zoom,final=final_res)
constraint_res = h3.get_resolution(constraint_gridded_zoom_rescaled.h3_id[0])
layer_res = h3.get_resolution(spam_gridded_zoom_rescaled.h3_id[0])
print("Constraint res : ",constraint_res,", Layer res: ",layer_res)
Constraint res : 10 , Layer res: 10
Both resolutions match now. This will allow next to perform operations on the index variables of both layers together.
print(spam_gridded_zoom_rescaled.shape)
spam_gridded_zoom_rescaled.head()
(318941, 17)
| h3_native | gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var | gid_1_right | total_band | band_frac | color | h3_id | count | h3_id_band_gridded | x_h3 | y_h3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b807fff | 120 | 40.95 | 7.870442 | 12.315062 |
| 1 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b80ffff | 120 | 40.95 | 7.870442 | 12.315062 |
| 2 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b817fff | 120 | 40.95 | 7.870442 | 12.315062 |
| 3 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b81ffff | 120 | 40.95 | 7.870442 | 12.315062 |
| 4 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b827fff | 120 | 40.95 | 7.870442 | 12.315062 |
print(constraint_gridded_zoom_rescaled.shape)
constraint_gridded_zoom_rescaled.head()
(132809, 3)
| h3_id | child_cells | band_var | |
|---|---|---|---|
| 0 | 8a580a00a097fff | [8b580a00a091fff] | 298 |
| 1 | 8a580a00a09ffff | [8b580a00a09bfff] | 2 |
| 2 | 8a580a00a19ffff | [8b580a00a19bfff] | 136 |
| 3 | 8a580a00a297fff | [8b580a00a296fff] | 98 |
| 4 | 8a580a00a2d7fff | [8b580a00a2d0fff] | 11 |
Set up a common layer by merging the data and constraint layers.
layer_constrained = spam_gridded_zoom_rescaled.merge(constraint_gridded_zoom_rescaled,on="h3_id",how="left")
print(layer_constrained.shape)
layer_constrained.head()
(318941, 19)
| h3_native | gid_0 | country | gid_1 | agriculture_usd_2015 | lon | lat | band_var_x | gid_1_right | total_band | band_frac | color | h3_id | count | h3_id_band_gridded | x_h3 | y_h3 | child_cells | band_var_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b807fff | 120 | 40.95 | 7.870442 | 12.315062 | NaN | NaN |
| 1 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b80ffff | 120 | 40.95 | 7.870442 | 12.315062 | NaN | NaN |
| 2 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b817fff | 120 | 40.95 | 7.870442 | 12.315062 | NaN | NaN |
| 3 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b81ffff | 120 | 40.95 | 7.870442 | 12.315062 | NaN | NaN |
| 4 | 88580ac9d7fffff | NGA | Nigeria | NGA.20_1 | 2.325922e+09 | 8.291664 | 12.291668 | 100.295914 | NGA.20_1 | 4.123441e+07 | 0.000119 | [194, 223, 35, 255] | 8a580ac8b827fff | 120 | 40.95 | 7.870442 | 12.315062 | NaN | NaN |
Exclude the cells for which the constraint was identified, their ‘child_cell’ variable is not na. To keep things simple, the values of the constraint variable are not accounted for, it is used as a hard, binary constraint. If there is any infrastructure in the cell, it is removed fully, even for small values.
layer_constrained = layer_constrained[layer_constrained.child_cells.isna()]
# layer_constrained.reset_index(drop=False,inplace=True)
layer_constrained["color"] = cmap((layer_constrained.h3_id_band_gridded),palette=col_pal)
Max input : 45.50, palette colors : 10
#####
constrained_layer = pdk.Layer(
"H3HexagonLayer",
layer_constrained,
pickable=True,
stroked=True,
filled=True,
opacity=.5,
extruded=False,
get_hexagon= "h3_id",
get_fill_color = "color",
get_line_color= [0, 0, 0, 100],
line_width_min_pixels=0,
line_width_max_pixels=1,
)
constraint_layer = pdk.Layer(
"H3HexagonLayer",
constraint_gridded_zoom_rescaled,
pickable=True,
stroked=True,
filled=True,
opacity=.5,
extruded=False,
get_hexagon= "h3_id",
get_fill_color = [56, 65, 87, 255],
get_line_color= [0, 0, 0, 100],
line_width_min_pixels=0,
line_width_max_pixels=1,
)
# # Create mapdeck object
# deck_map_agri_layer_constrained = pdk.Deck(layers=[
# constrained_layer,
# constraint_layer]
# ,initial_view_state=view_state_zoomed
# ,tooltip= {"text": "Value : {h3_id_band_gridded}"}
# ,map_style="road"
# )
# deck_map_agri_layer_constrained.to_html("../../deck_maps/deck_ghsl_agri_constrained.html",
# iframe_height=800,
# # open_browser=True
# )