Data layer¶
import re
import numpy as np
# data
import pandas as pd
import scalenav.data as snd
import scalenav.oop as snoo
from scalenav.plotting import cmap
import ibis as ib
from ibis import _
ib.options.interactive = True
# plots
import pypalettes as pypal
import pydeck as pdk
conn = snoo.sn_connect()
Connecting to a temporary in-memory DB instance.
processed_data_folder = "../../outputs/"
# trying an interpolation method for the right resolution from a given grid size
dist = 0
alpha = 5/np.log(1_000)
A = 13 + alpha*np.log(10)
A
alpha
# 13+alpha*np.log(10)
0.7238241365054198
Setup¶
extra_res = 0
grid_params = []
res_params = []
if dist > 0 :
# extra_res=round(A-np.log(alpha*dist))
grid_params = [10,100,1000,5000,10000,dist]
grid_params.sort()
# res_params = [13,12,11,10,8,extra_res]
res_params = [round(A-alpha*np.log(size)) for size in grid_params]
# res_params.sort(reverse=True)
else:
grid_params = [10,100,1000,5000,10000]
res_params = [14,12,11,10,8]
res_params
rast_to_h3 = { str(size): {"h3_res" : res, "nn" : [] } for (size,res) in zip(grid_params,res_params)}
rast_to_h3
{'10': {'h3_res': 14, 'nn': []},
'100': {'h3_res': 12, 'nn': []},
'1000': {'h3_res': 11, 'nn': []},
'5000': {'h3_res': 10, 'nn': []},
'10000': {'h3_res': 8, 'nn': []}}
grid_param = 10
rast_to_h3 = snd.rast_to_h3_map(x=8,y=12,ref="m",dist=dist)
base_res = rast_to_h3[str(grid_param)]["h3_res"]
neighbs = rast_to_h3[str(grid_param)]["nn"]
print("Using base resolution: ",base_res)
Using angles for meter grid.
Using base resolution: 14
neighbs
[(-2, -1),
(-1, -2),
(1, 2),
(0, 0),
(-2, -2),
(3, 2),
(1, 1),
(2, 0),
(-1, 1),
(1, -2),
(0, 2),
(3, 1),
(-1, 0),
(1, -1),
(2, 1),
(0, -1),
(0, 1),
(-1, -1),
(1, 0),
(2, 2),
(0, -2)]
GHSL MSZ data set¶
could be moved to another notebook.
# close up london-oxford
# limits = [-1.486,51.138,0.352,51.882]
# wide europe window
# limits = [-14.06,48.34,31.95,59.27]
# Kano Nigeria
limits = [8.46085,11.96741,8.53552,12.01733]
# Lagos
# limits = [3.0624,6.3699,3.5925,6.6592]
src_file = "MSZ_10_R8_C19/*.parquet"
layer = conn.read_parquet(f"{processed_data_folder}{src_file}",table_name="layer")
msz_categories = {
"1" : "Open spaces, low vegetation",
"2" : "Open spaces, medium vegetation",
"3" : "Open spaces, high vegetation",
"4" : "Open spaces, water surfaces",
"5" : "Open spaces, road surfaces",
"11" : "Built spaces, residential building height <= 3m",
"12" : "Built spaces, residential building 3m < height <= 6m",
"13" : "Built spaces, residential building 6m < height <= 15m",
"14" : "Built spaces, residential building 15m < height <= 30m",
"15" : "Built spaces, residential building 30m < height",
"21" : "Built spaces, non-residential building height <= 3m",
"22" : "Built spaces, non-residential building 3m < height <= 6m",
"23" : "Built spaces, non-residential building 6m < height <= 15m",
"24" : "Built spaces, non-residential building 15m < height <= 30m",
"25" : "Built spaces, non-residential building 30m < height",
}
layer_ = (
layer
# filter region
.filter(_.lon>limits[0]
,_.lon<limits[2]
,_.lat>limits[1]
,_.lat<limits[3])
# filter a band
.filter(_.band_var == 24)
)
layer_.head()
┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓ ┃ lon ┃ lat ┃ band_var ┃ ┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩ │ float32 │ float32 │ uint8 │ ├──────────┼───────────┼──────────┤ │ 8.523082 │ 12.009527 │ 24 │ │ 8.522899 │ 12.009436 │ 24 │ │ 8.522990 │ 12.009436 │ 24 │ │ 8.523082 │ 12.009436 │ 24 │ │ 8.523173 │ 12.009436 │ 24 │ └──────────┴───────────┴──────────┘
layer_.count()
┌──────┐
│ 1835 │
└──────┘
Deck map¶
cat_palette = "Classic_10"
num_palette = "Burg"
gdp_pc_pal = pypal.load_cmap(cat_palette,reverse=True)
deck_data = layer_.execute()
deck_data.head()
| lon | lat | band_var | |
|---|---|---|---|
| 0 | 8.523082 | 12.009527 | 24 |
| 1 | 8.522899 | 12.009436 | 24 |
| 2 | 8.522990 | 12.009436 | 24 |
| 3 | 8.523082 | 12.009436 | 24 |
| 4 | 8.523173 | 12.009436 | 24 |
deck_data["col"] = cmap(deck_data.band_var,palette=gdp_pc_pal,log=False)
deck_data["value"] = deck_data["band_var"].apply(lambda x: msz_categories[str(int(x))])
Max input : 24.00, palette colors : 10
print(deck_data.shape)
deck_data.head()
(1835, 5)
| lon | lat | band_var | col | value | |
|---|---|---|---|---|---|
| 0 | 8.523082 | 12.009527 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... |
| 1 | 8.522899 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... |
| 2 | 8.522990 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... |
| 3 | 8.523082 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... |
| 4 | 8.523173 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... |
viewstate = pdk.data_utils.compute_view(deck_data[["lon","lat"]])
deck_points = pdk.Layer(type="ScatterplotLayer"
,data=deck_data
,get_position = ["lon","lat"]
,get_radius=10
,get_fill_color= "col"
,pickable=True
,opacity=0.2
,stroked=True
,filled=True
,radius_scale=2
,radius_min_pixels=3
,radius_max_pixels=5
,line_width_min_pixels=1
,get_line_color=[0, 0, 0, 0]
)
deck = pdk.Deck(layers=[deck_points],
initial_view_state=viewstate,
tooltip= {"text": " {band_var} : {value}"},
map_style="road")
deck.to_html("../../deck_maps/ghsl_msz_vis.html",iframe_height=700)
Gridding function¶
# layer_df = layer.execute(limit=100_000)
# base_res = 14
layer_df = snd.df_project_on_grid(deck_data,res=base_res)
print(layer_df.shape)
layer_df.head()
(1835, 6)
| lon | lat | band_var | col | value | h3_id | |
|---|---|---|---|---|---|---|
| 0 | 8.523082 | 12.009527 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b6e687 |
| 1 | 8.522899 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b45167 |
| 2 | 8.522990 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b45a1f |
| 3 | 8.523082 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b6ada7 |
| 4 | 8.523173 | 12.009436 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b6ac2f |
cat = True
if cat:
layer_df["band_var"]=layer_df["band_var"].astype("int").astype("str")
neighbs
[(-2, -1),
(-1, -2),
(1, 2),
(0, 0),
(-2, -2),
(3, 2),
(1, 1),
(2, 0),
(-1, 1),
(1, -2),
(0, 2),
(3, 1),
(-1, 0),
(1, -1),
(2, 1),
(0, -1),
(0, 1),
(-1, -1),
(1, 0),
(2, 2),
(0, -2)]
def layer_gridded(layer : pd.DataFrame, neighbs : list[tuple[int]]):
"""" Take a layer of raster centroids expressed as H3 cells and do the 'gridding'. It means filling in all the gaps respecting the original raster grid size.
Duplicates are removed, the value from the single cell is uniformly distributed across the set of cells associated to it.
"""
vars_columns = [x for x in layer.columns if re.search(pattern="_var$",string=x)]
layer["h3_gridded"] = pd.Series(layer.apply(lambda row: snd.centre_cell_to_square(h3_cell = row["h3_id"],neighbs=neighbs),axis=1).tolist())
layer.dropna(subset=["h3_gridded"],inplace=True)
gridded_layer = layer.explode("h3_gridded")
grid_layer_b = gridded_layer.shape[0]
gridded_layer = gridded_layer[~gridded_layer.duplicated(subset=["h3_gridded"])]
grid_layer_a = gridded_layer.shape[0]
print("Duplicated ratio : ",1-np.round(grid_layer_a/grid_layer_b,3))
gridded_layer.reset_index(inplace=True,drop=True)
gridded_layer.set_index("h3_id",inplace=True,drop=True)
gridded_layer = gridded_layer.join(gridded_layer.value_counts("h3_id"),how="left",sort=False,validate="m:1")
# since only one layer is being read at the moment, just check if it's string or not.
# in the future, separate into categorical and numerical variables
if len(vars_columns)>1:
if (str not in gridded_layer[vars_columns].dtypes.values.tolist()):
gridded_layer[vars_columns] = np.round(gridded_layer[vars_columns].div(gridded_layer["count"],axis=0),3)
# if len(vars_columns)==1:
# if gridded_layer[vars_columns].dtype!=str:
# gridded_layer[vars_columns] = np.round(gridded_layer[vars_columns].div(gridded_layer["count"],axis=0),3)
gridded_layer.reset_index(inplace=True,drop=False)
gridded_layer.dropna(axis=0,subset=["h3_gridded"],inplace=True)
return gridded_layer
layer_grid = layer_gridded(layer=layer_df,neighbs=neighbs)
Duplicated ratio : 0.18700000000000006
print(layer_grid.shape)
layer_grid.head()
(31326, 8)
| h3_id | lon | lat | band_var | col | value | h3_gridded | count | |
|---|---|---|---|---|---|---|---|---|
| 0 | 8e580a4e0b6e687 | 8.523082 | 12.009527 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b6ad27 | 21 |
| 1 | 8e580a4e0b6e687 | 8.523082 | 12.009527 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b6e6f7 | 21 |
| 2 | 8e580a4e0b6e687 | 8.523082 | 12.009527 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b45b4f | 21 |
| 3 | 8e580a4e0b6e687 | 8.523082 | 12.009527 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b6e687 | 21 |
| 4 | 8e580a4e0b6e687 | 8.523082 | 12.009527 | 24 | [31, 119, 180, 255] | Built spaces, non-residential building 15m < h... | 8e580a4e0b6e6d7 | 21 |
Mapping gridded¶
sample_size = np.min([layer_grid.shape[0],30_000])
plotting_layer = (layer_grid
.sample(sample_size))
plotting_layer["color"] = cmap(plotting_layer["band_var"].astype("int",copy=False)
,palette=gdp_pc_pal
,log=False)
Max input : 24.00, palette colors : 10
#####
layer_gridded = pdk.Layer(
"H3HexagonLayer",
plotting_layer,
pickable=True,
stroked=True,
filled=True,
opacity=.5,
extruded=False,
# elevation_scale = 1e-4,
# elevation_range = [0,100],
get_elevation="band_var",
get_hexagon= "h3_gridded",
get_fill_color = "color",
get_line_color= [0, 0, 0, 100],
line_width_min_pixels=0,
line_width_max_pixels=1,
)
# Create mapdeck object
layer_gridded_deck = pdk.Deck(layers=[layer_gridded]
,initial_view_state=viewstate
,tooltip= {"text": "{band_var} : {value}"}
,map_style="road"
)
layer_gridded_deck.to_html("../../deck_maps/msz_ghsl_gridded.html",iframe_height=800)