Data layer

import re
import numpy as np

# data
import pandas as pd

import scalenav.data as snd
import scalenav.oop as snoo
from scalenav.plotting import cmap
import ibis as ib
from ibis import _
ib.options.interactive = True

# plots
import pypalettes as pypal
import pydeck as pdk
conn = snoo.sn_connect()
Connecting to a temporary in-memory DB instance.
processed_data_folder = "../../outputs/"
# trying an interpolation method for the right resolution from a given grid size
dist = 0

alpha = 5/np.log(1_000)

A = 13 + alpha*np.log(10)

A 
alpha
# 13+alpha*np.log(10)
0.7238241365054198

Setup

extra_res = 0
grid_params = []
res_params = []

if dist > 0 : 
    # extra_res=round(A-np.log(alpha*dist))

    grid_params = [10,100,1000,5000,10000,dist]
    grid_params.sort()
    
    # res_params = [13,12,11,10,8,extra_res]
    res_params = [round(A-alpha*np.log(size)) for size in grid_params]
    # res_params.sort(reverse=True)

else:
    grid_params = [10,100,1000,5000,10000]
    res_params = [14,12,11,10,8]

res_params

rast_to_h3 = { str(size): {"h3_res" : res, "nn" : [] } for (size,res) in zip(grid_params,res_params)}
rast_to_h3
{'10': {'h3_res': 14, 'nn': []},
 '100': {'h3_res': 12, 'nn': []},
 '1000': {'h3_res': 11, 'nn': []},
 '5000': {'h3_res': 10, 'nn': []},
 '10000': {'h3_res': 8, 'nn': []}}
grid_param = 10

rast_to_h3 = snd.rast_to_h3_map(x=8,y=12,ref="m",dist=dist)

base_res = rast_to_h3[str(grid_param)]["h3_res"]
neighbs = rast_to_h3[str(grid_param)]["nn"]

print("Using base resolution: ",base_res)
Using angles for meter grid.
Using base resolution:  14
neighbs
[(-2, -1),
 (-1, -2),
 (1, 2),
 (0, 0),
 (-2, -2),
 (3, 2),
 (1, 1),
 (2, 0),
 (-1, 1),
 (1, -2),
 (0, 2),
 (3, 1),
 (-1, 0),
 (1, -1),
 (2, 1),
 (0, -1),
 (0, 1),
 (-1, -1),
 (1, 0),
 (2, 2),
 (0, -2)]

GHSL MSZ data set

could be moved to another notebook.

# close up london-oxford
# limits = [-1.486,51.138,0.352,51.882]

# wide europe window
# limits = [-14.06,48.34,31.95,59.27]


# Kano Nigeria
limits = [8.46085,11.96741,8.53552,12.01733]

# Lagos
# limits = [3.0624,6.3699,3.5925,6.6592]
src_file = "MSZ_10_R8_C19/*.parquet"

layer = conn.read_parquet(f"{processed_data_folder}{src_file}",table_name="layer")
msz_categories = {
    "1" : "Open spaces, low vegetation",
    "2" : "Open spaces, medium vegetation",
    "3" : "Open spaces, high vegetation",
    "4" : "Open spaces, water surfaces",
    "5" : "Open spaces, road surfaces",
    "11" : "Built spaces, residential building height <= 3m",
    "12" : "Built spaces, residential building 3m < height <= 6m",
    "13" : "Built spaces, residential building 6m < height <= 15m",
    "14" : "Built spaces, residential building 15m < height <= 30m",
    "15" : "Built spaces, residential building 30m < height",
    "21" : "Built spaces, non-residential building height <= 3m",
    "22" : "Built spaces, non-residential building 3m < height <= 6m",
    "23" : "Built spaces, non-residential building 6m < height <= 15m",
    "24" : "Built spaces, non-residential building 15m < height <= 30m",
    "25" : "Built spaces, non-residential building 30m < height",
}
layer_ = (
    layer
#     filter region
    .filter(_.lon>limits[0]
            ,_.lon<limits[2]
            ,_.lat>limits[1]
            ,_.lat<limits[3])
#     filter a band
    .filter(_.band_var == 24)
)
layer_.head()
┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓
┃ lon       lat        band_var ┃
┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩
│ float32float32uint8    │
├──────────┼───────────┼──────────┤
│ 8.52308212.00952724 │
│ 8.52289912.00943624 │
│ 8.52299012.00943624 │
│ 8.52308212.00943624 │
│ 8.52317312.00943624 │
└──────────┴───────────┴──────────┘
layer_.count()



┌──────┐
│ 1835 │
└──────┘

Deck map

cat_palette = "Classic_10"
num_palette = "Burg"

gdp_pc_pal = pypal.load_cmap(cat_palette,reverse=True)
deck_data = layer_.execute()
deck_data.head()
lon lat band_var
0 8.523082 12.009527 24
1 8.522899 12.009436 24
2 8.522990 12.009436 24
3 8.523082 12.009436 24
4 8.523173 12.009436 24
deck_data["col"] = cmap(deck_data.band_var,palette=gdp_pc_pal,log=False)
deck_data["value"] = deck_data["band_var"].apply(lambda x: msz_categories[str(int(x))])
Max input : 24.00, palette colors : 10
print(deck_data.shape)
deck_data.head()
(1835, 5)
lon lat band_var col value
0 8.523082 12.009527 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h...
1 8.522899 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h...
2 8.522990 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h...
3 8.523082 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h...
4 8.523173 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h...
viewstate = pdk.data_utils.compute_view(deck_data[["lon","lat"]])
deck_points = pdk.Layer(type="ScatterplotLayer"
                        ,data=deck_data
                        ,get_position = ["lon","lat"]
                        ,get_radius=10
                        ,get_fill_color= "col"
                        ,pickable=True
                        ,opacity=0.2
                        ,stroked=True
                        ,filled=True
                        ,radius_scale=2
                        ,radius_min_pixels=3
                        ,radius_max_pixels=5
                        ,line_width_min_pixels=1
                        ,get_line_color=[0, 0, 0, 0]
                       )
deck = pdk.Deck(layers=[deck_points],
                      initial_view_state=viewstate,
                      tooltip= {"text": " {band_var} : {value}"},
                      map_style="road")
deck.to_html("../../deck_maps/ghsl_msz_vis.html",iframe_height=700)

Gridding function

# layer_df = layer.execute(limit=100_000)
# base_res = 14

layer_df = snd.df_project_on_grid(deck_data,res=base_res) 
print(layer_df.shape)
layer_df.head()
(1835, 6)
lon lat band_var col value h3_id
0 8.523082 12.009527 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b6e687
1 8.522899 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b45167
2 8.522990 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b45a1f
3 8.523082 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b6ada7
4 8.523173 12.009436 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b6ac2f
cat = True

if cat:
    layer_df["band_var"]=layer_df["band_var"].astype("int").astype("str")
neighbs
[(-2, -1),
 (-1, -2),
 (1, 2),
 (0, 0),
 (-2, -2),
 (3, 2),
 (1, 1),
 (2, 0),
 (-1, 1),
 (1, -2),
 (0, 2),
 (3, 1),
 (-1, 0),
 (1, -1),
 (2, 1),
 (0, -1),
 (0, 1),
 (-1, -1),
 (1, 0),
 (2, 2),
 (0, -2)]
def layer_gridded(layer : pd.DataFrame, neighbs : list[tuple[int]]): 
    """" Take a layer of raster centroids expressed as H3 cells and do the 'gridding'. It means filling in all the gaps respecting the original raster grid size.
    Duplicates are removed, the value from the single cell is uniformly distributed across the set of cells associated to it. 
    
    """
    
    vars_columns = [x for x in layer.columns if re.search(pattern="_var$",string=x)]

    layer["h3_gridded"] = pd.Series(layer.apply(lambda row: snd.centre_cell_to_square(h3_cell = row["h3_id"],neighbs=neighbs),axis=1).tolist())
    layer.dropna(subset=["h3_gridded"],inplace=True)
    gridded_layer = layer.explode("h3_gridded")
    grid_layer_b = gridded_layer.shape[0]
    gridded_layer = gridded_layer[~gridded_layer.duplicated(subset=["h3_gridded"])]
    grid_layer_a = gridded_layer.shape[0]
    
    print("Duplicated ratio : ",1-np.round(grid_layer_a/grid_layer_b,3))
    
    gridded_layer.reset_index(inplace=True,drop=True)
    gridded_layer.set_index("h3_id",inplace=True,drop=True)
    gridded_layer = gridded_layer.join(gridded_layer.value_counts("h3_id"),how="left",sort=False,validate="m:1")

    # since only one layer is being read at the moment, just check if it's string or not. 
    # in the future, separate into categorical and numerical variables
    if len(vars_columns)>1:
        if (str not in gridded_layer[vars_columns].dtypes.values.tolist()):
            gridded_layer[vars_columns] = np.round(gridded_layer[vars_columns].div(gridded_layer["count"],axis=0),3)
    
    # if len(vars_columns)==1:
    #     if gridded_layer[vars_columns].dtype!=str:
    #         gridded_layer[vars_columns] = np.round(gridded_layer[vars_columns].div(gridded_layer["count"],axis=0),3)
    
    gridded_layer.reset_index(inplace=True,drop=False)
    gridded_layer.dropna(axis=0,subset=["h3_gridded"],inplace=True)

    return gridded_layer
layer_grid = layer_gridded(layer=layer_df,neighbs=neighbs)
Duplicated ratio :  0.18700000000000006
print(layer_grid.shape)
layer_grid.head()
(31326, 8)
h3_id lon lat band_var col value h3_gridded count
0 8e580a4e0b6e687 8.523082 12.009527 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b6ad27 21
1 8e580a4e0b6e687 8.523082 12.009527 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b6e6f7 21
2 8e580a4e0b6e687 8.523082 12.009527 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b45b4f 21
3 8e580a4e0b6e687 8.523082 12.009527 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b6e687 21
4 8e580a4e0b6e687 8.523082 12.009527 24 [31, 119, 180, 255] Built spaces, non-residential building 15m < h... 8e580a4e0b6e6d7 21

Mapping gridded

sample_size = np.min([layer_grid.shape[0],30_000])
plotting_layer = (layer_grid
                  .sample(sample_size))
plotting_layer["color"] = cmap(plotting_layer["band_var"].astype("int",copy=False)
                               ,palette=gdp_pc_pal
                               ,log=False)
Max input : 24.00, palette colors : 10
#####

layer_gridded = pdk.Layer(
    "H3HexagonLayer",
    plotting_layer,
    pickable=True,
    stroked=True,
    filled=True,
    opacity=.5,
    extruded=False,
    # elevation_scale = 1e-4,
    # elevation_range = [0,100],
    get_elevation="band_var",
    get_hexagon= "h3_gridded",
    get_fill_color = "color",
    get_line_color= [0, 0, 0, 100],
    line_width_min_pixels=0,
    line_width_max_pixels=1,
)
# Create mapdeck object
layer_gridded_deck = pdk.Deck(layers=[layer_gridded]
             ,initial_view_state=viewstate
             ,tooltip= {"text": "{band_var} :  {value}"}
             ,map_style="road"
             )

layer_gridded_deck.to_html("../../deck_maps/msz_ghsl_gridded.html",iframe_height=800)