Data layer¶
import json
import io
import os
import re
import itertools as iter
import numpy as np
# data
import pandas as pd
import geopandas as gpd
import shapely
import duckdb
import h3
import scalenav.data as snd
import scalenav.scale_nav as sn
from scalenav.plotting import cmap
# from scipy import io as io
# import nctoolkit as nc
# import xarray as xr
# import rioxarray as rx
import glob
import ibis as ib
from ibis import _
import ibis.selectors as s
ib.options.interactive = True
# plots
# from datashader import transfer_functions as tf, reductions as rd
import pypalettes as pypal
import pydeck as pdk
from seaborn import color_palette
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
conn = ib.connect("duckdb://")
conn.raw_sql("install spatial; load spatial")
<duckdb.duckdb.DuckDBPyConnection at 0x17eb026b0>
%pwd
'/Users/cenv1069/Documents/scale_nav/source/notebook'
processed_data_folder = "../../outputs/"
# trying an interpolation method for the right resolution from a given grid size
dist = 0
alpha = 5/np.log(1_000)
A = 13 + alpha*np.log(10)
A
alpha
# 13+alpha*np.log(10)
0.7238241365054198
Setup¶
extra_res = 0
grid_params = []
res_params = []
if dist > 0 :
# extra_res=round(A-np.log(alpha*dist))
grid_params = [10,100,1000,5000,10000,dist]
grid_params.sort()
# res_params = [13,12,11,10,8,extra_res]
res_params = [round(A-alpha*np.log(size)) for size in grid_params]
# res_params.sort(reverse=True)
else:
grid_params = [10,100,1000,5000,10000]
res_params = [14,12,11,10,8]
res_params
rast_to_h3 = { str(size): {"h3_res" : res, "nn" : [] } for (size,res) in zip(grid_params,res_params)}
rast_to_h3
{'10': {'h3_res': 14, 'nn': []},
'100': {'h3_res': 12, 'nn': []},
'1000': {'h3_res': 11, 'nn': []},
'5000': {'h3_res': 10, 'nn': []},
'10000': {'h3_res': 8, 'nn': []}}
grid_param = 10
rast_to_h3 = snd.rast_to_h3_map(x=8,y=12,ref="m",dist=dist)
base_res = rast_to_h3[str(grid_param)]["h3_res"]
neighbs = rast_to_h3[str(grid_param)]["nn"]
print("Using base resolution: ",base_res)
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
Using angles for meter grid.
Using base resolution: 14
neighbs
[(0, 0),
(2, 1),
(1, -1),
(1, 1),
(0, 2),
(-1, 0),
(0, -2),
(-2, -1),
(1, 2),
(2, 0),
(0, 1),
(-1, -1),
(1, -2),
(3, 1),
(1, 0),
(2, 2),
(-1, -2),
(0, -1)]
GHSL MSZ data set¶
could be moved to another notebook.
# close up london-oxford
# limits = [-1.486,51.138,0.352,51.882]
# wide europe window
# limits = [-14.06,48.34,31.95,59.27]
# Kano Nigeria
limits = [8.46085,11.96741,8.53552,12.01733]
# Lagos
# limits = [3.0624,6.3699,3.5925,6.6592]
# src_file = "outputs/kummu.parquet"
src_file = "new_process.parquet"
layer = conn.read_parquet(f"{processed_data_folder}{src_file}",table_name="layer")
msz_categories = {
"1" : "Open spaces, low vegetation",
"2" : "Open spaces, medium vegetation",
"3" : "Open spaces, high vegetation",
"4" : "Open spaces, water surfaces",
"5" : "Open spaces, road surfaces",
"11" : "Built spaces, residential building height <= 3m",
"12" : "Built spaces, residential building 3m < height <= 6m",
"13" : "Built spaces, residential building 6m < height <= 15m",
"14" : "Built spaces, residential building 15m < height <= 30m",
"15" : "Built spaces, residential building 30m < height",
"21" : "Built spaces, non-residential building height <= 3m",
"22" : "Built spaces, non-residential building 3m < height <= 6m",
"23" : "Built spaces, non-residential building 6m < height <= 15m",
"24" : "Built spaces, non-residential building 15m < height <= 30m",
"25" : "Built spaces, non-residential building 30m < height",
}
print(conn.list_tables())
conn.sql("Select * from layer limit 10")
['layer']
┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓ ┃ lon ┃ lat ┃ band_var ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩ │ float32 │ float32 │ float32 │ ├───────────┼───────────┼──────────┤ │ -0.322563 │ 16.243849 │ 1.0 │ │ -0.322461 │ 16.243849 │ 1.0 │ │ -0.322359 │ 16.243849 │ 1.0 │ │ -0.322563 │ 16.243767 │ 1.0 │ │ -0.322461 │ 16.243767 │ 11.0 │ │ -0.322359 │ 16.243767 │ 11.0 │ │ -0.322563 │ 16.243685 │ 1.0 │ │ -0.322461 │ 16.243685 │ 11.0 │ │ -0.322358 │ 16.243685 │ 11.0 │ │ -0.322153 │ 16.242781 │ 2.0 │ └───────────┴───────────┴──────────┘
# conn.raw_sql("SUMMARIZE layer;")
# layer.describe()
### Fine scale data with CL tool
# the coarse resolution file
# ! python -m scalenav.rast_converter /Users/cenv1069/Documents/data/datasets/kummu_etal/GDP_PPP_1990_2015_5arcmin_v2.nc outputs/kummu_5arcmin.parquet
# the fine resolution file
# At the moment fails
# ! python -m scalenav.rast_converter /Users/cenv1069/Documents/data/datasets/kummu_etal/GDP_PPP_30arcsec_v3.nc outputs/kummu_30arcsec.parquet
layer_ = (
layer
# filter region
.filter(_.lon>limits[0]
,_.lon<limits[2]
,_.lat>limits[1]
,_.lat<limits[3])
# filter something on the band_var
.filter(_.band_var > 5)
.sample(.1)
.execute(
limit=30_000
)
)
layer_.head()
lon | lat | band_var | |
---|---|---|---|
0 | 8.462206 | 12.017195 | 13.0 |
1 | 8.461496 | 12.017113 | 13.0 |
2 | 8.461597 | 12.017113 | 13.0 |
3 | 8.463519 | 12.017113 | 13.0 |
4 | 8.463822 | 12.017113 | 13.0 |
# # sample_data = "data/sample_data_kummu.parquet"
# # sample_data = "data/sample_data_ghsl_msz.parquet"
# if not os.path.exists(sample_data):
# layer.to_parquet(sample_data)
layer_.shape
(24332, 3)
# layer_.count()
Deck map¶
cat_palette = "Classic_10"
num_palette = "Burg"
gdp_pc_pal = pypal.load_cmap(cat_palette,reverse=True)
deck_data = layer_#.execute(limit=500_000)
deck_data.head()
lon | lat | band_var | |
---|---|---|---|
0 | 8.462206 | 12.017195 | 13.0 |
1 | 8.461496 | 12.017113 | 13.0 |
2 | 8.461597 | 12.017113 | 13.0 |
3 | 8.463519 | 12.017113 | 13.0 |
4 | 8.463822 | 12.017113 | 13.0 |
deck_data["col"] = cmap(deck_data.band_var,palette=gdp_pc_pal,log=False)
deck_data["value"] = deck_data["band_var"].apply(lambda x: msz_categories[str(int(x))])
Max input : 24.00, palette colors : 10
print(deck_data.shape)
deck_data.head()
(24332, 5)
lon | lat | band_var | col | value | |
---|---|---|---|---|---|
0 | 8.462206 | 12.017195 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
1 | 8.461496 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
2 | 8.461597 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
3 | 8.463519 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
4 | 8.463822 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
viewstate = pdk.data_utils.compute_view(deck_data[["lon","lat"]])
deck_points = pdk.Layer(type="ScatterplotLayer"
,data=deck_data
,get_position = ["lon","lat"]
,get_radius=10
,get_fill_color= "col"
,pickable=True
,opacity=0.2
,stroked=True
,filled=True
,radius_scale=2
,radius_min_pixels=3
,radius_max_pixels=5
,line_width_min_pixels=1
,get_line_color=[0, 0, 0, 0]
)
deck = pdk.Deck(layers=[deck_points],
initial_view_state=viewstate,
tooltip= {"text": " {band_var} : {value}"},
map_style="road")
deck.to_html("../../deck_maps/ghsl_msz_vis.html",iframe_height=700)
Gridding function¶
# square = snd.square_poly(lat=limits[1],lon=limits[0],distance=grid_param)
# ref_cell = h3.latlng_to_cell(lat=limits[1],lng=limits[0],res=base_res)
# ref_cell_ij = h3.cell_to_local_ij(origin=ref_cell,h=ref_cell)
# cells = h3.geo_to_cells(square,res=base_res)
# cells_ij = [h3.cell_to_local_ij(origin=ref_cell,h=cell) for cell in cells]
# snd.rast_to_h3[str(grid_param)]["nn"] = [(cell_i-ref_cell_ij[0],cell_j-ref_cell_ij[1]) for (cell_i,cell_j) in cells_ij]
# cells_redone = [h3.local_ij_to_cell(origin=ref_cell,i=cells_i+ref_cell_ij[0],j=cells_j+ref_cell_ij[1]) for (cells_i,cells_j) in snd.rast_to_h3[str(grid_param)]["nn"]]
# gpd.GeoSeries([h3.cells_to_h3shape(cells=cells_redone)],crs="epsg:4326").explore()
# limits = [-1.486,51.138,0.352,51.882]
# wide europe window
# -14.06
# 48.34
# 31.95
# 59.27
layer_.rename(columns={"lon":"x","lat":"y"},inplace=True)
layer_.head()
x | y | band_var | col | value | |
---|---|---|---|---|---|
0 | 8.462206 | 12.017195 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
1 | 8.461496 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
2 | 8.461597 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
3 | 8.463519 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
4 | 8.463822 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... |
# layer_df = layer.execute(limit=100_000)
# base_res = 14
layer_df = snd.df_project_on_grid(layer_,res=base_res)
layer_df.head()
x | y | band_var | col | value | h3_id | |
---|---|---|---|---|---|---|
0 | 8.462206 | 12.017195 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183a6aa7 |
1 | 8.461496 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a41805bcaf |
2 | 8.461597 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a41805b187 |
3 | 8.463519 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183ac0e7 |
4 | 8.463822 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183adcdf |
layer_df.rename(columns={"band":"band_var"},inplace=True)
print(layer_df.shape)
layer_df.head()
(24332, 6)
x | y | band_var | col | value | h3_id | |
---|---|---|---|---|---|---|
0 | 8.462206 | 12.017195 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183a6aa7 |
1 | 8.461496 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a41805bcaf |
2 | 8.461597 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a41805b187 |
3 | 8.463519 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183ac0e7 |
4 | 8.463822 | 12.017113 | 13.0 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183adcdf |
# layer_df["band_var"].astype(copy=)
cat = True
if cat:
layer_df["band_var"]=layer_df["band_var"].astype("int").astype("str")
neighbs
[(0, 0),
(2, 1),
(1, -1),
(1, 1),
(0, 2),
(-1, 0),
(0, -2),
(-2, -1),
(1, 2),
(2, 0),
(0, 1),
(-1, -1),
(1, -2),
(3, 1),
(1, 0),
(2, 2),
(-1, -2),
(0, -1)]
def layer_gridded(layer : pd.DataFrame, neighbs : list[tuple[int]]):
"""" Take a layer of raster centroids expressed as H3 cells and do the 'gridding'. It means filling in all the gaps respecting the original raster grid size.
Duplicates are removed, the value from the single cell is uniformly distributed across the set of cells associated to it.
"""
vars_columns = [x for x in layer.columns if re.search(pattern="_var$",string=x)]
layer["h3_gridded"] = pd.Series(layer.apply(lambda row: snd.centre_cell_to_square(h3_cell = row["h3_id"],neighbs=neighbs),axis=1).tolist())
layer.dropna(subset=["h3_gridded"],inplace=True)
gridded_layer = layer.explode("h3_gridded")
grid_layer_b = gridded_layer.shape[0]
gridded_layer = gridded_layer[~gridded_layer.duplicated(subset=["h3_gridded"])]
grid_layer_a = gridded_layer.shape[0]
print("Duplicated ratio : ",1-np.round(grid_layer_a/grid_layer_b,3))
gridded_layer.reset_index(inplace=True,drop=True)
gridded_layer.set_index("h3_id",inplace=True,drop=True)
gridded_layer = gridded_layer.join(gridded_layer.value_counts("h3_id"),how="left",sort=False,validate="m:1")
# since only one layer is being read at the moment, just check if it's string or not.
# in the future, separate into categorical and numerical variables
if len(vars_columns)>1:
if (str not in gridded_layer[vars_columns].dtypes.values.tolist()):
gridded_layer[vars_columns] = np.round(gridded_layer[vars_columns].div(gridded_layer["count"],axis=0),3)
# if len(vars_columns)==1:
# if gridded_layer[vars_columns].dtype!=str:
# gridded_layer[vars_columns] = np.round(gridded_layer[vars_columns].div(gridded_layer["count"],axis=0),3)
gridded_layer.reset_index(inplace=True,drop=False)
gridded_layer.dropna(axis=0,subset=["h3_gridded"],inplace=True)
return gridded_layer
layer_grid = layer_gridded(layer=layer_df,neighbs=neighbs)
Duplicated ratio : 0.014000000000000012
print(layer_grid.shape)
layer_grid.head()
(432000, 8)
h3_id | x | y | band_var | col | value | h3_gridded | count | |
---|---|---|---|---|---|---|---|---|
0 | 8e580a4183a6aa7 | 8.462206 | 12.017195 | 13 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183a6aa7 | 18 |
1 | 8e580a4183a6aa7 | 8.462206 | 12.017195 | 13 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183a6b87 | 18 |
2 | 8e580a4183a6aa7 | 8.462206 | 12.017195 | 13 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183a6a37 | 18 |
3 | 8e580a4183a6aa7 | 8.462206 | 12.017195 | 13 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183a6b9f | 18 |
4 | 8e580a4183a6aa7 | 8.462206 | 12.017195 | 13 | [148, 103, 189, 255] | Built spaces, residential building 6m < height... | 8e580a4183a614f | 18 |
Mapping gridded¶
sample_size = np.min([layer_grid.shape[0],30_000])
plotting_layer = (layer_grid
# .iloc[0:sample_size]
.sample(sample_size))
plotting_layer["color"] = cmap(plotting_layer["band_var"].astype("int",copy=False)
,palette=gdp_pc_pal
,log=False)
Max input : 24.00, palette colors : 10
#####
layer_gridded = pdk.Layer(
"H3HexagonLayer",
plotting_layer,
pickable=True,
stroked=True,
filled=True,
opacity=.5,
extruded=False,
# elevation_scale = 1e-4,
# elevation_range = [0,100],
get_elevation="band_var",
get_hexagon= "h3_gridded",
get_fill_color = "color",
get_line_color= [0, 0, 0, 100],
line_width_min_pixels=0,
line_width_max_pixels=1,
)
# Create mapdeck object
layer_gridded_deck = pdk.Deck(layers=[layer_gridded]
,initial_view_state=viewstate
,tooltip= {"text": "{band_var} : {value}"}
,map_style="road"
)
layer_gridded_deck.to_html("../../deck_maps/msz_ghsl_gridded.html",iframe_height=800,open_browser=True)