Skip to article frontmatterSkip to article content

Map Vectors!

DSE EcoTech Series (2024)

The Eric and Wendy Schmidt Center for Data Science & Environment
University of California Berkeley

license: CC-BY-4.0


IMPORTS

from pathlib import Path
import geopandas as gpd
import pandas as pd
import leafmap
import lonboard
import matplotlib.pyplot as plt
import seaborn as sns
from sidecar import Sidecar
import rich.pretty as rp

CONSTANTS

# Path handed to gpd.read_file() and to directory_size() further down.
FIRES = '../data/vector/CalFires_Boundaries21'
# Initial zoom level shared by the leafmap and lonboard maps.
ZOOM = 6
# Assigned to the lonboard map's `_height` attribute.
MAP_HEIGHT = 1000
# View centre: X is passed as longitude and Y as latitude to lonboard.
X = -120.0
Y = 37.0
# (Y, X) ordering is what gets assigned to `leafmap_map.center`.
MAP_CENTER = (Y, X)
# Hex colours paired positionally (via zip) with the sorted unique CAUSE
# values when the categorical fill colour is built.
COLOR_LIST = [
    "#FF0000",  # Red
    "#00FF00",  # Green
    "#0000FF",  # Blue
    "#FFFF00",  # Yellow
    "#FF00FF",  # Magenta
    "#00FFFF",  # Cyan
    "#FFFFFF",  # White
    "#000000",  # Black
    "#FFC0CB",  # Pink
    "#FFA500",  # Orange
    "#800080",  # Purple
    "#808000",  # Olive
    "#008000",  # Dark Green
    "#008080",  # Teal
    "#A52A2A",  # Brown
    "#C0C0C0",  # Silver
    "#800000",  # Maroon
    "#87CEEB",  # Sky Blue
    "#FFD700",  # Gold
]
# Colormap blended through the three listed hex colours; used later to colour
# polygons by their normalised GIS_ACRES value.
CONTINUOUS_PALETTE = sns.color_palette("blend:#6e1209,#f21a05,#fdf903", as_cmap=True)
# Render the palette inline in the notebook.
display(CONTINUOUS_PALETTE)
Loading...

HELPERS

def minmax_norm(values, clip=(0, 50000)):
    """Rescale *values* linearly onto the interval [0, 1].

    Args:
        values: Array-like object exposing ``clip``, ``min`` and ``max``
            and supporting element-wise arithmetic (e.g. a NumPy array or
            a pandas Series).
        clip: ``(lower, upper)`` bounds applied before normalising, or a
            falsy value (``None``) to skip clipping. The default is an
            immutable tuple so it cannot be mutated between calls.

    Returns:
        An object of the same kind as *values* holding
        ``(v - min) / (max - min)``. When every (clipped) value is equal
        the result is all zeros rather than the NaNs a 0/0 division
        would produce.
    """
    if clip:
        values = values.clip(*clip)
    low = values.min()
    span = values.max() - low
    if span == 0:
        # Constant input: avoid 0/0 while still returning floats of the
        # same shape and container type.
        return (values - low) * 0.0
    return (values - low) / span

def directory_size(directory):
    """Return the combined size in bytes of all regular files under *directory*.

    The tree is walked recursively; entries that are not files (such as
    sub-directories themselves) contribute nothing to the total.
    """
    return sum(
        entry.stat().st_size
        for entry in Path(directory).rglob("*")
        if entry.is_file()
    )

def line():
    """Print a horizontal rule made of 75 dash characters."""
    rule = '-' * 75
    print(rule)

GEOPANDAS

# Load the fire boundaries and reproject them to EPSG:4326 in one step.
gdf = gpd.read_file(FIRES).to_crs(epsg=4326)
print('data shape:', gdf.shape)
print('example:')
# Pick one random fire from 2021 with more than 100 GIS acres as a sample.
# YEAR_ is compared as a string here; it is cast to int later in the notebook.
large_2021 = (gdf.GIS_ACRES > 100) & (gdf.YEAR_ == '2021')
row = gdf[large_2021].sample().iloc[0]
display(row.geometry)
rp.pprint(row.to_dict())
Loading...
# Quick static plot of every geometry in the dataset.
ax = gdf.plot()
# The return value is bound to a name so the notebook does not echo it.
txt = ax.set_title('CalFires Data')
<Figure size 640x480 with 1 Axes>

FILE TYPES

line()
print('parq, json, gpkg, (shp)')
line()
print('SAVE')
%time gdf.to_parquet('fires.parquet')
%time gdf.to_file('fires.json', driver="GPKG")
%time gdf.to_file("fires.gpkg", layer='calfires', driver="GPKG")
line()
print('READ')
%time gdf_parq = gpd.read_parquet('fires.parquet')
%time gdf_json = gpd.read_file('fires.json', driver="GPKG")
%time gdf_gpkg = gpd.read_file("fires.gpkg", layer='calfires', driver="GPKG")
line()
print('SIZE')
print('parq:', Path('fires.parquet').stat().st_size)
print('json:', Path('fires.json').stat().st_size)
print('gpkg:', Path('fires.gpkg').stat().st_size)
print('shp: ', directory_size(FIRES))
---------------------------------------------------------------------------
parq, json, gpkg, (shp)
---------------------------------------------------------------------------
SAVE
CPU times: user 219 ms, sys: 54 ms, total: 273 ms
Wall time: 285 ms
CPU times: user 345 ms, sys: 349 ms, total: 693 ms
Wall time: 818 ms
CPU times: user 351 ms, sys: 350 ms, total: 700 ms
Wall time: 835 ms
---------------------------------------------------------------------------
READ
CPU times: user 175 ms, sys: 73.6 ms, total: 249 ms
Wall time: 242 ms
CPU times: user 202 ms, sys: 46.5 ms, total: 248 ms
Wall time: 249 ms
CPU times: user 202 ms, sys: 42 ms, total: 244 ms
Wall time: 245 ms
---------------------------------------------------------------------------
SIZE
parq: 95010603
json: 108892160
gpkg: 108892160
shp:  117379885

SOME PREPROCESSING

# Remember which rows had no recorded cause before they are overwritten.
cause_na = gdf.CAUSE.isna()
# Encode missing causes as -1 and convert the whole column to integers.
# Replacing the column (rather than assigning converted values back through
# .loc) is what actually changes its dtype.
gdf['CAUSE'] = gdf.CAUSE.fillna(-1).astype(int)
# Numeric copy of the YEAR_ column for range filtering.
gdf['year'] = gdf.YEAR_.astype(int)
# Sorted distinct cause codes, used later to build the categorical colour map.
CAUSES = gdf.CAUSE.sort_values().unique()
gdf.year.min(), gdf.year.max()
(np.int64(1898), np.int64(2021))

LEAFMAP

If I try to add all 16k polygons, it crashes, so I’ll filter to years > 2015... but it’s still slow.

# Subset of fires after 2015, small enough for leafmap to handle.
_gdf = gdf[gdf.year > 2015]
# Report full row count, filtered row count, and the fraction retained.
print('dataset sizes:', len(gdf), len(_gdf), len(_gdf) / len(gdf))
dataset sizes: 16327 2553 0.1563667544558094
# Re-derive the post-2015 subset so this cell can be run on its own.
_gdf = gdf[gdf.year>2015]
# Create the leafmap map at the shared zoom level.
leafmap_map = leafmap.Map(zoom=ZOOM)
# MAP_CENTER is defined as (Y, X).
leafmap_map.center = MAP_CENTER
# Add the filtered polygons as a single layer named 'Fires'.
leafmap_map.add_gdf(_gdf, layer_name='Fires', fill_colors=["red", "yellow", "green", "orange"])
# Last expression of the cell: display the map.
leafmap_map
Loading...

LONBOARD


# Start with an empty lonboard map; the layer is attached afterwards.
lonboard_map = lonboard.Map(
    [], 
    view_state=dict(
        longitude=X,
        latitude=Y,
        zoom=ZOOM),
    use_device_pixels=False)
# NOTE(review): `_height` is an underscore-prefixed attribute — confirm it is
# still the supported way to size the widget in the installed lonboard version.
lonboard_map._height =  MAP_HEIGHT

# Polygon layer built from the full (unfiltered) GeoDataFrame, with a single
# fill colour and a single outline colour for every polygon.
layer = lonboard.PolygonLayer.from_geopandas(
    gdf,
    get_line_width=20,
    line_width_min_pixels=0.2,
    get_fill_color='#039d89',
    get_line_color='#103c28',
)

lonboard_map.add_layer(layer)
# Display the map inside a Sidecar titled 'Lonboard Fires'.
sc = Sidecar(title='Lonboard Fires')
with sc:
    display(lonboard_map)
# Recolour by cause: each sorted unique CAUSE value is paired positionally
# with an entry of COLOR_LIST.
layer.get_fill_color = lonboard.colormap.apply_categorical_cmap(
    gdf.CAUSE,
    dict(zip(CAUSES, COLOR_LIST)),
)
# Recolour by size: GIS_ACRES is clipped and min-max normalised by
# minmax_norm, then mapped through the continuous palette. This assignment
# replaces the categorical colouring set just above.
severity_values = minmax_norm(gdf.GIS_ACRES.values)
layer.get_fill_color = lonboard.colormap.apply_continuous_cmap(
    severity_values,
    CONTINUOUS_PALETTE,
)