Biomass Yield and Spectral Trends
date: 2025-01-15
authors:
- name: Brookie Guzder-Williams
affiliations:
- University of California Berkeley, The Eric and Wendy Schmidt Center for Data Science & Environment
license: CC-BY-4.0
This notebook uses DSE’s Spectral Trend Database (STDB) to produce and interactive chart displaying Biomass Yield vs a number of different spectral incides over time.
- Fetch Data for a random point: We use STDS’s query module and in particular the
QueryConstructor
to generate a ... - Build Chart
- Save Chart JSON to improve responsiveness of chart
IMPORTS¶
from importlib import reload
from typing import Callable, Union, Optional, Literal, TypeAlias, Sequence, Any
import re
from pprint import pprint
import random
import pandas as pd
import numpy as np
import xarray as xr
import tensorflow as tf
from IPython.display import HTML
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from spectral_trend_database.config import config as c
from spectral_trend_database import query
from spectral_trend_database import utils
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn import metrics
CONFIG¶
BQ_PREFIX = 'dse-regenag.SpectralTrendDatabase'
SAMPLES_TABLE = 'SAMPLE_POINTS'
CROP_TYPE_TABLE = 'CDL_CROP_TYPE'
YIELD_TABLE = 'QDANN_YIELD'
INDICES_TABLE = 'SMOOTHED_INDICES_V1'
SAMPLE_ID = '9zphqkb3hh8'
CHART_DATA_PATH = 'spectral_trends.chart_data.json'
SAMPLE_FRAC = 0.0005
YEAR_START = 2008
YEAR_END = 2018
START_MMDD = '11-01'
END_MMDD = START_MMDD
ATTR_COLS = [
'sample_id',
'lon',
'lat']
LIST_ATTRS = [
'year',
'biomass',
'crop_type']
IDENT_DATE_COLUMNS = ['sample_id', 'year', 'date']
HELPER METHODS¶
def print_list(lst, max_len=7, view_size=3, sep=', ', connector=' ... '):
size = len(lst)
if size <= max_len:
lst_str = sep.join(lst)
else:
head = sep.join(lst[:view_size])
tail = sep.join(lst[-view_size:])
lst_str = f'{head}{connector}{tail} [{size}]'
print(lst_str)
def line(marker='-', length=100):
print(marker*length)
SMOOTHED_INDICES_COLUMNS = query.column_names(INDICES_TABLE, run_query=True)
INDICES = [c for c in SMOOTHED_INDICES_COLUMNS if c not in IDENT_DATE_COLUMNS]
INDICES = sorted(INDICES)
print_list(INDICES)
afri1600, atsavi, blue ... tvi, wdrvi, wet [42]
1. Fetch Data¶
query = reload(query)
def fetch_yield_and_trend_data(
year_start: int,
year_end: Optional[int] = None,
sample_id: Optional[Union[list[str], str]] = None,
print_sql: Optional[bool] = False,
limit: Optional[int] = None) -> pd.DataFrame:
"""
Builds and Executes a SQL Query to get all data
form a given set of sample_ids during a specified time
period.
Args:
year_start (int): start year to select data
year_end (Optional[int] = None):
last year to select data from (inclusive). if None use <year_start>
sample_id (Optional[Union[list[str], str]] = None)
sample_id or list of sample-ids of data to select. if none select
from all samples.
print_sql (Optional[bool] = False)
limit (Optional[int] = None)
Returns:
pd.DataFrame of smoothed-spectral-indices
"""
if year_end is None:
year_end=year_start
qc = query.QueryConstructor(
SAMPLES_TABLE,
table_prefix=BQ_PREFIX,
using=['sample_id'],
how='inner')
qc.join(CROP_TYPE_TABLE, 'sample_id')
qc.join(YIELD_TABLE, 'sample_id', 'year')
qc.join(INDICES_TABLE, 'sample_id', 'year')
if sample_id:
if isinstance(sample_id, list):
qc.where_in(sample_id=sample_id)
else:
qc.where(sample_id=sample_id)
qc.where(INDICES_TABLE, year=year_start, year_op='>=')
qc.where(INDICES_TABLE, year=year_end, year_op='<=')
qc.orderby('date', table=INDICES_TABLE)
df = query.run(sql=qc.sql(), print_sql=print_sql)
return df
data = fetch_yield_and_trend_data(
year_start=YEAR_START,
year_end=YEAR_END,
sample_id=SAMPLE_ID,
print_sql=True)
print('shape:', data.shape)
data.sample(3)
Loading...
utils = reload(utils)
ds = utils.rows_to_xr(
data.drop_duplicates(['date']),
attr_cols=ATTR_COLS,
list_cols=LIST_ATTRS,
list_distinct_cols=['sample_id', 'year'])
ds
Loading...
yield_biomass = { y: b for y,b in zip(ds.year, ds.biomass)}
crop_types = { y: b for y,b in zip(ds.year, ds.crop_type)}
dvars = { k: ds[k].data for k in ds.data_vars }
chart_data = pd.DataFrame(dvars)
chart_data['date'] = ds.date.data
chart_data['idx'] = list(chart_data.index)
chart_data['year'] = chart_data.date.apply(lambda d: d.year)
chart_data['biomass'] = chart_data.year.apply(lambda y: yield_biomass.get(y, None))
chart_data['crop_type'] = chart_data.year.apply(lambda y: crop_types.get(y, 'soy'))
CHART¶
S = 1.25
DEFAULT_INDEX = 'slavi'
HEIGHT = 400 * S
GRAPH_WIDTH = 600 * S
SI_COLOR = '#515e68'
SI_OPACITY = 0.6
YIELD_OPACITY = 0.5
TITLE_COLOR = '#333'
TITLE_SIZE = 22
TITLE_WEIGHT = 200
SUBTITLE_COLOR = '#aaa'
SUBTITLE_SIZE = 14
SOY_COLOR = '#4e9561'
CORN_COLOR = '#e2d644'
TITLE = 'SPECTRAL TRENDS'
SUBTITLE = 'exploring yield as a function of spectral indices'
BIOMASS_YLABEL = 'Biomass Yield'
SI_YLABEL = 'Spectral Index'
display(HTML("""
<style>
span.vega-bind-name {
color: #555;
margin: 0 10px 0 60px;
font-size: 20px;
}
.vega-bind label select {
color: #555;
font-size: 18px;
}
</style>
"""))
# yield chart
yield_scale = alt.Scale(
domain=[0.0, 14000],
clamp=True
)
yield_color_scale = alt.Scale(
domain=['soy', 'corn'],
range=[SOY_COLOR, CORN_COLOR])
yield_yaxis = alt.Axis(title=BIOMASS_YLABEL, titleFontSize=18, titleColor=TITLE_COLOR, titleFontWeight=TITLE_WEIGHT)
yield_chart = alt.Chart(chart_data).encode(
x=alt.X('date:T', title=None),
y=alt.Y('biomass:Q', axis=yield_yaxis, scale=yield_scale),
color=alt.Color('crop_type:N', scale=yield_color_scale)
).mark_area(
filled=True,
opacity=YIELD_OPACITY,
interpolate='step-before')
# interactive spectral index chart
si_yaxis = alt.Axis(title=SI_YLABEL, titleFontSize=18, titleColor=TITLE_COLOR, titleFontWeight=TITLE_WEIGHT)
si_dropdown = alt.binding_select(
options=INDICES,
labels=[n.upper() for n in INDICES],
name='Spectral Index: '.upper()
)
ycol_param = alt.param(
value=DEFAULT_INDEX,
bind=si_dropdown
)
si_chart = alt.Chart(chart_data).encode(
x=alt.X('date:T', title=None),
y=alt.Y('y:Q', axis=si_yaxis)
).properties(
width=GRAPH_WIDTH,
height=HEIGHT
).transform_calculate(
y=f'datum[{ycol_param.name}]'
).add_params(
ycol_param
).mark_area(
fill=SI_COLOR,
fillOpacity=SI_OPACITY
)
# display
title = alt.Title(
TITLE,
color=TITLE_COLOR,
fontSize=TITLE_SIZE,
fontWeight=TITLE_WEIGHT,
subtitle=SUBTITLE,
subtitleColor=SUBTITLE_COLOR,
subtitleFontSize=SUBTITLE_SIZE)
chart = alt.layer(yield_chart, si_chart).resolve_scale(y='independent')
chart = chart.properties(
title=title).configure_legend(
title=None,
labelFontSize=18,
labelColor=SUBTITLE_COLOR
)
chart
Loading...