Skip to article frontmatterSkip to article content

Biomass Yield and Spectral Trends

Authors
Affiliations
The Eric and Wendy Schmidt Center for Data Science & Environment
University of California, Berkeley
The Eric and Wendy Schmidt Center for Data Science & Environment
University of California, Berkeley
date: 2025-01-15
authors:
    - name: Brookie Guzder-Williams
affiliations:
    - University of California Berkeley, The Eric and Wendy Schmidt Center for Data Science & Environment
license: CC-BY-4.0

This notebook uses DSE’s Spectral Trend Database (STDB) to produce and interactive chart displaying Biomass Yield vs a number of different spectral incides over time.

  1. Fetch Data for a random point: We use STDS’s query module and in particular the QueryConstructor to generate a ...
  2. Build Chart
  3. Save Chart JSON to improve responsiveness of chart

IMPORTS

from importlib import reload
from typing import Callable, Union, Optional, Literal, TypeAlias, Sequence, Any
import re
from pprint import pprint
import random
import pandas as pd
import numpy as np
import xarray as xr
import tensorflow as tf
from IPython.display import HTML
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from spectral_trend_database.config import config as c
from spectral_trend_database import query
from spectral_trend_database import utils
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn import metrics

CONFIG

BQ_PREFIX = 'dse-regenag.SpectralTrendDatabase'
SAMPLES_TABLE = 'SAMPLE_POINTS'
CROP_TYPE_TABLE = 'CDL_CROP_TYPE'
YIELD_TABLE = 'QDANN_YIELD'
INDICES_TABLE = 'SMOOTHED_INDICES_V1'
SAMPLE_ID = '9zphqkb3hh8'
CHART_DATA_PATH = 'spectral_trends.chart_data.json'
SAMPLE_FRAC = 0.0005
YEAR_START = 2008
YEAR_END = 2018
START_MMDD = '11-01'
END_MMDD = START_MMDD
ATTR_COLS = [
    'sample_id',
    'lon',
    'lat']
LIST_ATTRS = [
    'year',
    'biomass',
    'crop_type']
IDENT_DATE_COLUMNS = ['sample_id', 'year', 'date']

HELPER METHODS

def print_list(lst, max_len=7, view_size=3, sep=', ', connector=' ... '):
    size = len(lst)
    if size <= max_len:
        lst_str = sep.join(lst)
    else:
        head = sep.join(lst[:view_size])        
        tail = sep.join(lst[-view_size:])
        lst_str = f'{head}{connector}{tail}  [{size}]'
    print(lst_str)

def line(marker='-', length=100):
    print(marker*length)

SMOOTHED_INDICES_COLUMNS = query.column_names(INDICES_TABLE, run_query=True)
INDICES = [c for c in SMOOTHED_INDICES_COLUMNS if c not in IDENT_DATE_COLUMNS]
INDICES = sorted(INDICES)
print_list(INDICES)
afri1600, atsavi, blue ... tvi, wdrvi, wet  [42]

1. Fetch Data

query = reload(query)
def fetch_yield_and_trend_data(
        year_start: int,
        year_end: Optional[int] = None,
        sample_id: Optional[Union[list[str], str]] = None,
        print_sql: Optional[bool] = False,
        limit: Optional[int] = None) -> pd.DataFrame:
    """
    Builds and Executes a SQL Query to get all data
    form a given set of sample_ids during a specified time 
    period.
    
    Args: 
        year_start (int): start year to select data
        year_end (Optional[int] = None): 
            last year to select data from (inclusive). if None use <year_start>
        sample_id (Optional[Union[list[str], str]] = None)
            sample_id or list of sample-ids of data to select. if none select 
            from all samples.
        print_sql (Optional[bool] = False)
        limit (Optional[int] = None)

    Returns:
        pd.DataFrame of smoothed-spectral-indices
    """
    if year_end is None: 
        year_end=year_start
    qc = query.QueryConstructor(
        SAMPLES_TABLE, 
        table_prefix=BQ_PREFIX,
        using=['sample_id'],  
        how='inner')
    qc.join(CROP_TYPE_TABLE, 'sample_id')
    qc.join(YIELD_TABLE, 'sample_id', 'year')
    qc.join(INDICES_TABLE, 'sample_id', 'year')
    if sample_id:
        if isinstance(sample_id, list):
            qc.where_in(sample_id=sample_id)
        else:
            qc.where(sample_id=sample_id)
    qc.where(INDICES_TABLE, year=year_start, year_op='>=')
    qc.where(INDICES_TABLE, year=year_end, year_op='<=')
    qc.orderby('date', table=INDICES_TABLE)
    df = query.run(sql=qc.sql(), print_sql=print_sql)
    return df
data = fetch_yield_and_trend_data(
    year_start=YEAR_START,
    year_end=YEAR_END,
    sample_id=SAMPLE_ID,
    print_sql=True)

print('shape:', data.shape)
data.sample(3)
Loading...
utils = reload(utils)
ds = utils.rows_to_xr(
    data.drop_duplicates(['date']), 
    attr_cols=ATTR_COLS,
    list_cols=LIST_ATTRS,
    list_distinct_cols=['sample_id', 'year'])
ds
Loading...
yield_biomass = { y: b for y,b in zip(ds.year, ds.biomass)}
crop_types = { y: b for y,b in zip(ds.year, ds.crop_type)}
dvars = { k: ds[k].data for k in ds.data_vars }
chart_data = pd.DataFrame(dvars)
chart_data['date'] = ds.date.data
chart_data['idx'] = list(chart_data.index)
chart_data['year'] = chart_data.date.apply(lambda d: d.year)
chart_data['biomass'] = chart_data.year.apply(lambda y: yield_biomass.get(y, None))
chart_data['crop_type'] = chart_data.year.apply(lambda y: crop_types.get(y, 'soy'))

CHART

S = 1.25
DEFAULT_INDEX = 'slavi'
HEIGHT = 400 * S
GRAPH_WIDTH = 600 * S
SI_COLOR = '#515e68'
SI_OPACITY = 0.6
YIELD_OPACITY = 0.5
TITLE_COLOR = '#333'
TITLE_SIZE = 22
TITLE_WEIGHT = 200
SUBTITLE_COLOR = '#aaa'
SUBTITLE_SIZE = 14
SOY_COLOR = '#4e9561'
CORN_COLOR = '#e2d644'
TITLE = 'SPECTRAL TRENDS'
SUBTITLE = 'exploring yield as a function of spectral indices'
BIOMASS_YLABEL = 'Biomass Yield'
SI_YLABEL = 'Spectral Index'
display(HTML("""
<style>
  span.vega-bind-name {
    color: #555;
    margin: 0 10px 0 60px;
    font-size: 20px;
  }
  .vega-bind label select {
    color: #555;
    font-size: 18px;
  }
</style>
"""))

# yield chart
yield_scale = alt.Scale(
    domain=[0.0, 14000],
    clamp=True
)
yield_color_scale = alt.Scale(
    domain=['soy', 'corn'], 
    range=[SOY_COLOR, CORN_COLOR])
yield_yaxis = alt.Axis(title=BIOMASS_YLABEL, titleFontSize=18, titleColor=TITLE_COLOR, titleFontWeight=TITLE_WEIGHT)
yield_chart = alt.Chart(chart_data).encode(
    x=alt.X('date:T', title=None),
    y=alt.Y('biomass:Q', axis=yield_yaxis, scale=yield_scale),
    color=alt.Color('crop_type:N', scale=yield_color_scale)
).mark_area(
    filled=True,
    opacity=YIELD_OPACITY,
    interpolate='step-before')

# interactive spectral index chart
si_yaxis = alt.Axis(title=SI_YLABEL, titleFontSize=18, titleColor=TITLE_COLOR, titleFontWeight=TITLE_WEIGHT)
si_dropdown = alt.binding_select(
    options=INDICES,
    labels=[n.upper() for n in INDICES],
    name='Spectral Index: '.upper()
)
ycol_param = alt.param(
    value=DEFAULT_INDEX,
    bind=si_dropdown
)
si_chart = alt.Chart(chart_data).encode(
    x=alt.X('date:T', title=None),
    y=alt.Y('y:Q', axis=si_yaxis)
).properties(
    width=GRAPH_WIDTH,
    height=HEIGHT
).transform_calculate(
    y=f'datum[{ycol_param.name}]'
).add_params(
    ycol_param
).mark_area(
    fill=SI_COLOR,
    fillOpacity=SI_OPACITY
)


# display
title = alt.Title(
    TITLE,
    color=TITLE_COLOR,
    fontSize=TITLE_SIZE,
    fontWeight=TITLE_WEIGHT,
    subtitle=SUBTITLE,
    subtitleColor=SUBTITLE_COLOR,
    subtitleFontSize=SUBTITLE_SIZE)

chart = alt.layer(yield_chart, si_chart).resolve_scale(y='independent')
chart = chart.properties(
    title=title).configure_legend(
    title=None,
    labelFontSize=18,  
    labelColor=SUBTITLE_COLOR 
)
chart 
Loading...